/*--------------------------------------------------------------------*/
/*--- begin                                     guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   RDTSC returns zero, always.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/

/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.
   These are:

      cmp{s,sb,sw,sd,sq}
      in{s,sb,sw,sd}
      jcxz, jecxz, jrcxz
      lod{s,sb,sw,sd,sq}
      loop{,e,bz,be,z}
      mov{s,sb,sw,sd,sq}
      out{s,sb,sw,sd}
      rep{,e,ne,nz}
      sca{s,sb,sw,sd,sq}
      sto{s,sb,sw,sd,sq}
      xlat{,b} */

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 3 is allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
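/* Illustrative sketch only (not compiled in): one way a client
   program could emit the preamble plus the client_request marker,
   using GCC extended asm.  The real, supported definitions live in
   valgrind.h (VALGRIND_DO_CLIENT_REQUEST and friends) and may differ
   in detail; the macro and operand choices here are assumptions made
   for the sake of the example.  Note the four rotate amounts sum to
   128, a multiple of 64, so %rdi is left unchanged. */
#if 0
#define SPECIAL_PREAMBLE                          \
   "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"        \
   "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"

static unsigned long do_client_request ( unsigned long args_ptr )
{
   unsigned long result;
   __asm__ volatile(
      SPECIAL_PREAMBLE
      "xchgq %%rbx,%%rbx"      /* %RDX = client_request ( %RAX ) */
      : "=d" (result)
      : "a" (args_ptr)
      : "cc", "memory");
   return result;
}
#endif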
/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}

static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}
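/* Illustrative sanity check (not compiled in): mkSizedOp relies on
   the IROp enumeration in libvex_ir.h laying out the 8/16/32/64-bit
   variants of each operation at consecutive values, so that adding
   0..3 to the 8-bit variant selects the size. */
#if 0
static void mkSizedOp_selftest ( void )
{
   vassert(mkSizedOp(Ity_I16, Iop_Add8) == Iop_Add16);
   vassert(mkSizedOp(Ity_I32, Iop_Xor8) == Iop_Xor32);
   vassert(mkSizedOp(Ity_I64, Iop_Sub8) == Iop_Sub64);
}
#endif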


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
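/* Usage note (illustrative): the decode routines below finish with
   something like
      DIP("add %s,%s\n", ...);
   which prints the disassembled instruction only when front-end
   tracing (VEX_TRACE_FE) is enabled; with tracing off, the format
   arguments are not evaluated at all. */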


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.   ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
#define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((((Long)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((((Long)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((((Long)x) << 32) >> 32);
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static inline UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}
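/* Illustrative sanity check (not compiled in): the sign-extension
   helpers on boundary values. */
#if 0
static void sext_selftest ( void )
{
   vassert(extend_s_8to64 ( 0x80 )       == 0xFFFFFFFFFFFFFF80ULL);
   vassert(extend_s_16to64( 0x8000 )     == 0xFFFFFFFFFFFF8000ULL);
   vassert(extend_s_32to64( 0x7FFFFFFF ) == 0x000000007FFFFFFFULL);
}
#endif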
/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}

static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSizeMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}
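/* Illustrative sanity check (not compiled in): the size helpers in
   terms of concrete values. */
#if 0
static void sizeHelpers_selftest ( void )
{
   vassert(mkSizeMask(2) == 0xFFFFULL);
   vassert(mkSizeMask(8) == 0xFFFFFFFFFFFFFFFFULL);
   vassert(szToITy(4)    == Ity_I32);
   vassert(imin(3,5)     == 3);
}
#endif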


/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 8 bits of the prefix are 0x55, just as a hacky way to
   ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


#define PFX_EMPTY 0x55000000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}
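/* Illustrative sketch (assumption: this mirrors what the main decode
   loop does when it sees a REX byte; the real logic lives further
   down this file, and the helper name here is hypothetical).  A REX
   byte has the form 0100WRXB and folds into the Prefix summary like
   so: */
#if 0
static Prefix foldRexByte ( Prefix pfx, UChar rex )
{
   vassert((rex & 0xF0) == 0x40);
   pfx |= PFX_REX;
   if (rex & 8) pfx |= PFX_REXW;
   if (rex & 4) pfx |= PFX_REXR;
   if (rex & 2) pfx |= PFX_REXX;
   if (rex & 1) pfx |= PFX_REXB;
   return pfx;
}
#endif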
/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}

/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F2 set */
static Bool have66orF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2)) > 0);
}

/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return
      p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}

/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
static UInt getVexNvvvv ( Prefix pfx ) {
   UInt r = (UInt)pfx;
   r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
   return r & 0xF;
}

static Bool haveVEX ( Prefix pfx ) {
   return toBool(pfx & PFX_VEX);
}

static Int getVexL ( Prefix pfx ) {
   return (pfx & PFX_VEXL) ? 1 : 0;
}
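/* Worked example (illustrative): the raw VEX prefix holds vvvv
   ones-complemented; the decode logic is assumed to complement it
   again when depositing it into PFX_VEXnV3..PFX_VEXnV0, per the
   comments on those bits above.  Since PFX_VEXnV0 == 1<<18, the
   division in getVexNvvvv amounts to an unsigned right shift by 18:
   if vvvv names register 11, getVexNvvvv returns 11. */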


/*------------------------------------------------------------*/
/*--- For dealing with escapes                             ---*/
/*------------------------------------------------------------*/


/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking.
*/

typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;


/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the integer register denoted by
   that number.  In particular, values 4 through 7 of this sequence do
   not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/
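/* Worked example (illustrative): for modrm byte 0xD8 (binary
   11 011 000), mod == 11, so the e part names a register; the g field
   is 011 and the e field is 000.  With a REX byte 0x44 (R=1, X=0,
   B=0), the full register numbers become g == 8+3 == 11 and e == 0,
   i.e. %r11d and %eax at the default 32-bit operand size. */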
/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}


/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */

static
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}

/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}
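/* Worked example (illustrative): with sz == 1, no REX present and
   reg == 5, nameIReg yields "%ch" and offsetIReg yields 1+OFFB_RCX --
   the second-lowest byte of the guest RCX slot on a little-endian
   host. */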
/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}


/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(!host_is_bigendian);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}
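/* Note: in the sz==4 cases of putIRegRAX and putIRegRDX above, the
   value is widened with Iop_32Uto64 before the 64-bit Put.  This
   matches AMD64 semantics: writing a 32-bit register zeroes the upper
   32 bits of the containing 64-bit register, whereas 16- and 8-bit
   writes leave the rest of the register untouched (hence the partial
   Puts in those cases). */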


/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}


/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

static HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}


/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                  )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
        ));
}
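/* Worked example (illustrative): with lo3bits == 1 and REX.B set, the
   full register number is 1 | (1 << 3) == 9, so getIRegRexB(8, pfx, 1)
   reads %r9; with no REX present the same call would read %rcx. */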


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}


/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   return ymmGuestRegOffset( xmmreg );
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}
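/* Worked example (illustrative): xmmGuestRegLane32offset(3, 2) ==
   OFFB_YMM3 + 8, i.e. the third-lowest 32-bit lane of %xmm3, counting
   lane 0 as the least significant on a little-endian host. */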
static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}

static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}
static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

/* Write the low half of a YMM reg and zero out the upper half. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}
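/* Illustrative sketch (not compiled in; assumption: simplified from
   what a handler such as dis_op2_G_E does for a LOCK-prefixed
   read-modify-write): read the old value, compute the new one, then
   let casLE store it, restarting the insn if the location changed in
   between. */
#if 0
static void example_lock_add32 ( IRTemp addr )
{
   IRTemp oldv = newTemp(Ity_I32);
   IRTemp newv = newTemp(Ity_I32);
   assign(oldv, loadLE(Ity_I32, mkexpr(addr)));
   assign(newv, binop(Iop_Add32, mkexpr(oldv), mkU32(1)));
   casLE(mkexpr(addr), mkexpr(oldv)/*expected*/,
         mkexpr(newv)/*new*/, guest_RIP_curr_instr);
}
#endif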


/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
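/* Note (illustrative): mcx_mask is a bitmask over the helper call's
   argument list, one bit per argument position; a set bit tells
   Memcheck not to definedness-check that argument.  For the
   four-argument thunk calls above, bits 0 and 3 cover CC_OP and
   CC_NDEP; for amd64g_calculate_condition, whose argument list is
   (cond, OP, DEP1, DEP2, NDEP), bits 0, 1 and 4 do the same job. */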


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
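/* Worked example (illustrative): for a 32-bit ADD with operands in
   temporaries t1 and t2, setFlags_DEP1_DEP2(Iop_Add8, t1, t2,
   Ity_I32) stores AMD64G_CC_OP_ADDB + 2 (== AMD64G_CC_OP_ADDL) in
   CC_OP and the widened operands in CC_DEP1/CC_DEP2.  The flags
   themselves are computed lazily, only if and when something later
   invokes one of the mk_amd64g_calculate_* helpers above. */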
*/ 1822 stmt( IRStmt_Put( OFFB_CC_OP, 1823 IRExpr_Mux0X( mkexpr(guard), 1824 IRExpr_Get(OFFB_CC_OP,Ity_I64), 1825 mkU64(ccOp))) ); 1826 stmt( IRStmt_Put( OFFB_CC_DEP1, 1827 IRExpr_Mux0X( mkexpr(guard), 1828 IRExpr_Get(OFFB_CC_DEP1,Ity_I64), 1829 widenUto64(mkexpr(res)))) ); 1830 stmt( IRStmt_Put( OFFB_CC_DEP2, 1831 IRExpr_Mux0X( mkexpr(guard), 1832 IRExpr_Get(OFFB_CC_DEP2,Ity_I64), 1833 widenUto64(mkexpr(resUS)))) ); 1834 } 1835 1836 1837 /* For the inc/dec case, we store in DEP1 the result value and in NDEP 1838 the former value of the carry flag, which unfortunately we have to 1839 compute. */ 1840 1841 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) 1842 { 1843 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB; 1844 1845 switch (ty) { 1846 case Ity_I8: ccOp += 0; break; 1847 case Ity_I16: ccOp += 1; break; 1848 case Ity_I32: ccOp += 2; break; 1849 case Ity_I64: ccOp += 3; break; 1850 default: vassert(0); 1851 } 1852 1853 /* This has to come first, because calculating the C flag 1854 may require reading all four thunk fields. */ 1855 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) ); 1856 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1857 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) ); 1858 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1859 } 1860 1861 1862 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the 1863 two arguments. */ 1864 1865 static 1866 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1867 { 1868 switch (ty) { 1869 case Ity_I8: 1870 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1871 break; 1872 case Ity_I16: 1873 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1874 break; 1875 case Ity_I32: 1876 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1877 break; 1878 case Ity_I64: 1879 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1880 break; 1881 default: 1882 vpanic("setFlags_MUL(amd64)"); 1883 } 1884 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1885 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1886 } 1887 1888 1889 /* -------------- Condition codes. -------------- */ 1890 1891 /* Condition codes, using the AMD encoding. */ 1892 1893 static HChar* name_AMD64Condcode ( AMD64Condcode cond ) 1894 { 1895 switch (cond) { 1896 case AMD64CondO: return "o"; 1897 case AMD64CondNO: return "no"; 1898 case AMD64CondB: return "b"; 1899 case AMD64CondNB: return "ae"; /*"nb";*/ 1900 case AMD64CondZ: return "e"; /*"z";*/ 1901 case AMD64CondNZ: return "ne"; /*"nz";*/ 1902 case AMD64CondBE: return "be"; 1903 case AMD64CondNBE: return "a"; /*"nbe";*/ 1904 case AMD64CondS: return "s"; 1905 case AMD64CondNS: return "ns"; 1906 case AMD64CondP: return "p"; 1907 case AMD64CondNP: return "np"; 1908 case AMD64CondL: return "l"; 1909 case AMD64CondNL: return "ge"; /*"nl";*/ 1910 case AMD64CondLE: return "le"; 1911 case AMD64CondNLE: return "g"; /*"nle";*/ 1912 case AMD64CondAlways: return "ALWAYS"; 1913 default: vpanic("name_AMD64Condcode"); 1914 } 1915 } 1916 1917 static 1918 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond, 1919 /*OUT*/Bool* needInvert ) 1920 { 1921 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE); 1922 if (cond & 1) { 1923 *needInvert = True; 1924 return cond-1; 1925 } else { 1926 *needInvert = False; 1927 return cond; 1928 } 1929 } 1930 1931 1932 /* -------------- Helpers for ADD/SUB with carry. -------------- */ 1933 1934 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1935 appropriately. 
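Concretely, tres = ta1 + ta2 + C, where C is the old carry flag.
   Eg for "adcq %rbx,%rax", ta1 is the old %rax, ta2 is %rbx, and
   tres becomes the new %rax.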
1936 1937 Optionally, generate a store for the 'tres' value. This can either 1938 be a normal store, or it can be a cas-with-possible-failure style 1939 store: 1940 1941 if taddr is IRTemp_INVALID, then no store is generated. 1942 1943 if taddr is not IRTemp_INVALID, then a store (using taddr as 1944 the address) is generated: 1945 1946 if texpVal is IRTemp_INVALID then a normal store is 1947 generated, and restart_point must be zero (it is irrelevant). 1948 1949 if texpVal is not IRTemp_INVALID then a cas-style store is 1950 generated. texpVal is the expected value, restart_point 1951 is the restart point if the store fails, and texpVal must 1952 have the same type as tres. 1953 1954 */ 1955 static void helper_ADC ( Int sz, 1956 IRTemp tres, IRTemp ta1, IRTemp ta2, 1957 /* info about optional store: */ 1958 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1959 { 1960 UInt thunkOp; 1961 IRType ty = szToITy(sz); 1962 IRTemp oldc = newTemp(Ity_I64); 1963 IRTemp oldcn = newTemp(ty); 1964 IROp plus = mkSizedOp(ty, Iop_Add8); 1965 IROp xor = mkSizedOp(ty, Iop_Xor8); 1966 1967 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1968 1969 switch (sz) { 1970 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break; 1971 case 4: thunkOp = AMD64G_CC_OP_ADCL; break; 1972 case 2: thunkOp = AMD64G_CC_OP_ADCW; break; 1973 case 1: thunkOp = AMD64G_CC_OP_ADCB; break; 1974 default: vassert(0); 1975 } 1976 1977 /* oldc = old carry flag, 0 or 1 */ 1978 assign( oldc, binop(Iop_And64, 1979 mk_amd64g_calculate_rflags_c(), 1980 mkU64(1)) ); 1981 1982 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1983 1984 assign( tres, binop(plus, 1985 binop(plus,mkexpr(ta1),mkexpr(ta2)), 1986 mkexpr(oldcn)) ); 1987 1988 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1989 start of this function. */ 1990 if (taddr != IRTemp_INVALID) { 1991 if (texpVal == IRTemp_INVALID) { 1992 vassert(restart_point == 0); 1993 storeLE( mkexpr(taddr), mkexpr(tres) ); 1994 } else { 1995 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 1996 /* .. and hence 'texpVal' has the same type as 'tres'. */ 1997 casLE( mkexpr(taddr), 1998 mkexpr(texpVal), mkexpr(tres), restart_point ); 1999 } 2000 } 2001 2002 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2003 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) )); 2004 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2005 mkexpr(oldcn)) )) ); 2006 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2007 } 2008 2009 2010 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags 2011 appropriately. As with helper_ADC, possibly generate a store of 2012 the result -- see comments on helper_ADC for details. 
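(Both helpers also share an encoding trick worth noting: DEP2 is
   written as ta2 ^ oldC rather than ta2 itself, with oldC parked in
   NDEP.  A flags helper can then recover the original second operand,
   as a sketch, assuming that convention:

      oldC = cc_ndep & 1;
      argR = cc_dep2 ^ oldC;   // the original ta2

   Presumably the point is that NDEP is excluded from Memcheck's
   definedness checking, so xor-ing oldC into DEP2 keeps an undefined
   carry visible in the checked inputs.)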
2013 */ 2014 static void helper_SBB ( Int sz, 2015 IRTemp tres, IRTemp ta1, IRTemp ta2, 2016 /* info about optional store: */ 2017 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 2018 { 2019 UInt thunkOp; 2020 IRType ty = szToITy(sz); 2021 IRTemp oldc = newTemp(Ity_I64); 2022 IRTemp oldcn = newTemp(ty); 2023 IROp minus = mkSizedOp(ty, Iop_Sub8); 2024 IROp xor = mkSizedOp(ty, Iop_Xor8); 2025 2026 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 2027 2028 switch (sz) { 2029 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break; 2030 case 4: thunkOp = AMD64G_CC_OP_SBBL; break; 2031 case 2: thunkOp = AMD64G_CC_OP_SBBW; break; 2032 case 1: thunkOp = AMD64G_CC_OP_SBBB; break; 2033 default: vassert(0); 2034 } 2035 2036 /* oldc = old carry flag, 0 or 1 */ 2037 assign( oldc, binop(Iop_And64, 2038 mk_amd64g_calculate_rflags_c(), 2039 mkU64(1)) ); 2040 2041 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 2042 2043 assign( tres, binop(minus, 2044 binop(minus,mkexpr(ta1),mkexpr(ta2)), 2045 mkexpr(oldcn)) ); 2046 2047 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 2048 start of this function. */ 2049 if (taddr != IRTemp_INVALID) { 2050 if (texpVal == IRTemp_INVALID) { 2051 vassert(restart_point == 0); 2052 storeLE( mkexpr(taddr), mkexpr(tres) ); 2053 } else { 2054 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 2055 /* .. and hence 'texpVal' has the same type as 'tres'. */ 2056 casLE( mkexpr(taddr), 2057 mkexpr(texpVal), mkexpr(tres), restart_point ); 2058 } 2059 } 2060 2061 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2062 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) ); 2063 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2064 mkexpr(oldcn)) )) ); 2065 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2066 } 2067 2068 2069 /* -------------- Helpers for disassembly printing. -------------- */ 2070 2071 static HChar* nameGrp1 ( Int opc_aux ) 2072 { 2073 static HChar* grp1_names[8] 2074 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; 2075 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)"); 2076 return grp1_names[opc_aux]; 2077 } 2078 2079 static HChar* nameGrp2 ( Int opc_aux ) 2080 { 2081 static HChar* grp2_names[8] 2082 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; 2083 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)"); 2084 return grp2_names[opc_aux]; 2085 } 2086 2087 static HChar* nameGrp4 ( Int opc_aux ) 2088 { 2089 static HChar* grp4_names[8] 2090 = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; 2091 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)"); 2092 return grp4_names[opc_aux]; 2093 } 2094 2095 static HChar* nameGrp5 ( Int opc_aux ) 2096 { 2097 static HChar* grp5_names[8] 2098 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; 2099 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)"); 2100 return grp5_names[opc_aux]; 2101 } 2102 2103 static HChar* nameGrp8 ( Int opc_aux ) 2104 { 2105 static HChar* grp8_names[8] 2106 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; 2107 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)"); 2108 return grp8_names[opc_aux]; 2109 } 2110 2111 //.. static HChar* nameSReg ( UInt sreg ) 2112 //.. { 2113 //.. switch (sreg) { 2114 //.. case R_ES: return "%es"; 2115 //.. case R_CS: return "%cs"; 2116 //.. case R_SS: return "%ss"; 2117 //.. case R_DS: return "%ds"; 2118 //.. case R_FS: return "%fs"; 2119 //.. case R_GS: return "%gs"; 2120 //.. default: vpanic("nameSReg(x86)"); 2121 //.. } 2122 //.. 
} 2123 2124 static HChar* nameMMXReg ( Int mmxreg ) 2125 { 2126 static HChar* mmx_names[8] 2127 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; 2128 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)"); 2129 return mmx_names[mmxreg]; 2130 } 2131 2132 static HChar* nameXMMReg ( Int xmmreg ) 2133 { 2134 static HChar* xmm_names[16] 2135 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 2136 "%xmm4", "%xmm5", "%xmm6", "%xmm7", 2137 "%xmm8", "%xmm9", "%xmm10", "%xmm11", 2138 "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; 2139 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)"); 2140 return xmm_names[xmmreg]; 2141 } 2142 2143 static HChar* nameMMXGran ( Int gran ) 2144 { 2145 switch (gran) { 2146 case 0: return "b"; 2147 case 1: return "w"; 2148 case 2: return "d"; 2149 case 3: return "q"; 2150 default: vpanic("nameMMXGran(amd64,guest)"); 2151 } 2152 } 2153 2154 static HChar nameISize ( Int size ) 2155 { 2156 switch (size) { 2157 case 8: return 'q'; 2158 case 4: return 'l'; 2159 case 2: return 'w'; 2160 case 1: return 'b'; 2161 default: vpanic("nameISize(amd64)"); 2162 } 2163 } 2164 2165 static HChar* nameYMMReg ( Int ymmreg ) 2166 { 2167 static HChar* ymm_names[16] 2168 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3", 2169 "%ymm4", "%ymm5", "%ymm6", "%ymm7", 2170 "%ymm8", "%ymm9", "%ymm10", "%ymm11", 2171 "%ymm12", "%ymm13", "%ymm14", "%ymm15" }; 2172 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)"); 2173 return ymm_names[ymmreg]; 2174 } 2175 2176 2177 /*------------------------------------------------------------*/ 2178 /*--- JMP helpers ---*/ 2179 /*------------------------------------------------------------*/ 2180 2181 static void jmp_lit( /*MOD*/DisResult* dres, 2182 IRJumpKind kind, Addr64 d64 ) 2183 { 2184 vassert(dres->whatNext == Dis_Continue); 2185 vassert(dres->len == 0); 2186 vassert(dres->continueAt == 0); 2187 vassert(dres->jk_StopHere == Ijk_INVALID); 2188 dres->whatNext = Dis_StopHere; 2189 dres->jk_StopHere = kind; 2190 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) ); 2191 } 2192 2193 static void jmp_treg( /*MOD*/DisResult* dres, 2194 IRJumpKind kind, IRTemp t ) 2195 { 2196 vassert(dres->whatNext == Dis_Continue); 2197 vassert(dres->len == 0); 2198 vassert(dres->continueAt == 0); 2199 vassert(dres->jk_StopHere == Ijk_INVALID); 2200 dres->whatNext = Dis_StopHere; 2201 dres->jk_StopHere = kind; 2202 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) ); 2203 } 2204 2205 static 2206 void jcc_01 ( /*MOD*/DisResult* dres, 2207 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) 2208 { 2209 Bool invert; 2210 AMD64Condcode condPos; 2211 vassert(dres->whatNext == Dis_Continue); 2212 vassert(dres->len == 0); 2213 vassert(dres->continueAt == 0); 2214 vassert(dres->jk_StopHere == Ijk_INVALID); 2215 dres->whatNext = Dis_StopHere; 2216 dres->jk_StopHere = Ijk_Boring; 2217 condPos = positiveIse_AMD64Condcode ( cond, &invert ); 2218 if (invert) { 2219 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2220 Ijk_Boring, 2221 IRConst_U64(d64_false), 2222 OFFB_RIP ) ); 2223 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) ); 2224 } else { 2225 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2226 Ijk_Boring, 2227 IRConst_U64(d64_true), 2228 OFFB_RIP ) ); 2229 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) ); 2230 } 2231 } 2232 2233 /* Let new_rsp be the %rsp value after a call/return. Let nia be the 2234 guest address of the next instruction to be executed. 2235 2236 This function generates an AbiHint to say that -128(%rsp) 2237 .. 
-1(%rsp) should now be regarded as uninitialised. 2238 */ 2239 static 2240 void make_redzone_AbiHint ( VexAbiInfo* vbi, 2241 IRTemp new_rsp, IRTemp nia, HChar* who ) 2242 { 2243 Int szB = vbi->guest_stack_redzone_size; 2244 vassert(szB >= 0); 2245 2246 /* A bit of a kludge. Currently the only ABI we've guested AMD64 2247 for is ELF. So just check it's the expected 128 value 2248 (paranoia). */ 2249 vassert(szB == 128); 2250 2251 if (0) vex_printf("AbiHint: %s\n", who); 2252 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64); 2253 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); 2254 if (szB > 0) 2255 stmt( IRStmt_AbiHint( 2256 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 2257 szB, 2258 mkexpr(nia) 2259 )); 2260 } 2261 2262 2263 /*------------------------------------------------------------*/ 2264 /*--- Disassembling addressing modes ---*/ 2265 /*------------------------------------------------------------*/ 2266 2267 static 2268 HChar* segRegTxt ( Prefix pfx ) 2269 { 2270 if (pfx & PFX_CS) return "%cs:"; 2271 if (pfx & PFX_DS) return "%ds:"; 2272 if (pfx & PFX_ES) return "%es:"; 2273 if (pfx & PFX_FS) return "%fs:"; 2274 if (pfx & PFX_GS) return "%gs:"; 2275 if (pfx & PFX_SS) return "%ss:"; 2276 return ""; /* no override */ 2277 } 2278 2279 2280 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 2281 linear address by adding any required segment override as indicated 2282 by pfx, and also dealing with any address size override 2283 present. */ 2284 static 2285 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi, 2286 Prefix pfx, IRExpr* virtual ) 2287 { 2288 /* --- segment overrides --- */ 2289 if (pfx & PFX_FS) { 2290 if (vbi->guest_amd64_assume_fs_is_zero) { 2291 /* Note that this is a linux-kernel specific hack that relies 2292 on the assumption that %fs is always zero. */ 2293 /* return virtual + guest_FS_ZERO. */ 2294 virtual = binop(Iop_Add64, virtual, 2295 IRExpr_Get(OFFB_FS_ZERO, Ity_I64)); 2296 } else { 2297 unimplemented("amd64 %fs segment override"); 2298 } 2299 } 2300 2301 if (pfx & PFX_GS) { 2302 if (vbi->guest_amd64_assume_gs_is_0x60) { 2303 /* Note that this is a darwin-kernel specific hack that relies 2304 on the assumption that %gs is always 0x60. */ 2305 /* return virtual + guest_GS_0x60. */ 2306 virtual = binop(Iop_Add64, virtual, 2307 IRExpr_Get(OFFB_GS_0x60, Ity_I64)); 2308 } else { 2309 unimplemented("amd64 %gs segment override"); 2310 } 2311 } 2312 2313 /* cs, ds, es and ss are simply ignored in 64-bit mode. */ 2314 2315 /* --- address size override --- */ 2316 if (haveASO(pfx)) 2317 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual)); 2318 2319 return virtual; 2320 } 2321 2322 //.. { 2323 //.. Int sreg; 2324 //.. IRType hWordTy; 2325 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 2326 //.. 2327 //.. if (sorb == 0) 2328 //.. /* the common case - no override */ 2329 //.. return virtual; 2330 //.. 2331 //.. switch (sorb) { 2332 //.. case 0x3E: sreg = R_DS; break; 2333 //.. case 0x26: sreg = R_ES; break; 2334 //.. case 0x64: sreg = R_FS; break; 2335 //.. case 0x65: sreg = R_GS; break; 2336 //.. default: vpanic("handleAddrOverrides(x86,guest)"); 2337 //.. } 2338 //.. 2339 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 2340 //.. 2341 //.. seg_selector = newTemp(Ity_I32); 2342 //.. ldt_ptr = newTemp(hWordTy); 2343 //.. gdt_ptr = newTemp(hWordTy); 2344 //.. r64 = newTemp(Ity_I64); 2345 //.. 2346 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 2347 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 2348 //..
assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 2349 //.. 2350 //.. /* 2351 //.. Call this to do the translation and limit checks: 2352 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2353 //.. UInt seg_selector, UInt virtual_addr ) 2354 //.. */ 2355 //.. assign( 2356 //.. r64, 2357 //.. mkIRExprCCall( 2358 //.. Ity_I64, 2359 //.. 0/*regparms*/, 2360 //.. "x86g_use_seg_selector", 2361 //.. &x86g_use_seg_selector, 2362 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 2363 //.. mkexpr(seg_selector), virtual) 2364 //.. ) 2365 //.. ); 2366 //.. 2367 //.. /* If the high 32 of the result are non-zero, there was a 2368 //.. failure in address translation. In which case, make a 2369 //.. quick exit. 2370 //.. */ 2371 //.. stmt( 2372 //.. IRStmt_Exit( 2373 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), 2374 //.. Ijk_MapFail, 2375 //.. IRConst_U32( guest_eip_curr_instr ) 2376 //.. ) 2377 //.. ); 2378 //.. 2379 //.. /* otherwise, here's the translated result. */ 2380 //.. return unop(Iop_64to32, mkexpr(r64)); 2381 //.. } 2382 2383 2384 /* Generate IR to calculate an address indicated by a ModRM and 2385 following SIB bytes. The expression, and the number of bytes in 2386 the address mode, are returned (the latter in *len). Note that 2387 this fn should not be called if the R/M part of the address denotes 2388 a register instead of memory. If print_codegen is true, text of 2389 the addressing mode is placed in buf. 2390 2391 The computed address is stored in a new tempreg, and the 2392 identity of the tempreg is returned. 2393 2394 extra_bytes holds the number of bytes after the amode, as supplied 2395 by the caller. This is needed to make sense of %rip-relative 2396 addresses. Note that the value that *len is set to is only the 2397 length of the amode itself and does not include the value supplied 2398 in extra_bytes. 2399 */ 2400 2401 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 ) 2402 { 2403 IRTemp tmp = newTemp(Ity_I64); 2404 assign( tmp, addr64 ); 2405 return tmp; 2406 } 2407 2408 static 2409 IRTemp disAMode ( /*OUT*/Int* len, 2410 VexAbiInfo* vbi, Prefix pfx, Long delta, 2411 /*OUT*/HChar* buf, Int extra_bytes ) 2412 { 2413 UChar mod_reg_rm = getUChar(delta); 2414 delta++; 2415 2416 buf[0] = (UChar)0; 2417 vassert(extra_bytes >= 0 && extra_bytes < 10); 2418 2419 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2420 jump table seems a bit excessive. 2421 */ 2422 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2423 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2424 /* is now XX0XXYYY */ 2425 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2426 switch (mod_reg_rm) { 2427 2428 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2429 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2430 */ 2431 case 0x00: case 0x01: case 0x02: case 0x03: 2432 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2433 { UChar rm = toUChar(mod_reg_rm & 7); 2434 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2435 *len = 1; 2436 return disAMode_copy2tmp( 2437 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm))); 2438 } 2439 2440 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2441 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2442 */ 2443 case 0x08: case 0x09: case 0x0A: case 0x0B: 2444 /* ! 
0C */ case 0x0D: case 0x0E: case 0x0F: 2445 { UChar rm = toUChar(mod_reg_rm & 7); 2446 Long d = getSDisp8(delta); 2447 if (d == 0) { 2448 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2449 } else { 2450 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2451 } 2452 *len = 2; 2453 return disAMode_copy2tmp( 2454 handleAddrOverrides(vbi, pfx, 2455 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2456 } 2457 2458 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2459 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2460 */ 2461 case 0x10: case 0x11: case 0x12: case 0x13: 2462 /* ! 14 */ case 0x15: case 0x16: case 0x17: 2463 { UChar rm = toUChar(mod_reg_rm & 7); 2464 Long d = getSDisp32(delta); 2465 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2466 *len = 5; 2467 return disAMode_copy2tmp( 2468 handleAddrOverrides(vbi, pfx, 2469 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2470 } 2471 2472 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2473 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */ 2474 case 0x18: case 0x19: case 0x1A: case 0x1B: 2475 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2476 vpanic("disAMode(amd64): not an addr!"); 2477 2478 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set 2479 correctly at the start of handling each instruction. */ 2480 case 0x05: 2481 { Long d = getSDisp32(delta); 2482 *len = 5; 2483 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); 2484 /* We need to know the next instruction's start address. 2485 Try and figure out what it is, record the guess, and ask 2486 the top-level driver logic (bbToIR_AMD64) to check we 2487 guessed right, after the instruction is completely 2488 decoded. */ 2489 guest_RIP_next_mustcheck = True; 2490 guest_RIP_next_assumed = guest_RIP_bbstart 2491 + delta+4 + extra_bytes; 2492 return disAMode_copy2tmp( 2493 handleAddrOverrides(vbi, pfx, 2494 binop(Iop_Add64, mkU64(guest_RIP_next_assumed), 2495 mkU64(d)))); 2496 } 2497 2498 case 0x04: { 2499 /* SIB, with no displacement. Special cases: 2500 -- %rsp cannot act as an index value. 2501 If index_r indicates %rsp, zero is used for the index. 2502 -- when mod is zero and base indicates RBP or R13, base is 2503 instead a 32-bit sign-extended literal. 2504 It's all madness, I tell you. Extract %index, %base and 2505 scale from the SIB byte.
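For example (an illustrative decode, not a case that gets
         special treatment): SIB byte 0x58 is 01 011 000, ie scale=1,
         index=3 (%rbx, given REX.X==0) and base=0 (%rax, given
         REX.B==0), denoting the address %rax + (%rbx << 1).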
The value denoted is then: 2506 | %index == %RSP && (%base == %RBP || %base == %R13) 2507 = d32 following SIB byte 2508 | %index == %RSP && !(%base == %RBP || %base == %R13) 2509 = %base 2510 | %index != %RSP && (%base == %RBP || %base == %R13) 2511 = d32 following SIB byte + (%index << scale) 2512 | %index != %RSP && !(%base == %RBP || %base == %R13) 2513 = %base + (%index << scale) 2514 */ 2515 UChar sib = getUChar(delta); 2516 UChar scale = toUChar((sib >> 6) & 3); 2517 UChar index_r = toUChar((sib >> 3) & 7); 2518 UChar base_r = toUChar(sib & 7); 2519 /* correct since #(R13) == 8 + #(RBP) */ 2520 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2521 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx)); 2522 delta++; 2523 2524 if ((!index_is_SP) && (!base_is_BPor13)) { 2525 if (scale == 0) { 2526 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2527 nameIRegRexB(8,pfx,base_r), 2528 nameIReg64rexX(pfx,index_r)); 2529 } else { 2530 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2531 nameIRegRexB(8,pfx,base_r), 2532 nameIReg64rexX(pfx,index_r), 1<<scale); 2533 } 2534 *len = 2; 2535 return 2536 disAMode_copy2tmp( 2537 handleAddrOverrides(vbi, pfx, 2538 binop(Iop_Add64, 2539 getIRegRexB(8,pfx,base_r), 2540 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2541 mkU8(scale))))); 2542 } 2543 2544 if ((!index_is_SP) && base_is_BPor13) { 2545 Long d = getSDisp32(delta); 2546 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, 2547 nameIReg64rexX(pfx,index_r), 1<<scale); 2548 *len = 6; 2549 return 2550 disAMode_copy2tmp( 2551 handleAddrOverrides(vbi, pfx, 2552 binop(Iop_Add64, 2553 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2554 mkU8(scale)), 2555 mkU64(d)))); 2556 } 2557 2558 if (index_is_SP && (!base_is_BPor13)) { 2559 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r)); 2560 *len = 2; 2561 return disAMode_copy2tmp( 2562 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r))); 2563 } 2564 2565 if (index_is_SP && base_is_BPor13) { 2566 Long d = getSDisp32(delta); 2567 DIS(buf, "%s%lld", segRegTxt(pfx), d); 2568 *len = 6; 2569 return disAMode_copy2tmp( 2570 handleAddrOverrides(vbi, pfx, mkU64(d))); 2571 } 2572 2573 vassert(0); 2574 } 2575 2576 /* SIB, with 8-bit displacement. Special cases: 2577 -- %esp cannot act as an index value. 2578 If index_r indicates %esp, zero is used for the index. 
2579 Denoted value is: 2580 | %index == %ESP 2581 = d8 + %base 2582 | %index != %ESP 2583 = d8 + %base + (%index << scale) 2584 */ 2585 case 0x0C: { 2586 UChar sib = getUChar(delta); 2587 UChar scale = toUChar((sib >> 6) & 3); 2588 UChar index_r = toUChar((sib >> 3) & 7); 2589 UChar base_r = toUChar(sib & 7); 2590 Long d = getSDisp8(delta+1); 2591 2592 if (index_r == R_RSP && 0==getRexX(pfx)) { 2593 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2594 d, nameIRegRexB(8,pfx,base_r)); 2595 *len = 3; 2596 return disAMode_copy2tmp( 2597 handleAddrOverrides(vbi, pfx, 2598 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2599 } else { 2600 if (scale == 0) { 2601 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2602 nameIRegRexB(8,pfx,base_r), 2603 nameIReg64rexX(pfx,index_r)); 2604 } else { 2605 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2606 nameIRegRexB(8,pfx,base_r), 2607 nameIReg64rexX(pfx,index_r), 1<<scale); 2608 } 2609 *len = 3; 2610 return 2611 disAMode_copy2tmp( 2612 handleAddrOverrides(vbi, pfx, 2613 binop(Iop_Add64, 2614 binop(Iop_Add64, 2615 getIRegRexB(8,pfx,base_r), 2616 binop(Iop_Shl64, 2617 getIReg64rexX(pfx,index_r), mkU8(scale))), 2618 mkU64(d)))); 2619 } 2620 vassert(0); /*NOTREACHED*/ 2621 } 2622 2623 /* SIB, with 32-bit displacement. Special cases: 2624 -- %rsp cannot act as an index value. 2625 If index_r indicates %rsp, zero is used for the index. 2626 Denoted value is: 2627 | %index == %RSP 2628 = d32 + %base 2629 | %index != %RSP 2630 = d32 + %base + (%index << scale) 2631 */ 2632 case 0x14: { 2633 UChar sib = getUChar(delta); 2634 UChar scale = toUChar((sib >> 6) & 3); 2635 UChar index_r = toUChar((sib >> 3) & 7); 2636 UChar base_r = toUChar(sib & 7); 2637 Long d = getSDisp32(delta+1); 2638 2639 if (index_r == R_RSP && 0==getRexX(pfx)) { 2640 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2641 d, nameIRegRexB(8,pfx,base_r)); 2642 *len = 6; 2643 return disAMode_copy2tmp( 2644 handleAddrOverrides(vbi, pfx, 2645 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2646 } else { 2647 if (scale == 0) { 2648 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2649 nameIRegRexB(8,pfx,base_r), 2650 nameIReg64rexX(pfx,index_r)); 2651 } else { 2652 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2653 nameIRegRexB(8,pfx,base_r), 2654 nameIReg64rexX(pfx,index_r), 1<<scale); 2655 } 2656 *len = 6; 2657 return 2658 disAMode_copy2tmp( 2659 handleAddrOverrides(vbi, pfx, 2660 binop(Iop_Add64, 2661 binop(Iop_Add64, 2662 getIRegRexB(8,pfx,base_r), 2663 binop(Iop_Shl64, 2664 getIReg64rexX(pfx,index_r), mkU8(scale))), 2665 mkU64(d)))); 2666 } 2667 vassert(0); /*NOTREACHED*/ 2668 } 2669 2670 default: 2671 vpanic("disAMode(amd64)"); 2672 return 0; /*notreached*/ 2673 } 2674 } 2675 2676 2677 /* Figure out the number of (insn-stream) bytes constituting the amode 2678 beginning at delta. Is useful for getting hold of literals beyond 2679 the end of the amode before it has been disassembled. */ 2680 2681 static UInt lengthAMode ( Prefix pfx, Long delta ) 2682 { 2683 UChar mod_reg_rm = getUChar(delta); 2684 delta++; 2685 2686 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2687 jump table seems a bit excessive. 2688 */ 2689 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2690 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2691 /* is now XX0XXYYY */ 2692 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2693 switch (mod_reg_rm) { 2694 2695 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2696 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 
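(The exclusions arise because rm=100 always introduces a SIB
         byte and, with mod=00, rm=101 always means RIP+disp32, even
         when REX.B==1; hence %r12 and %r13 cannot be denoted here.)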
2697 */ 2698 case 0x00: case 0x01: case 0x02: case 0x03: 2699 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2700 return 1; 2701 2702 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2703 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2704 */ 2705 case 0x08: case 0x09: case 0x0A: case 0x0B: 2706 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2707 return 2; 2708 2709 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2710 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2711 */ 2712 case 0x10: case 0x11: case 0x12: case 0x13: 2713 /* ! 14 */ case 0x15: case 0x16: case 0x17: 2714 return 5; 2715 2716 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2717 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */ 2718 /* Not an address, but still handled. */ 2719 case 0x18: case 0x19: case 0x1A: case 0x1B: 2720 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2721 return 1; 2722 2723 /* RIP + disp32. */ 2724 case 0x05: 2725 return 5; 2726 2727 case 0x04: { 2728 /* SIB, with no displacement. */ 2729 UChar sib = getUChar(delta); 2730 UChar base_r = toUChar(sib & 7); 2731 /* correct since #(R13) == 8 + #(RBP) */ 2732 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2733 2734 if (base_is_BPor13) { 2735 return 6; 2736 } else { 2737 return 2; 2738 } 2739 } 2740 2741 /* SIB, with 8-bit displacement. */ 2742 case 0x0C: 2743 return 3; 2744 2745 /* SIB, with 32-bit displacement. */ 2746 case 0x14: 2747 return 6; 2748 2749 default: 2750 vpanic("lengthAMode(amd64)"); 2751 return 0; /*notreached*/ 2752 } 2753 } 2754 2755 2756 /*------------------------------------------------------------*/ 2757 /*--- Disassembling common idioms ---*/ 2758 /*------------------------------------------------------------*/ 2759 2760 /* Handle binary integer instructions of the form 2761 op E, G meaning 2762 op reg-or-mem, reg 2763 Is passed a ptr to the modRM byte, the actual operation, and the 2764 data size. Returns the address advanced completely over this 2765 instruction. 2766 2767 E(src) is reg-or-mem 2768 G(dst) is reg. 2769 2770 If E is reg, --> GET %G, tmp 2771 OP %E, tmp 2772 PUT tmp, %G 2773 2774 If E is mem and OP is not reversible, 2775 --> (getAddr E) -> tmpa 2776 LD (tmpa), tmpa 2777 GET %G, tmp2 2778 OP tmpa, tmp2 2779 PUT tmp2, %G 2780 2781 If E is mem and OP is reversible 2782 --> (getAddr E) -> tmpa 2783 LD (tmpa), tmpa 2784 OP %G, tmpa 2785 PUT tmpa, %G 2786 */ 2787 static 2788 ULong dis_op2_E_G ( VexAbiInfo* vbi, 2789 Prefix pfx, 2790 Bool addSubCarry, 2791 IROp op8, 2792 Bool keep, 2793 Int size, 2794 Long delta0, 2795 HChar* t_amd64opc ) 2796 { 2797 HChar dis_buf[50]; 2798 Int len; 2799 IRType ty = szToITy(size); 2800 IRTemp dst1 = newTemp(ty); 2801 IRTemp src = newTemp(ty); 2802 IRTemp dst0 = newTemp(ty); 2803 UChar rm = getUChar(delta0); 2804 IRTemp addr = IRTemp_INVALID; 2805 2806 /* addSubCarry == True indicates the intended operation is 2807 add-with-carry or subtract-with-borrow. */ 2808 if (addSubCarry) { 2809 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2810 vassert(keep); 2811 } 2812 2813 if (epartIsReg(rm)) { 2814 /* Specially handle XOR reg,reg, because that doesn't really 2815 depend on reg, and doing the obvious thing potentially 2816 generates a spurious value check failure due to the bogus 2817 dependency.
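(Eg "xorq %rax,%rax" always produces zero, so it is handled
         below as if it were "movq $0, %rax"; Memcheck then sees a
         defined zero even when the old %rax was undefined.)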
*/ 2818 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2819 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2820 if (False && op8 == Iop_Sub8) 2821 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n"); 2822 putIRegG(size,pfx,rm, mkU(ty,0)); 2823 } 2824 2825 assign( dst0, getIRegG(size,pfx,rm) ); 2826 assign( src, getIRegE(size,pfx,rm) ); 2827 2828 if (addSubCarry && op8 == Iop_Add8) { 2829 helper_ADC( size, dst1, dst0, src, 2830 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2831 putIRegG(size, pfx, rm, mkexpr(dst1)); 2832 } else 2833 if (addSubCarry && op8 == Iop_Sub8) { 2834 helper_SBB( size, dst1, dst0, src, 2835 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2836 putIRegG(size, pfx, rm, mkexpr(dst1)); 2837 } else { 2838 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2839 if (isAddSub(op8)) 2840 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2841 else 2842 setFlags_DEP1(op8, dst1, ty); 2843 if (keep) 2844 putIRegG(size, pfx, rm, mkexpr(dst1)); 2845 } 2846 2847 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2848 nameIRegE(size,pfx,rm), 2849 nameIRegG(size,pfx,rm)); 2850 return 1+delta0; 2851 } else { 2852 /* E refers to memory */ 2853 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2854 assign( dst0, getIRegG(size,pfx,rm) ); 2855 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 2856 2857 if (addSubCarry && op8 == Iop_Add8) { 2858 helper_ADC( size, dst1, dst0, src, 2859 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2860 putIRegG(size, pfx, rm, mkexpr(dst1)); 2861 } else 2862 if (addSubCarry && op8 == Iop_Sub8) { 2863 helper_SBB( size, dst1, dst0, src, 2864 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2865 putIRegG(size, pfx, rm, mkexpr(dst1)); 2866 } else { 2867 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2868 if (isAddSub(op8)) 2869 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2870 else 2871 setFlags_DEP1(op8, dst1, ty); 2872 if (keep) 2873 putIRegG(size, pfx, rm, mkexpr(dst1)); 2874 } 2875 2876 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2877 dis_buf, nameIRegG(size, pfx, rm)); 2878 return len+delta0; 2879 } 2880 } 2881 2882 2883 2884 /* Handle binary integer instructions of the form 2885 op G, E meaning 2886 op reg, reg-or-mem 2887 Is passed a ptr to the modRM byte, the actual operation, and the 2888 data size. Returns the address advanced completely over this 2889 instruction. 2890 2891 G(src) is reg. 2892 E(dst) is reg-or-mem 2893 2894 If E is reg, --> GET %E, tmp 2895 OP %G, tmp 2896 PUT tmp, %E 2897 2898 If E is mem, --> (getAddr E) -> tmpa 2899 LD (tmpa), tmpv 2900 OP %G, tmpv 2901 ST tmpv, (tmpa) 2902 */ 2903 static 2904 ULong dis_op2_G_E ( VexAbiInfo* vbi, 2905 Prefix pfx, 2906 Bool addSubCarry, 2907 IROp op8, 2908 Bool keep, 2909 Int size, 2910 Long delta0, 2911 HChar* t_amd64opc ) 2912 { 2913 HChar dis_buf[50]; 2914 Int len; 2915 IRType ty = szToITy(size); 2916 IRTemp dst1 = newTemp(ty); 2917 IRTemp src = newTemp(ty); 2918 IRTemp dst0 = newTemp(ty); 2919 UChar rm = getUChar(delta0); 2920 IRTemp addr = IRTemp_INVALID; 2921 2922 /* addSubCarry == True indicates the intended operation is 2923 add-with-carry or subtract-with-borrow. */ 2924 if (addSubCarry) { 2925 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2926 vassert(keep); 2927 } 2928 2929 if (epartIsReg(rm)) { 2930 /* Specially handle XOR reg,reg, because that doesn't really 2931 depend on reg, and doing the obvious thing potentially 2932 generates a spurious value check failure due to the bogus 2933 dependency. Ditto SBB reg,reg.
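(Eg "sbbq %rax,%rax" leaves %rax as 0 or -1 depending only on
         the old carry flag, so the result does not depend on the old
         %rax at all.)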
*/ 2934 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2935 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2936 putIRegE(size,pfx,rm, mkU(ty,0)); 2937 } 2938 2939 assign(dst0, getIRegE(size,pfx,rm)); 2940 assign(src, getIRegG(size,pfx,rm)); 2941 2942 if (addSubCarry && op8 == Iop_Add8) { 2943 helper_ADC( size, dst1, dst0, src, 2944 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2945 putIRegE(size, pfx, rm, mkexpr(dst1)); 2946 } else 2947 if (addSubCarry && op8 == Iop_Sub8) { 2948 helper_SBB( size, dst1, dst0, src, 2949 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2950 putIRegE(size, pfx, rm, mkexpr(dst1)); 2951 } else { 2952 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2953 if (isAddSub(op8)) 2954 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2955 else 2956 setFlags_DEP1(op8, dst1, ty); 2957 if (keep) 2958 putIRegE(size, pfx, rm, mkexpr(dst1)); 2959 } 2960 2961 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2962 nameIRegG(size,pfx,rm), 2963 nameIRegE(size,pfx,rm)); 2964 return 1+delta0; 2965 } 2966 2967 /* E refers to memory */ 2968 { 2969 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2970 assign(dst0, loadLE(ty,mkexpr(addr))); 2971 assign(src, getIRegG(size,pfx,rm)); 2972 2973 if (addSubCarry && op8 == Iop_Add8) { 2974 if (pfx & PFX_LOCK) { 2975 /* cas-style store */ 2976 helper_ADC( size, dst1, dst0, src, 2977 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 2978 } else { 2979 /* normal store */ 2980 helper_ADC( size, dst1, dst0, src, 2981 /*store*/addr, IRTemp_INVALID, 0 ); 2982 } 2983 } else 2984 if (addSubCarry && op8 == Iop_Sub8) { 2985 if (pfx & PFX_LOCK) { 2986 /* cas-style store */ 2987 helper_SBB( size, dst1, dst0, src, 2988 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 2989 } else { 2990 /* normal store */ 2991 helper_SBB( size, dst1, dst0, src, 2992 /*store*/addr, IRTemp_INVALID, 0 ); 2993 } 2994 } else { 2995 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2996 if (keep) { 2997 if (pfx & PFX_LOCK) { 2998 if (0) vex_printf("locked case\n" ); 2999 casLE( mkexpr(addr), 3000 mkexpr(dst0)/*expval*/, 3001 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 3002 } else { 3003 if (0) vex_printf("nonlocked case\n"); 3004 storeLE(mkexpr(addr), mkexpr(dst1)); 3005 } 3006 } 3007 if (isAddSub(op8)) 3008 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3009 else 3010 setFlags_DEP1(op8, dst1, ty); 3011 } 3012 3013 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3014 nameIRegG(size,pfx,rm), dis_buf); 3015 return len+delta0; 3016 } 3017 } 3018 3019 3020 /* Handle move instructions of the form 3021 mov E, G meaning 3022 mov reg-or-mem, reg 3023 Is passed a ptr to the modRM byte, and the data size. Returns 3024 the address advanced completely over this instruction. 3025 3026 E(src) is reg-or-mem 3027 G(dst) is reg.
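(Eg "movq (%rsi),%rdi" arrives here; E can also denote a plain
   register, in which case the first path below applies.)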
3028 3029 If E is reg, --> GET %E, tmpv 3030 PUT tmpv, %G 3031 3032 If E is mem --> (getAddr E) -> tmpa 3033 LD (tmpa), tmpb 3034 PUT tmpb, %G 3035 */ 3036 static 3037 ULong dis_mov_E_G ( VexAbiInfo* vbi, 3038 Prefix pfx, 3039 Int size, 3040 Long delta0 ) 3041 { 3042 Int len; 3043 UChar rm = getUChar(delta0); 3044 HChar dis_buf[50]; 3045 3046 if (epartIsReg(rm)) { 3047 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 3048 DIP("mov%c %s,%s\n", nameISize(size), 3049 nameIRegE(size,pfx,rm), 3050 nameIRegG(size,pfx,rm)); 3051 return 1+delta0; 3052 } 3053 3054 /* E refers to memory */ 3055 { 3056 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3057 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 3058 DIP("mov%c %s,%s\n", nameISize(size), 3059 dis_buf, 3060 nameIRegG(size,pfx,rm)); 3061 return delta0+len; 3062 } 3063 } 3064 3065 3066 /* Handle move instructions of the form 3067 mov G, E meaning 3068 mov reg, reg-or-mem 3069 Is passed a ptr to the modRM byte, and the data size. Returns 3070 the address advanced completely over this instruction. 3071 3072 G(src) is reg. 3073 E(dst) is reg-or-mem 3074 3075 If E is reg, --> GET %G, tmp 3076 PUT tmp, %E 3077 3078 If E is mem, --> (getAddr E) -> tmpa 3079 GET %G, tmpv 3080 ST tmpv, (tmpa) 3081 */ 3082 static 3083 ULong dis_mov_G_E ( VexAbiInfo* vbi, 3084 Prefix pfx, 3085 Int size, 3086 Long delta0 ) 3087 { 3088 Int len; 3089 UChar rm = getUChar(delta0); 3090 HChar dis_buf[50]; 3091 3092 if (epartIsReg(rm)) { 3093 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm)); 3094 DIP("mov%c %s,%s\n", nameISize(size), 3095 nameIRegG(size,pfx,rm), 3096 nameIRegE(size,pfx,rm)); 3097 return 1+delta0; 3098 } 3099 3100 /* E refers to memory */ 3101 { 3102 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3103 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) ); 3104 DIP("mov%c %s,%s\n", nameISize(size), 3105 nameIRegG(size,pfx,rm), 3106 dis_buf); 3107 return len+delta0; 3108 } 3109 } 3110 3111 3112 /* op $immediate, AL/AX/EAX/RAX. */ 3113 static 3114 ULong dis_op_imm_A ( Int size, 3115 Bool carrying, 3116 IROp op8, 3117 Bool keep, 3118 Long delta, 3119 HChar* t_amd64opc ) 3120 { 3121 Int size4 = imin(size,4); 3122 IRType ty = szToITy(size); 3123 IRTemp dst0 = newTemp(ty); 3124 IRTemp src = newTemp(ty); 3125 IRTemp dst1 = newTemp(ty); 3126 Long lit = getSDisp(size4,delta); 3127 assign(dst0, getIRegRAX(size)); 3128 assign(src, mkU(ty,lit & mkSizeMask(size))); 3129 3130 if (isAddSub(op8) && !carrying) { 3131 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3132 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3133 } 3134 else 3135 if (isLogic(op8)) { 3136 vassert(!carrying); 3137 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3138 setFlags_DEP1(op8, dst1, ty); 3139 } 3140 else 3141 if (op8 == Iop_Add8 && carrying) { 3142 helper_ADC( size, dst1, dst0, src, 3143 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3144 } 3145 else 3146 if (op8 == Iop_Sub8 && carrying) { 3147 helper_SBB( size, dst1, dst0, src, 3148 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3149 } 3150 else 3151 vpanic("dis_op_imm_A(amd64,guest)"); 3152 3153 if (keep) 3154 putIRegRAX(size, mkexpr(dst1)); 3155 3156 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size), 3157 lit, nameIRegRAX(size)); 3158 return delta+size4; 3159 } 3160 3161 3162 /* Sign- and Zero-extending moves.
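Eg (illustrative only): "movzbq (%rcx),%rax" arrives here with
   szs==1, szd==8, sign_extend==False, whereas "movswl %dx,%eax"
   arrives with szs==2, szd==4, sign_extend==True.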
*/ 3163 static 3164 ULong dis_movx_E_G ( VexAbiInfo* vbi, 3165 Prefix pfx, 3166 Long delta, Int szs, Int szd, Bool sign_extend ) 3167 { 3168 UChar rm = getUChar(delta); 3169 if (epartIsReg(rm)) { 3170 putIRegG(szd, pfx, rm, 3171 doScalarWidening( 3172 szs,szd,sign_extend, 3173 getIRegE(szs,pfx,rm))); 3174 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3175 nameISize(szs), 3176 nameISize(szd), 3177 nameIRegE(szs,pfx,rm), 3178 nameIRegG(szd,pfx,rm)); 3179 return 1+delta; 3180 } 3181 3182 /* E refers to memory */ 3183 { 3184 Int len; 3185 HChar dis_buf[50]; 3186 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 3187 putIRegG(szd, pfx, rm, 3188 doScalarWidening( 3189 szs,szd,sign_extend, 3190 loadLE(szToITy(szs),mkexpr(addr)))); 3191 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3192 nameISize(szs), 3193 nameISize(szd), 3194 dis_buf, 3195 nameIRegG(szd,pfx,rm)); 3196 return len+delta; 3197 } 3198 } 3199 3200 3201 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by 3202 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */ 3203 static 3204 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 3205 { 3206 /* special-case the 64-bit case */ 3207 if (sz == 8) { 3208 IROp op = signed_divide ? Iop_DivModS128to64 3209 : Iop_DivModU128to64; 3210 IRTemp src128 = newTemp(Ity_I128); 3211 IRTemp dst128 = newTemp(Ity_I128); 3212 assign( src128, binop(Iop_64HLto128, 3213 getIReg64(R_RDX), 3214 getIReg64(R_RAX)) ); 3215 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) ); 3216 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) ); 3217 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); 3218 } else { 3219 IROp op = signed_divide ? Iop_DivModS64to32 3220 : Iop_DivModU64to32; 3221 IRTemp src64 = newTemp(Ity_I64); 3222 IRTemp dst64 = newTemp(Ity_I64); 3223 switch (sz) { 3224 case 4: 3225 assign( src64, 3226 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3227 assign( dst64, 3228 binop(op, mkexpr(src64), mkexpr(t)) ); 3229 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3230 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3231 break; 3232 case 2: { 3233 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3234 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3235 assign( src64, unop(widen3264, 3236 binop(Iop_16HLto32, 3237 getIRegRDX(2), 3238 getIRegRAX(2))) ); 3239 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3240 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3241 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3242 break; 3243 } 3244 case 1: { 3245 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3246 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3247 IROp widen816 = signed_divide ? 
Iop_8Sto16 : Iop_8Uto16; 3248 assign( src64, unop(widen3264, 3249 unop(widen1632, getIRegRAX(2))) ); 3250 assign( dst64, 3251 binop(op, mkexpr(src64), 3252 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3253 putIRegRAX( 1, unop(Iop_16to8, 3254 unop(Iop_32to16, 3255 unop(Iop_64to32,mkexpr(dst64)))) ); 3256 putIRegAH( unop(Iop_16to8, 3257 unop(Iop_32to16, 3258 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3259 break; 3260 } 3261 default: 3262 vpanic("codegen_div(amd64)"); 3263 } 3264 } 3265 } 3266 3267 static 3268 ULong dis_Grp1 ( VexAbiInfo* vbi, 3269 Prefix pfx, 3270 Long delta, UChar modrm, 3271 Int am_sz, Int d_sz, Int sz, Long d64 ) 3272 { 3273 Int len; 3274 HChar dis_buf[50]; 3275 IRType ty = szToITy(sz); 3276 IRTemp dst1 = newTemp(ty); 3277 IRTemp src = newTemp(ty); 3278 IRTemp dst0 = newTemp(ty); 3279 IRTemp addr = IRTemp_INVALID; 3280 IROp op8 = Iop_INVALID; 3281 ULong mask = mkSizeMask(sz); 3282 3283 switch (gregLO3ofRM(modrm)) { 3284 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3285 case 2: break; // ADC 3286 case 3: break; // SBB 3287 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3288 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3289 /*NOTREACHED*/ 3290 default: vpanic("dis_Grp1(amd64): unhandled case"); 3291 } 3292 3293 if (epartIsReg(modrm)) { 3294 vassert(am_sz == 1); 3295 3296 assign(dst0, getIRegE(sz,pfx,modrm)); 3297 assign(src, mkU(ty,d64 & mask)); 3298 3299 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3300 helper_ADC( sz, dst1, dst0, src, 3301 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3302 } else 3303 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3304 helper_SBB( sz, dst1, dst0, src, 3305 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3306 } else { 3307 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3308 if (isAddSub(op8)) 3309 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3310 else 3311 setFlags_DEP1(op8, dst1, ty); 3312 } 3313 3314 if (gregLO3ofRM(modrm) < 7) 3315 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3316 3317 delta += (am_sz + d_sz); 3318 DIP("%s%c $%lld, %s\n", 3319 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3320 nameIRegE(sz,pfx,modrm)); 3321 } else { 3322 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3323 3324 assign(dst0, loadLE(ty,mkexpr(addr))); 3325 assign(src, mkU(ty,d64 & mask)); 3326 3327 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3328 if (pfx & PFX_LOCK) { 3329 /* cas-style store */ 3330 helper_ADC( sz, dst1, dst0, src, 3331 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3332 } else { 3333 /* normal store */ 3334 helper_ADC( sz, dst1, dst0, src, 3335 /*store*/addr, IRTemp_INVALID, 0 ); 3336 } 3337 } else 3338 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3339 if (pfx & PFX_LOCK) { 3340 /* cas-style store */ 3341 helper_SBB( sz, dst1, dst0, src, 3342 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3343 } else { 3344 /* normal store */ 3345 helper_SBB( sz, dst1, dst0, src, 3346 /*store*/addr, IRTemp_INVALID, 0 ); 3347 } 3348 } else { 3349 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3350 if (gregLO3ofRM(modrm) < 7) { 3351 if (pfx & PFX_LOCK) { 3352 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3353 mkexpr(dst1)/*newVal*/, 3354 guest_RIP_curr_instr ); 3355 } else { 3356 storeLE(mkexpr(addr), mkexpr(dst1)); 3357 } 3358 } 3359 if (isAddSub(op8)) 3360 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3361 else 3362 setFlags_DEP1(op8, dst1, ty); 3363 } 3364 3365 delta += (len+d_sz); 3366 DIP("%s%c $%lld, %s\n", 3367 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 
3368 d64, dis_buf); 3369 } 3370 return delta; 3371 } 3372 3373 3374 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3375 expression. */ 3376 3377 static 3378 ULong dis_Grp2 ( VexAbiInfo* vbi, 3379 Prefix pfx, 3380 Long delta, UChar modrm, 3381 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3382 HChar* shift_expr_txt, Bool* decode_OK ) 3383 { 3384 /* delta on entry points at the modrm byte. */ 3385 HChar dis_buf[50]; 3386 Int len; 3387 Bool isShift, isRotate, isRotateC; 3388 IRType ty = szToITy(sz); 3389 IRTemp dst0 = newTemp(ty); 3390 IRTemp dst1 = newTemp(ty); 3391 IRTemp addr = IRTemp_INVALID; 3392 3393 *decode_OK = True; 3394 3395 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3396 3397 /* Put value to shift/rotate in dst0. */ 3398 if (epartIsReg(modrm)) { 3399 assign(dst0, getIRegE(sz, pfx, modrm)); 3400 delta += (am_sz + d_sz); 3401 } else { 3402 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3403 assign(dst0, loadLE(ty,mkexpr(addr))); 3404 delta += len + d_sz; 3405 } 3406 3407 isShift = False; 3408 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3409 3410 isRotate = False; 3411 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3412 3413 isRotateC = False; 3414 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3415 3416 if (!isShift && !isRotate && !isRotateC) { 3417 /*NOTREACHED*/ 3418 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3419 } 3420 3421 if (isRotateC) { 3422 /* Call a helper; this insn is so ridiculous it does not deserve 3423 better. One problem is, the helper has to calculate both the 3424 new value and the new flags. This is more than 64 bits, and 3425 there is no way to return more than 64 bits from the helper. 3426 Hence the crude and obvious solution is to call it twice, 3427 using the sign of the sz field to indicate whether it is the 3428 value or rflags result we want. 3429 */ 3430 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3431 IRExpr** argsVALUE; 3432 IRExpr** argsRFLAGS; 3433 3434 IRTemp new_value = newTemp(Ity_I64); 3435 IRTemp new_rflags = newTemp(Ity_I64); 3436 IRTemp old_rflags = newTemp(Ity_I64); 3437 3438 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3439 3440 argsVALUE 3441 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3442 widenUto64(shift_expr), /* rotate amount */ 3443 mkexpr(old_rflags), 3444 mkU64(sz) ); 3445 assign( new_value, 3446 mkIRExprCCall( 3447 Ity_I64, 3448 0/*regparm*/, 3449 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3450 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3451 argsVALUE 3452 ) 3453 ); 3454 3455 argsRFLAGS 3456 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3457 widenUto64(shift_expr), /* rotate amount */ 3458 mkexpr(old_rflags), 3459 mkU64(-sz) ); 3460 assign( new_rflags, 3461 mkIRExprCCall( 3462 Ity_I64, 3463 0/*regparm*/, 3464 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3465 left ? 
&amd64g_calculate_RCL : &amd64g_calculate_RCR, 3466 argsRFLAGS 3467 ) 3468 ); 3469 3470 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3471 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3472 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3473 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3474 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3475 } 3476 3477 else 3478 if (isShift) { 3479 3480 IRTemp pre64 = newTemp(Ity_I64); 3481 IRTemp res64 = newTemp(Ity_I64); 3482 IRTemp res64ss = newTemp(Ity_I64); 3483 IRTemp shift_amt = newTemp(Ity_I8); 3484 UChar mask = toUChar(sz==8 ? 63 : 31); 3485 IROp op64; 3486 3487 switch (gregLO3ofRM(modrm)) { 3488 case 4: op64 = Iop_Shl64; break; 3489 case 5: op64 = Iop_Shr64; break; 3490 case 6: op64 = Iop_Shl64; break; 3491 case 7: op64 = Iop_Sar64; break; 3492 /*NOTREACHED*/ 3493 default: vpanic("dis_Grp2:shift"); break; 3494 } 3495 3496 /* Widen the value to be shifted to 64 bits, do the shift, and 3497 narrow back down. This seems surprisingly long-winded, but 3498 unfortunately the AMD semantics requires that 8/16/32-bit 3499 shifts give defined results for shift values all the way up 3500 to 32, and this seems the simplest way to do it. It has the 3501 advantage that the only IR level shifts generated are of 64 3502 bit values, and the shift amount is guaranteed to be in the 3503 range 0 .. 63, thereby observing the IR semantics requiring 3504 all shift values to be in the range 0 .. 2^word_size-1. 3505 3506 Therefore the shift amount is masked with 63 for 64-bit shifts 3507 and 31 for all others. 3508 */ 3509 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3510 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3511 3512 /* suitably widen the value to be shifted to 64 bits. */ 3513 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3514 : widenUto64(mkexpr(dst0)) ); 3515 3516 /* res64 = pre64 `shift` shift_amt */ 3517 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3518 3519 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3520 assign( res64ss, 3521 binop(op64, 3522 mkexpr(pre64), 3523 binop(Iop_And8, 3524 binop(Iop_Sub8, 3525 mkexpr(shift_amt), mkU8(1)), 3526 mkU8(mask))) ); 3527 3528 /* Build the flags thunk. */ 3529 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3530 3531 /* Narrow the result back down. */ 3532 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3533 3534 } /* if (isShift) */ 3535 3536 else 3537 if (isRotate) { 3538 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3539 : (ty==Ity_I32 ? 2 : 3)); 3540 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3541 IRTemp rot_amt = newTemp(Ity_I8); 3542 IRTemp rot_amt64 = newTemp(Ity_I8); 3543 IRTemp oldFlags = newTemp(Ity_I64); 3544 UChar mask = toUChar(sz==8 ? 63 : 31); 3545 3546 /* rot_amt = shift_expr & mask */ 3547 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3548 expressions never shift beyond the word size and thus remain 3549 well defined. 
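For a 32-bit rotate left by n, 1 <= n <= 31, the value built
      below is the standard rotate identity (a sketch, writing >>u
      for unsigned shift):

         dst1 = (dst0 << n) | (dst0 >>u (32 - n))

      and when n == 0 the Mux0X tests further down leave the flag
      thunk untouched.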
*/ 3550 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3551 3552 if (ty == Ity_I64) 3553 assign(rot_amt, mkexpr(rot_amt64)); 3554 else 3555 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3556 3557 if (left) { 3558 3559 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3560 assign(dst1, 3561 binop( mkSizedOp(ty,Iop_Or8), 3562 binop( mkSizedOp(ty,Iop_Shl8), 3563 mkexpr(dst0), 3564 mkexpr(rot_amt) 3565 ), 3566 binop( mkSizedOp(ty,Iop_Shr8), 3567 mkexpr(dst0), 3568 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3569 ) 3570 ) 3571 ); 3572 ccOp += AMD64G_CC_OP_ROLB; 3573 3574 } else { /* right */ 3575 3576 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3577 assign(dst1, 3578 binop( mkSizedOp(ty,Iop_Or8), 3579 binop( mkSizedOp(ty,Iop_Shr8), 3580 mkexpr(dst0), 3581 mkexpr(rot_amt) 3582 ), 3583 binop( mkSizedOp(ty,Iop_Shl8), 3584 mkexpr(dst0), 3585 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3586 ) 3587 ) 3588 ); 3589 ccOp += AMD64G_CC_OP_RORB; 3590 3591 } 3592 3593 /* dst1 now holds the rotated value. Build flag thunk. We 3594 need the resulting value for this, and the previous flags. 3595 Except don't set it if the rotate count is zero. */ 3596 3597 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3598 3599 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 3600 stmt( IRStmt_Put( OFFB_CC_OP, 3601 IRExpr_Mux0X( mkexpr(rot_amt64), 3602 IRExpr_Get(OFFB_CC_OP,Ity_I64), 3603 mkU64(ccOp))) ); 3604 stmt( IRStmt_Put( OFFB_CC_DEP1, 3605 IRExpr_Mux0X( mkexpr(rot_amt64), 3606 IRExpr_Get(OFFB_CC_DEP1,Ity_I64), 3607 widenUto64(mkexpr(dst1)))) ); 3608 stmt( IRStmt_Put( OFFB_CC_DEP2, 3609 IRExpr_Mux0X( mkexpr(rot_amt64), 3610 IRExpr_Get(OFFB_CC_DEP2,Ity_I64), 3611 mkU64(0))) ); 3612 stmt( IRStmt_Put( OFFB_CC_NDEP, 3613 IRExpr_Mux0X( mkexpr(rot_amt64), 3614 IRExpr_Get(OFFB_CC_NDEP,Ity_I64), 3615 mkexpr(oldFlags))) ); 3616 } /* if (isRotate) */ 3617 3618 /* Save result, and finish up. */ 3619 if (epartIsReg(modrm)) { 3620 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3621 if (vex_traceflags & VEX_TRACE_FE) { 3622 vex_printf("%s%c ", 3623 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3624 if (shift_expr_txt) 3625 vex_printf("%s", shift_expr_txt); 3626 else 3627 ppIRExpr(shift_expr); 3628 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3629 } 3630 } else { 3631 storeLE(mkexpr(addr), mkexpr(dst1)); 3632 if (vex_traceflags & VEX_TRACE_FE) { 3633 vex_printf("%s%c ", 3634 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3635 if (shift_expr_txt) 3636 vex_printf("%s", shift_expr_txt); 3637 else 3638 ppIRExpr(shift_expr); 3639 vex_printf(", %s\n", dis_buf); 3640 } 3641 } 3642 return delta; 3643 } 3644 3645 3646 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3647 static 3648 ULong dis_Grp8_Imm ( VexAbiInfo* vbi, 3649 Prefix pfx, 3650 Long delta, UChar modrm, 3651 Int am_sz, Int sz, ULong src_val, 3652 Bool* decode_OK ) 3653 { 3654 /* src_val denotes a d8. 3655 And delta on entry points at the modrm byte. */ 3656 3657 IRType ty = szToITy(sz); 3658 IRTemp t2 = newTemp(Ity_I64); 3659 IRTemp t2m = newTemp(Ity_I64); 3660 IRTemp t_addr = IRTemp_INVALID; 3661 HChar dis_buf[50]; 3662 ULong mask; 3663 3664 /* we're optimists :-) */ 3665 *decode_OK = True; 3666 3667 /* Limit src_val -- the bit offset -- to something within a word. 3668 The Intel docs say that literal offsets larger than a word are 3669 masked in this way. 
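Eg (illustrative only): "btq $100, %rax" behaves like
      "btq $36, %rax", since 100 & 63 == 36.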
*/ 3670 switch (sz) { 3671 case 2: src_val &= 15; break; 3672 case 4: src_val &= 31; break; 3673 case 8: src_val &= 63; break; 3674 default: *decode_OK = False; return delta; 3675 } 3676 3677 /* Invent a mask suitable for the operation. */ 3678 switch (gregLO3ofRM(modrm)) { 3679 case 4: /* BT */ mask = 0; break; 3680 case 5: /* BTS */ mask = 1ULL << src_val; break; 3681 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3682 case 7: /* BTC */ mask = 1ULL << src_val; break; 3683 /* If this needs to be extended, probably simplest to make a 3684 new function to handle the other cases (0 .. 3). The 3685 Intel docs do however not indicate any use for 0 .. 3, so 3686 we don't expect this to happen. */ 3687 default: *decode_OK = False; return delta; 3688 } 3689 3690 /* Fetch the value to be tested and modified into t2, which is 3691 64-bits wide regardless of sz. */ 3692 if (epartIsReg(modrm)) { 3693 vassert(am_sz == 1); 3694 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3695 delta += (am_sz + 1); 3696 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3697 nameISize(sz), 3698 src_val, nameIRegE(sz,pfx,modrm)); 3699 } else { 3700 Int len; 3701 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3702 delta += (len+1); 3703 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3704 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3705 nameISize(sz), 3706 src_val, dis_buf); 3707 } 3708 3709 /* Compute the new value into t2m, if non-BT. */ 3710 switch (gregLO3ofRM(modrm)) { 3711 case 4: /* BT */ 3712 break; 3713 case 5: /* BTS */ 3714 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3715 break; 3716 case 6: /* BTR */ 3717 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3718 break; 3719 case 7: /* BTC */ 3720 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3721 break; 3722 default: 3723 /*NOTREACHED*/ /*the previous switch guards this*/ 3724 vassert(0); 3725 } 3726 3727 /* Write the result back, if non-BT. */ 3728 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3729 if (epartIsReg(modrm)) { 3730 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3731 } else { 3732 if (pfx & PFX_LOCK) { 3733 casLE( mkexpr(t_addr), 3734 narrowTo(ty, mkexpr(t2))/*expd*/, 3735 narrowTo(ty, mkexpr(t2m))/*new*/, 3736 guest_RIP_curr_instr ); 3737 } else { 3738 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3739 } 3740 } 3741 } 3742 3743 /* Copy relevant bit from t2 into the carry flag. */ 3744 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3745 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3746 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3747 stmt( IRStmt_Put( 3748 OFFB_CC_DEP1, 3749 binop(Iop_And64, 3750 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3751 mkU64(1)) 3752 )); 3753 /* Set NDEP even though it isn't used. This makes redundant-PUT 3754 elimination of previous stores to this field work better. */ 3755 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3756 3757 return delta; 3758 } 3759 3760 3761 /* Signed/unsigned widening multiply. Generate IR to multiply the 3762 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3763 RDX:RAX/EDX:EAX/DX:AX/AX. 
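Worked example (illustrative): for MUL with sz == 4, EAX = 0x80000000 and a source operand of 2, the full product is 0x100000000, so EDX is set to 1 and EAX to 0. The flag thunk is given both operands, so CF/OF can later be derived from the (otherwise discarded) high half.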
3764 */ 3765 static void codegen_mulL_A_D ( Int sz, Bool syned, 3766 IRTemp tmp, HChar* tmp_txt ) 3767 { 3768 IRType ty = szToITy(sz); 3769 IRTemp t1 = newTemp(ty); 3770 3771 assign( t1, getIRegRAX(sz) ); 3772 3773 switch (ty) { 3774 case Ity_I64: { 3775 IRTemp res128 = newTemp(Ity_I128); 3776 IRTemp resHi = newTemp(Ity_I64); 3777 IRTemp resLo = newTemp(Ity_I64); 3778 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3779 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3780 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3781 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3782 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3783 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3784 putIReg64(R_RDX, mkexpr(resHi)); 3785 putIReg64(R_RAX, mkexpr(resLo)); 3786 break; 3787 } 3788 case Ity_I32: { 3789 IRTemp res64 = newTemp(Ity_I64); 3790 IRTemp resHi = newTemp(Ity_I32); 3791 IRTemp resLo = newTemp(Ity_I32); 3792 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3793 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3794 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3795 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3796 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3797 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3798 putIRegRDX(4, mkexpr(resHi)); 3799 putIRegRAX(4, mkexpr(resLo)); 3800 break; 3801 } 3802 case Ity_I16: { 3803 IRTemp res32 = newTemp(Ity_I32); 3804 IRTemp resHi = newTemp(Ity_I16); 3805 IRTemp resLo = newTemp(Ity_I16); 3806 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3807 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3808 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3809 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3810 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3811 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3812 putIRegRDX(2, mkexpr(resHi)); 3813 putIRegRAX(2, mkexpr(resLo)); 3814 break; 3815 } 3816 case Ity_I8: { 3817 IRTemp res16 = newTemp(Ity_I16); 3818 IRTemp resHi = newTemp(Ity_I8); 3819 IRTemp resLo = newTemp(Ity_I8); 3820 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3821 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3822 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3823 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3824 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3825 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3826 putIRegRAX(2, mkexpr(res16)); 3827 break; 3828 } 3829 default: 3830 ppIRType(ty); 3831 vpanic("codegen_mulL_A_D(amd64)"); 3832 } 3833 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3834 } 3835 3836 3837 /* Group 3 extended opcodes. 
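These are the 0xF6/0xF7 forms; the /r extension selects, in order: TEST, (invalid), NOT, NEG, MUL, IMUL, DIV, IDIV, matching the switch cases below.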
*/ 3838 static 3839 ULong dis_Grp3 ( VexAbiInfo* vbi, 3840 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3841 { 3842 Long d64; 3843 UChar modrm; 3844 HChar dis_buf[50]; 3845 Int len; 3846 IRTemp addr; 3847 IRType ty = szToITy(sz); 3848 IRTemp t1 = newTemp(ty); 3849 IRTemp dst1, src, dst0; 3850 *decode_OK = True; 3851 modrm = getUChar(delta); 3852 if (epartIsReg(modrm)) { 3853 switch (gregLO3ofRM(modrm)) { 3854 case 0: { /* TEST */ 3855 delta++; 3856 d64 = getSDisp(imin(4,sz), delta); 3857 delta += imin(4,sz); 3858 dst1 = newTemp(ty); 3859 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3860 getIRegE(sz,pfx,modrm), 3861 mkU(ty, d64 & mkSizeMask(sz)))); 3862 setFlags_DEP1( Iop_And8, dst1, ty ); 3863 DIP("test%c $%lld, %s\n", 3864 nameISize(sz), d64, 3865 nameIRegE(sz, pfx, modrm)); 3866 break; 3867 } 3868 case 1: 3869 *decode_OK = False; 3870 return delta; 3871 case 2: /* NOT */ 3872 delta++; 3873 putIRegE(sz, pfx, modrm, 3874 unop(mkSizedOp(ty,Iop_Not8), 3875 getIRegE(sz, pfx, modrm))); 3876 DIP("not%c %s\n", nameISize(sz), 3877 nameIRegE(sz, pfx, modrm)); 3878 break; 3879 case 3: /* NEG */ 3880 delta++; 3881 dst0 = newTemp(ty); 3882 src = newTemp(ty); 3883 dst1 = newTemp(ty); 3884 assign(dst0, mkU(ty,0)); 3885 assign(src, getIRegE(sz, pfx, modrm)); 3886 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3887 mkexpr(src))); 3888 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3889 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3890 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 3891 break; 3892 case 4: /* MUL (unsigned widening) */ 3893 delta++; 3894 src = newTemp(ty); 3895 assign(src, getIRegE(sz,pfx,modrm)); 3896 codegen_mulL_A_D ( sz, False, src, 3897 nameIRegE(sz,pfx,modrm) ); 3898 break; 3899 case 5: /* IMUL (signed widening) */ 3900 delta++; 3901 src = newTemp(ty); 3902 assign(src, getIRegE(sz,pfx,modrm)); 3903 codegen_mulL_A_D ( sz, True, src, 3904 nameIRegE(sz,pfx,modrm) ); 3905 break; 3906 case 6: /* DIV */ 3907 delta++; 3908 assign( t1, getIRegE(sz, pfx, modrm) ); 3909 codegen_div ( sz, t1, False ); 3910 DIP("div%c %s\n", nameISize(sz), 3911 nameIRegE(sz, pfx, modrm)); 3912 break; 3913 case 7: /* IDIV */ 3914 delta++; 3915 assign( t1, getIRegE(sz, pfx, modrm) ); 3916 codegen_div ( sz, t1, True ); 3917 DIP("idiv%c %s\n", nameISize(sz), 3918 nameIRegE(sz, pfx, modrm)); 3919 break; 3920 default: 3921 /*NOTREACHED*/ 3922 vpanic("Grp3(amd64,R)"); 3923 } 3924 } else { 3925 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 3926 /* we have to inform disAMode of any immediate 3927 bytes used */ 3928 gregLO3ofRM(modrm)==0/*TEST*/ 3929 ? 
imin(4,sz) 3930 : 0 3931 ); 3932 t1 = newTemp(ty); 3933 delta += len; 3934 assign(t1, loadLE(ty,mkexpr(addr))); 3935 switch (gregLO3ofRM(modrm)) { 3936 case 0: { /* TEST */ 3937 d64 = getSDisp(imin(4,sz), delta); 3938 delta += imin(4,sz); 3939 dst1 = newTemp(ty); 3940 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3941 mkexpr(t1), 3942 mkU(ty, d64 & mkSizeMask(sz)))); 3943 setFlags_DEP1( Iop_And8, dst1, ty ); 3944 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 3945 break; 3946 } 3947 case 1: 3948 *decode_OK = False; 3949 return delta; 3950 case 2: /* NOT */ 3951 dst1 = newTemp(ty); 3952 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 3953 if (pfx & PFX_LOCK) { 3954 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3955 guest_RIP_curr_instr ); 3956 } else { 3957 storeLE( mkexpr(addr), mkexpr(dst1) ); 3958 } 3959 DIP("not%c %s\n", nameISize(sz), dis_buf); 3960 break; 3961 case 3: /* NEG */ 3962 dst0 = newTemp(ty); 3963 src = newTemp(ty); 3964 dst1 = newTemp(ty); 3965 assign(dst0, mkU(ty,0)); 3966 assign(src, mkexpr(t1)); 3967 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3968 mkexpr(src))); 3969 if (pfx & PFX_LOCK) { 3970 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3971 guest_RIP_curr_instr ); 3972 } else { 3973 storeLE( mkexpr(addr), mkexpr(dst1) ); 3974 } 3975 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3976 DIP("neg%c %s\n", nameISize(sz), dis_buf); 3977 break; 3978 case 4: /* MUL (unsigned widening) */ 3979 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 3980 break; 3981 case 5: /* IMUL */ 3982 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 3983 break; 3984 case 6: /* DIV */ 3985 codegen_div ( sz, t1, False ); 3986 DIP("div%c %s\n", nameISize(sz), dis_buf); 3987 break; 3988 case 7: /* IDIV */ 3989 codegen_div ( sz, t1, True ); 3990 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 3991 break; 3992 default: 3993 /*NOTREACHED*/ 3994 vpanic("Grp3(amd64,M)"); 3995 } 3996 } 3997 return delta; 3998 } 3999 4000 4001 /* Group 4 extended opcodes. 
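These are the 0xFE forms: /0 is INC Eb and /1 is DEC Eb; all other /r extensions are invalid, hence the *decode_OK = False paths below.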
*/ 4002 static 4003 ULong dis_Grp4 ( VexAbiInfo* vbi, 4004 Prefix pfx, Long delta, Bool* decode_OK ) 4005 { 4006 Int alen; 4007 UChar modrm; 4008 HChar dis_buf[50]; 4009 IRType ty = Ity_I8; 4010 IRTemp t1 = newTemp(ty); 4011 IRTemp t2 = newTemp(ty); 4012 4013 *decode_OK = True; 4014 4015 modrm = getUChar(delta); 4016 if (epartIsReg(modrm)) { 4017 assign(t1, getIRegE(1, pfx, modrm)); 4018 switch (gregLO3ofRM(modrm)) { 4019 case 0: /* INC */ 4020 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4021 putIRegE(1, pfx, modrm, mkexpr(t2)); 4022 setFlags_INC_DEC( True, t2, ty ); 4023 break; 4024 case 1: /* DEC */ 4025 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4026 putIRegE(1, pfx, modrm, mkexpr(t2)); 4027 setFlags_INC_DEC( False, t2, ty ); 4028 break; 4029 default: 4030 *decode_OK = False; 4031 return delta; 4032 } 4033 delta++; 4034 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 4035 nameIRegE(1, pfx, modrm)); 4036 } else { 4037 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 4038 assign( t1, loadLE(ty, mkexpr(addr)) ); 4039 switch (gregLO3ofRM(modrm)) { 4040 case 0: /* INC */ 4041 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4042 if (pfx & PFX_LOCK) { 4043 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4044 guest_RIP_curr_instr ); 4045 } else { 4046 storeLE( mkexpr(addr), mkexpr(t2) ); 4047 } 4048 setFlags_INC_DEC( True, t2, ty ); 4049 break; 4050 case 1: /* DEC */ 4051 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4052 if (pfx & PFX_LOCK) { 4053 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4054 guest_RIP_curr_instr ); 4055 } else { 4056 storeLE( mkexpr(addr), mkexpr(t2) ); 4057 } 4058 setFlags_INC_DEC( False, t2, ty ); 4059 break; 4060 default: 4061 *decode_OK = False; 4062 return delta; 4063 } 4064 delta += alen; 4065 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 4066 } 4067 return delta; 4068 } 4069 4070 4071 /* Group 5 extended opcodes. */ 4072 static 4073 ULong dis_Grp5 ( VexAbiInfo* vbi, 4074 Prefix pfx, Int sz, Long delta, 4075 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 4076 { 4077 Int len; 4078 UChar modrm; 4079 HChar dis_buf[50]; 4080 IRTemp addr = IRTemp_INVALID; 4081 IRType ty = szToITy(sz); 4082 IRTemp t1 = newTemp(ty); 4083 IRTemp t2 = IRTemp_INVALID; 4084 IRTemp t3 = IRTemp_INVALID; 4085 Bool showSz = True; 4086 4087 *decode_OK = True; 4088 4089 modrm = getUChar(delta); 4090 if (epartIsReg(modrm)) { 4091 assign(t1, getIRegE(sz,pfx,modrm)); 4092 switch (gregLO3ofRM(modrm)) { 4093 case 0: /* INC */ 4094 t2 = newTemp(ty); 4095 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4096 mkexpr(t1), mkU(ty,1))); 4097 setFlags_INC_DEC( True, t2, ty ); 4098 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4099 break; 4100 case 1: /* DEC */ 4101 t2 = newTemp(ty); 4102 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4103 mkexpr(t1), mkU(ty,1))); 4104 setFlags_INC_DEC( False, t2, ty ); 4105 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4106 break; 4107 case 2: /* call Ev */ 4108 /* Ignore any sz value and operate as if sz==8. 
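In 64-bit mode a near indirect branch always uses a 64-bit target; a 0x66 prefix would select a 16-bit operand size, for which we have no test cases, hence the 'goto unhandled' guard below. E.g. (illustrative) 'call *%rax' pushes the return RIP and jumps to the full 64-bit value in %rax.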
*/ 4109 if (!(sz == 4 || sz == 8)) goto unhandled; 4110 sz = 8; 4111 t3 = newTemp(Ity_I64); 4112 assign(t3, getIRegE(sz,pfx,modrm)); 4113 t2 = newTemp(Ity_I64); 4114 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4115 putIReg64(R_RSP, mkexpr(t2)); 4116 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 4117 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 4118 jmp_treg(dres, Ijk_Call, t3); 4119 vassert(dres->whatNext == Dis_StopHere); 4120 showSz = False; 4121 break; 4122 case 4: /* jmp Ev */ 4123 /* Ignore any sz value and operate as if sz==8. */ 4124 if (!(sz == 4 || sz == 8)) goto unhandled; 4125 sz = 8; 4126 t3 = newTemp(Ity_I64); 4127 assign(t3, getIRegE(sz,pfx,modrm)); 4128 jmp_treg(dres, Ijk_Boring, t3); 4129 vassert(dres->whatNext == Dis_StopHere); 4130 showSz = False; 4131 break; 4132 default: 4133 *decode_OK = False; 4134 return delta; 4135 } 4136 delta++; 4137 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4138 showSz ? nameISize(sz) : ' ', 4139 nameIRegE(sz, pfx, modrm)); 4140 } else { 4141 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4142 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4143 && gregLO3ofRM(modrm) != 6) { 4144 assign(t1, loadLE(ty,mkexpr(addr))); 4145 } 4146 switch (gregLO3ofRM(modrm)) { 4147 case 0: /* INC */ 4148 t2 = newTemp(ty); 4149 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4150 mkexpr(t1), mkU(ty,1))); 4151 if (pfx & PFX_LOCK) { 4152 casLE( mkexpr(addr), 4153 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4154 } else { 4155 storeLE(mkexpr(addr),mkexpr(t2)); 4156 } 4157 setFlags_INC_DEC( True, t2, ty ); 4158 break; 4159 case 1: /* DEC */ 4160 t2 = newTemp(ty); 4161 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4162 mkexpr(t1), mkU(ty,1))); 4163 if (pfx & PFX_LOCK) { 4164 casLE( mkexpr(addr), 4165 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4166 } else { 4167 storeLE(mkexpr(addr),mkexpr(t2)); 4168 } 4169 setFlags_INC_DEC( False, t2, ty ); 4170 break; 4171 case 2: /* call Ev */ 4172 /* Ignore any sz value and operate as if sz==8. */ 4173 if (!(sz == 4 || sz == 8)) goto unhandled; 4174 sz = 8; 4175 t3 = newTemp(Ity_I64); 4176 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4177 t2 = newTemp(Ity_I64); 4178 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4179 putIReg64(R_RSP, mkexpr(t2)); 4180 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4181 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4182 jmp_treg(dres, Ijk_Call, t3); 4183 vassert(dres->whatNext == Dis_StopHere); 4184 showSz = False; 4185 break; 4186 case 4: /* JMP Ev */ 4187 /* Ignore any sz value and operate as if sz==8. */ 4188 if (!(sz == 4 || sz == 8)) goto unhandled; 4189 sz = 8; 4190 t3 = newTemp(Ity_I64); 4191 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4192 jmp_treg(dres, Ijk_Boring, t3); 4193 vassert(dres->whatNext == Dis_StopHere); 4194 showSz = False; 4195 break; 4196 case 6: /* PUSH Ev */ 4197 /* There is no encoding for 32-bit operand size; hence ... */ 4198 if (sz == 4) sz = 8; 4199 if (!(sz == 8 || sz == 2)) goto unhandled; 4200 if (sz == 8) { 4201 t3 = newTemp(Ity_I64); 4202 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4203 t2 = newTemp(Ity_I64); 4204 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4205 putIReg64(R_RSP, mkexpr(t2) ); 4206 storeLE( mkexpr(t2), mkexpr(t3) ); 4207 break; 4208 } else { 4209 goto unhandled; /* awaiting test case */ 4210 } 4211 default: 4212 unhandled: 4213 *decode_OK = False; 4214 return delta; 4215 } 4216 delta += len; 4217 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4218 showSz ? 
nameISize(sz) : ' ', 4219 dis_buf); 4220 } 4221 return delta; 4222 } 4223 4224 4225 /*------------------------------------------------------------*/ 4226 /*--- Disassembling string ops (including REP prefixes) ---*/ 4227 /*------------------------------------------------------------*/ 4228 4229 /* Code shared by all the string ops */ 4230 static 4231 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4232 { 4233 UChar logSz; 4234 if (sz == 8 || sz == 4 || sz == 2) { 4235 logSz = 1; 4236 if (sz == 4) logSz = 2; 4237 if (sz == 8) logSz = 3; 4238 assign( t_inc, 4239 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4240 mkU8(logSz) ) ); 4241 } else { 4242 assign( t_inc, 4243 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4244 } 4245 } 4246 4247 static 4248 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ), 4249 Int sz, HChar* name, Prefix pfx ) 4250 { 4251 IRTemp t_inc = newTemp(Ity_I64); 4252 /* Really we ought to inspect the override prefixes, but we don't. 4253 The following assertion catches any resulting silliness. */ 4254 vassert(pfx == clearSegBits(pfx)); 4255 dis_string_op_increment(sz, t_inc); 4256 dis_OP( sz, t_inc, pfx ); 4257 DIP("%s%c\n", name, nameISize(sz)); 4258 } 4259 4260 static 4261 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx ) 4262 { 4263 IRType ty = szToITy(sz); 4264 IRTemp td = newTemp(Ity_I64); /* RDI */ 4265 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4266 IRExpr *incd, *incs; 4267 4268 if (haveASO(pfx)) { 4269 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4270 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4271 } else { 4272 assign( td, getIReg64(R_RDI) ); 4273 assign( ts, getIReg64(R_RSI) ); 4274 } 4275 4276 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4277 4278 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4279 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4280 if (haveASO(pfx)) { 4281 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4282 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4283 } 4284 putIReg64( R_RDI, incd ); 4285 putIReg64( R_RSI, incs ); 4286 } 4287 4288 static 4289 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx ) 4290 { 4291 IRType ty = szToITy(sz); 4292 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4293 IRExpr *incs; 4294 4295 if (haveASO(pfx)) 4296 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4297 else 4298 assign( ts, getIReg64(R_RSI) ); 4299 4300 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4301 4302 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4303 if (haveASO(pfx)) 4304 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4305 putIReg64( R_RSI, incs ); 4306 } 4307 4308 static 4309 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx ) 4310 { 4311 IRType ty = szToITy(sz); 4312 IRTemp ta = newTemp(ty); /* rAX */ 4313 IRTemp td = newTemp(Ity_I64); /* RDI */ 4314 IRExpr *incd; 4315 4316 assign( ta, getIRegRAX(sz) ); 4317 4318 if (haveASO(pfx)) 4319 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4320 else 4321 assign( td, getIReg64(R_RDI) ); 4322 4323 storeLE( mkexpr(td), mkexpr(ta) ); 4324 4325 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4326 if (haveASO(pfx)) 4327 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4328 putIReg64( R_RDI, incd ); 4329 } 4330 4331 static 4332 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx ) 4333 { 4334 IRType ty = szToITy(sz); 4335 IRTemp tdv = newTemp(ty); /* (RDI) */ 4336 IRTemp tsv = newTemp(ty); /* (RSI) */ 4337 IRTemp td = newTemp(Ity_I64); /* RDI */ 4338 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4339 IRExpr *incd, *incs; 4340 4341 if (haveASO(pfx)) { 4342 assign( td,
unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4343 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4344 } else { 4345 assign( td, getIReg64(R_RDI) ); 4346 assign( ts, getIReg64(R_RSI) ); 4347 } 4348 4349 assign( tdv, loadLE(ty,mkexpr(td)) ); 4350 4351 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4352 4353 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4354 4355 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4356 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4357 if (haveASO(pfx)) { 4358 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4359 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4360 } 4361 putIReg64( R_RDI, incd ); 4362 putIReg64( R_RSI, incs ); 4363 } 4364 4365 static 4366 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) 4367 { 4368 IRType ty = szToITy(sz); 4369 IRTemp ta = newTemp(ty); /* rAX */ 4370 IRTemp td = newTemp(Ity_I64); /* RDI */ 4371 IRTemp tdv = newTemp(ty); /* (RDI) */ 4372 IRExpr *incd; 4373 4374 assign( ta, getIRegRAX(sz) ); 4375 4376 if (haveASO(pfx)) 4377 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4378 else 4379 assign( td, getIReg64(R_RDI) ); 4380 4381 assign( tdv, loadLE(ty,mkexpr(td)) ); 4382 4383 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4384 4385 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4386 if (haveASO(pfx)) 4387 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4388 putIReg64( R_RDI, incd ); 4389 } 4390 4391 4392 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume 4393 the insn is the last one in the basic block, and so emit a jump to 4394 the next insn, rather than just falling through. */ 4395 static 4396 void dis_REP_op ( /*MOD*/DisResult* dres, 4397 AMD64Condcode cond, 4398 void (*dis_OP)(Int, IRTemp, Prefix), 4399 Int sz, Addr64 rip, Addr64 rip_next, HChar* name, 4400 Prefix pfx ) 4401 { 4402 IRTemp t_inc = newTemp(Ity_I64); 4403 IRTemp tc; 4404 IRExpr* cmp; 4405 4406 /* Really we ought to inspect the override prefixes, but we don't. 4407 The following assertion catches any resulting silliness. */ 4408 vassert(pfx == clearSegBits(pfx)); 4409 4410 if (haveASO(pfx)) { 4411 tc = newTemp(Ity_I32); /* ECX */ 4412 assign( tc, getIReg32(R_RCX) ); 4413 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0)); 4414 } else { 4415 tc = newTemp(Ity_I64); /* RCX */ 4416 assign( tc, getIReg64(R_RCX) ); 4417 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); 4418 } 4419 4420 stmt( IRStmt_Exit( cmp, Ijk_Boring, 4421 IRConst_U64(rip_next), OFFB_RIP ) ); 4422 4423 if (haveASO(pfx)) 4424 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 4425 else 4426 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4427 4428 dis_string_op_increment(sz, t_inc); 4429 dis_OP (sz, t_inc, pfx); 4430 4431 if (cond == AMD64CondAlways) { 4432 jmp_lit(dres, Ijk_Boring, rip); 4433 vassert(dres->whatNext == Dis_StopHere); 4434 } else { 4435 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4436 Ijk_Boring, 4437 IRConst_U64(rip), 4438 OFFB_RIP ) ); 4439 jmp_lit(dres, Ijk_Boring, rip_next); 4440 vassert(dres->whatNext == Dis_StopHere); 4441 } 4442 DIP("%s%c\n", name, nameISize(sz)); 4443 } 4444 4445 4446 /*------------------------------------------------------------*/ 4447 /*--- Arithmetic, etc. ---*/ 4448 /*------------------------------------------------------------*/ 4449 4450 /* IMUL E, G. Supplied rip points to the modR/M byte.
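Only the low half of the double-length product is written back to G; the flag thunk records both operands so CF/OF (set when the result overflowed the destination) can be computed lazily. E.g. (illustrative) 'imull %ebx, %ecx' leaves low32(%ecx * %ebx) in %ecx.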
*/ 4451 static 4452 ULong dis_mul_E_G ( VexAbiInfo* vbi, 4453 Prefix pfx, 4454 Int size, 4455 Long delta0 ) 4456 { 4457 Int alen; 4458 HChar dis_buf[50]; 4459 UChar rm = getUChar(delta0); 4460 IRType ty = szToITy(size); 4461 IRTemp te = newTemp(ty); 4462 IRTemp tg = newTemp(ty); 4463 IRTemp resLo = newTemp(ty); 4464 4465 assign( tg, getIRegG(size, pfx, rm) ); 4466 if (epartIsReg(rm)) { 4467 assign( te, getIRegE(size, pfx, rm) ); 4468 } else { 4469 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4470 assign( te, loadLE(ty,mkexpr(addr)) ); 4471 } 4472 4473 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4474 4475 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4476 4477 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4478 4479 if (epartIsReg(rm)) { 4480 DIP("imul%c %s, %s\n", nameISize(size), 4481 nameIRegE(size,pfx,rm), 4482 nameIRegG(size,pfx,rm)); 4483 return 1+delta0; 4484 } else { 4485 DIP("imul%c %s, %s\n", nameISize(size), 4486 dis_buf, 4487 nameIRegG(size,pfx,rm)); 4488 return alen+delta0; 4489 } 4490 } 4491 4492 4493 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */ 4494 static 4495 ULong dis_imul_I_E_G ( VexAbiInfo* vbi, 4496 Prefix pfx, 4497 Int size, 4498 Long delta, 4499 Int litsize ) 4500 { 4501 Long d64; 4502 Int alen; 4503 HChar dis_buf[50]; 4504 UChar rm = getUChar(delta); 4505 IRType ty = szToITy(size); 4506 IRTemp te = newTemp(ty); 4507 IRTemp tl = newTemp(ty); 4508 IRTemp resLo = newTemp(ty); 4509 4510 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4511 4512 if (epartIsReg(rm)) { 4513 assign(te, getIRegE(size, pfx, rm)); 4514 delta++; 4515 } else { 4516 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4517 imin(4,litsize) ); 4518 assign(te, loadLE(ty, mkexpr(addr))); 4519 delta += alen; 4520 } 4521 d64 = getSDisp(imin(4,litsize),delta); 4522 delta += imin(4,litsize); 4523 4524 d64 &= mkSizeMask(size); 4525 assign(tl, mkU(ty,d64)); 4526 4527 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4528 4529 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4530 4531 putIRegG(size, pfx, rm, mkexpr(resLo)); 4532 4533 DIP("imul%c $%lld, %s, %s\n", 4534 nameISize(size), d64, 4535 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4536 nameIRegG(size,pfx,rm) ); 4537 return delta; 4538 } 4539 4540 4541 /* Generate an IR sequence to do a popcount operation on the supplied 4542 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4543 Ity_I16, Ity_I32 or Ity_I64 only. 
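The method is the usual parallel bit-summation: at step i, adjacent 2^i-bit fields are added together. Worked 16-bit example (illustrative): starting from x = 0xFFFF,
   step 0: (x & 0x5555) + ((x >> 1) & 0x5555) = 0xAAAA
   step 1: (x & 0x3333) + ((x >> 2) & 0x3333) = 0x4444
   step 2: (x & 0x0F0F) + ((x >> 4) & 0x0F0F) = 0x0808
   step 3: (x & 0x00FF) + ((x >> 8) & 0x00FF) = 0x0010
giving 16, the number of set bits.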
*/ 4544 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4545 { 4546 Int i; 4547 if (ty == Ity_I16) { 4548 IRTemp old = IRTemp_INVALID; 4549 IRTemp nyu = IRTemp_INVALID; 4550 IRTemp mask[4], shift[4]; 4551 for (i = 0; i < 4; i++) { 4552 mask[i] = newTemp(ty); 4553 shift[i] = 1 << i; 4554 } 4555 assign(mask[0], mkU16(0x5555)); 4556 assign(mask[1], mkU16(0x3333)); 4557 assign(mask[2], mkU16(0x0F0F)); 4558 assign(mask[3], mkU16(0x00FF)); 4559 old = src; 4560 for (i = 0; i < 4; i++) { 4561 nyu = newTemp(ty); 4562 assign(nyu, 4563 binop(Iop_Add16, 4564 binop(Iop_And16, 4565 mkexpr(old), 4566 mkexpr(mask[i])), 4567 binop(Iop_And16, 4568 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4569 mkexpr(mask[i])))); 4570 old = nyu; 4571 } 4572 return nyu; 4573 } 4574 if (ty == Ity_I32) { 4575 IRTemp old = IRTemp_INVALID; 4576 IRTemp nyu = IRTemp_INVALID; 4577 IRTemp mask[5], shift[5]; 4578 for (i = 0; i < 5; i++) { 4579 mask[i] = newTemp(ty); 4580 shift[i] = 1 << i; 4581 } 4582 assign(mask[0], mkU32(0x55555555)); 4583 assign(mask[1], mkU32(0x33333333)); 4584 assign(mask[2], mkU32(0x0F0F0F0F)); 4585 assign(mask[3], mkU32(0x00FF00FF)); 4586 assign(mask[4], mkU32(0x0000FFFF)); 4587 old = src; 4588 for (i = 0; i < 5; i++) { 4589 nyu = newTemp(ty); 4590 assign(nyu, 4591 binop(Iop_Add32, 4592 binop(Iop_And32, 4593 mkexpr(old), 4594 mkexpr(mask[i])), 4595 binop(Iop_And32, 4596 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4597 mkexpr(mask[i])))); 4598 old = nyu; 4599 } 4600 return nyu; 4601 } 4602 if (ty == Ity_I64) { 4603 IRTemp old = IRTemp_INVALID; 4604 IRTemp nyu = IRTemp_INVALID; 4605 IRTemp mask[6], shift[6]; 4606 for (i = 0; i < 6; i++) { 4607 mask[i] = newTemp(ty); 4608 shift[i] = 1 << i; 4609 } 4610 assign(mask[0], mkU64(0x5555555555555555ULL)); 4611 assign(mask[1], mkU64(0x3333333333333333ULL)); 4612 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4613 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4614 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4615 assign(mask[5], mkU64(0x00000000FFFFFFFFULL)); 4616 old = src; 4617 for (i = 0; i < 6; i++) { 4618 nyu = newTemp(ty); 4619 assign(nyu, 4620 binop(Iop_Add64, 4621 binop(Iop_And64, 4622 mkexpr(old), 4623 mkexpr(mask[i])), 4624 binop(Iop_And64, 4625 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4626 mkexpr(mask[i])))); 4627 old = nyu; 4628 } 4629 return nyu; 4630 } 4631 /*NOTREACHED*/ 4632 vassert(0); 4633 } 4634 4635 4636 /* Generate an IR sequence to do a count-leading-zeroes operation on 4637 the supplied IRTemp, and return a new IRTemp holding the result. 4638 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4639 the argument is zero, return the number of bits in the word (the 4640 natural semantics). */ 4641 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4642 { 4643 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4644 4645 IRTemp src64 = newTemp(Ity_I64); 4646 assign(src64, widenUto64( mkexpr(src) )); 4647 4648 IRTemp src64x = newTemp(Ity_I64); 4649 assign(src64x, 4650 binop(Iop_Shl64, mkexpr(src64), 4651 mkU8(64 - 8 * sizeofIRType(ty)))); 4652 4653 // Clz64 has undefined semantics when its input is zero, so 4654 // special-case around that. 
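// Illustrative example: a 16-bit source is shifted left by 48 so
// that its MSB lines up with bit 63, and Clz64 then counts the same
// number of leading zeroes that a 16-bit lzcnt would. For
// src == 0x0001 the Mux0X takes the Clz64 path and yields 15; for
// src == 0 it yields the bit width, 16.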
4655 IRTemp res64 = newTemp(Ity_I64); 4656 assign(res64, 4657 IRExpr_Mux0X( 4658 unop(Iop_1Uto8, 4659 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))), 4660 unop(Iop_Clz64, mkexpr(src64x)), 4661 mkU64(8 * sizeofIRType(ty)) 4662 )); 4663 4664 IRTemp res = newTemp(ty); 4665 assign(res, narrowTo(ty, mkexpr(res64))); 4666 return res; 4667 } 4668 4669 4670 /*------------------------------------------------------------*/ 4671 /*--- ---*/ 4672 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4673 /*--- ---*/ 4674 /*------------------------------------------------------------*/ 4675 4676 /* --- Helper functions for dealing with the register stack. --- */ 4677 4678 /* --- Set the emulation-warning pseudo-register. --- */ 4679 4680 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4681 { 4682 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4683 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 4684 } 4685 4686 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4687 4688 static IRExpr* mkQNaN64 ( void ) 4689 { 4690 /* QNaN is 0 2047 1 0(51times) 4691 == 0b 11111111111b 1 0(51times) 4692 == 0x7FF8 0000 0000 0000 4693 */ 4694 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4695 } 4696 4697 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4698 4699 static IRExpr* get_ftop ( void ) 4700 { 4701 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4702 } 4703 4704 static void put_ftop ( IRExpr* e ) 4705 { 4706 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4707 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4708 } 4709 4710 /* --------- Get/put the C3210 bits. --------- */ 4711 4712 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4713 { 4714 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4715 } 4716 4717 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4718 { 4719 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4720 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4721 } 4722 4723 /* --------- Get/put the FPU rounding mode. --------- */ 4724 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4725 { 4726 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4727 } 4728 4729 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4730 { 4731 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4732 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4733 } 4734 4735 4736 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4737 /* Produces a value in 0 .. 3, which is encoded as per the type 4738 IRRoundingMode. Since the guest_FPROUND value is also encoded as 4739 per IRRoundingMode, we merely need to get it and mask it for 4740 safety. 4741 */ 4742 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4743 { 4744 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4745 } 4746 4747 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4748 { 4749 return mkU32(Irrm_NEAREST); 4750 } 4751 4752 4753 /* --------- Get/set FP register tag bytes. --------- */ 4754 4755 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 4756 4757 static void put_ST_TAG ( Int i, IRExpr* value ) 4758 { 4759 IRRegArray* descr; 4760 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 4761 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4762 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 4763 } 4764 4765 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 4766 zero to indicate "Empty" and nonzero to indicate "NonEmpty". 
*/ 4767 4768 static IRExpr* get_ST_TAG ( Int i ) 4769 { 4770 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4771 return IRExpr_GetI( descr, get_ftop(), i ); 4772 } 4773 4774 4775 /* --------- Get/set FP registers. --------- */ 4776 4777 /* Given i, and some expression e, emit 'ST(i) = e' and set the 4778 register's tag to indicate the register is full. The previous 4779 state of the register is not checked. */ 4780 4781 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 4782 { 4783 IRRegArray* descr; 4784 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 4785 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4786 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 4787 /* Mark the register as in-use. */ 4788 put_ST_TAG(i, mkU8(1)); 4789 } 4790 4791 /* Given i, and some expression e, emit 4792 ST(i) = is_full(i) ? NaN : e 4793 and set the tag accordingly. 4794 */ 4795 4796 static void put_ST ( Int i, IRExpr* value ) 4797 { 4798 put_ST_UNCHECKED( i, 4799 IRExpr_Mux0X( get_ST_TAG(i), 4800 /* 0 means empty */ 4801 value, 4802 /* non-0 means full */ 4803 mkQNaN64() 4804 ) 4805 ); 4806 } 4807 4808 4809 /* Given i, generate an expression yielding 'ST(i)'. */ 4810 4811 static IRExpr* get_ST_UNCHECKED ( Int i ) 4812 { 4813 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4814 return IRExpr_GetI( descr, get_ftop(), i ); 4815 } 4816 4817 4818 /* Given i, generate an expression yielding 4819 is_full(i) ? ST(i) : NaN 4820 */ 4821 4822 static IRExpr* get_ST ( Int i ) 4823 { 4824 return 4825 IRExpr_Mux0X( get_ST_TAG(i), 4826 /* 0 means empty */ 4827 mkQNaN64(), 4828 /* non-0 means full */ 4829 get_ST_UNCHECKED(i)); 4830 } 4831 4832 4833 /* Adjust FTOP downwards by one register. */ 4834 4835 static void fp_push ( void ) 4836 { 4837 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 4838 } 4839 4840 /* Adjust FTOP upwards by one register, and mark the vacated register 4841 as empty. */ 4842 4843 static void fp_pop ( void ) 4844 { 4845 put_ST_TAG(0, mkU8(0)); 4846 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4847 } 4848 4849 /* Clear the C2 bit of the FPU status register, for 4850 sin/cos/tan/sincos. */ 4851 4852 static void clear_C2 ( void ) 4853 { 4854 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); 4855 } 4856 4857 /* Invent a plausible-looking FPU status word value: 4858 ((ftop & 7) << 11) | (c3210 & 0x4700) 4859 */ 4860 static IRExpr* get_FPU_sw ( void ) 4861 { 4862 return 4863 unop(Iop_32to16, 4864 binop(Iop_Or32, 4865 binop(Iop_Shl32, 4866 binop(Iop_And32, get_ftop(), mkU32(7)), 4867 mkU8(11)), 4868 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 4869 mkU32(0x4700)) 4870 )); 4871 } 4872 4873 4874 /* ------------------------------------------------------- */ 4875 /* Given all that stack-mangling junk, we can now go ahead 4876 and describe FP instructions. 4877 */ 4878 4879 /* ST(0) = ST(0) `op` mem64/32(addr) 4880 Need to check ST(0)'s tag on read, but not on write. 
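The read goes through get_ST, so an empty ST(0) is observed as a QNaN; the write uses put_ST_UNCHECKED, since the old contents are overwritten regardless of the tag state.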
4881 */ 4882 static 4883 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4884 IROp op, Bool dbl ) 4885 { 4886 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4887 if (dbl) { 4888 put_ST_UNCHECKED(0, 4889 triop( op, 4890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4891 get_ST(0), 4892 loadLE(Ity_F64,mkexpr(addr)) 4893 )); 4894 } else { 4895 put_ST_UNCHECKED(0, 4896 triop( op, 4897 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4898 get_ST(0), 4899 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 4900 )); 4901 } 4902 } 4903 4904 4905 /* ST(0) = mem64/32(addr) `op` ST(0) 4906 Need to check ST(0)'s tag on read, but not on write. 4907 */ 4908 static 4909 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4910 IROp op, Bool dbl ) 4911 { 4912 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4913 if (dbl) { 4914 put_ST_UNCHECKED(0, 4915 triop( op, 4916 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4917 loadLE(Ity_F64,mkexpr(addr)), 4918 get_ST(0) 4919 )); 4920 } else { 4921 put_ST_UNCHECKED(0, 4922 triop( op, 4923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4924 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 4925 get_ST(0) 4926 )); 4927 } 4928 } 4929 4930 4931 /* ST(dst) = ST(dst) `op` ST(src). 4932 Check dst and src tags when reading but not on write. 4933 */ 4934 static 4935 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4936 Bool pop_after ) 4937 { 4938 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4939 put_ST_UNCHECKED( 4940 st_dst, 4941 triop( op, 4942 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4943 get_ST(st_dst), 4944 get_ST(st_src) ) 4945 ); 4946 if (pop_after) 4947 fp_pop(); 4948 } 4949 4950 /* ST(dst) = ST(src) `op` ST(dst). 4951 Check dst and src tags when reading but not on write. 4952 */ 4953 static 4954 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4955 Bool pop_after ) 4956 { 4957 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4958 put_ST_UNCHECKED( 4959 st_dst, 4960 triop( op, 4961 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4962 get_ST(st_src), 4963 get_ST(st_dst) ) 4964 ); 4965 if (pop_after) 4966 fp_pop(); 4967 } 4968 4969 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 4970 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 4971 { 4972 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 4973 /* This is a bit of a hack (and isn't really right). It sets 4974 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 4975 documentation implies A and S are unchanged. 4976 */ 4977 /* It's also fishy in that it is used both for COMIP and 4978 UCOMIP, and they aren't the same (although similar). 
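(They differ in exception behaviour: COMI* signals Invalid Operation on any NaN operand, whereas UCOMI* signals only on a signalling NaN. Since this module doesn't model FP exceptions, conflating them is tolerable. The 0x45 mask below keeps just the CF, PF and ZF bit positions of the CmpF64 result.)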
*/ 4979 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 4980 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 4981 stmt( IRStmt_Put( 4982 OFFB_CC_DEP1, 4983 binop( Iop_And64, 4984 unop( Iop_32Uto64, 4985 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 4986 mkU64(0x45) 4987 ))); 4988 if (pop_after) 4989 fp_pop(); 4990 } 4991 4992 4993 /* returns 4994 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 4995 */ 4996 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 4997 { 4998 IRTemp t32 = newTemp(Ity_I32); 4999 assign( t32, e32 ); 5000 return 5001 IRExpr_Mux0X( 5002 unop(Iop_1Uto8, 5003 binop(Iop_CmpLT64U, 5004 unop(Iop_32Uto64, 5005 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 5006 mkU64(65536))), 5007 mkU16( 0x8000 ), 5008 unop(Iop_32to16, mkexpr(t32))); 5009 } 5010 5011 5012 static 5013 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 5014 VexAbiInfo* vbi, Prefix pfx, Long delta ) 5015 { 5016 Int len; 5017 UInt r_src, r_dst; 5018 HChar dis_buf[50]; 5019 IRTemp t1, t2; 5020 5021 /* On entry, delta points at the second byte of the insn (the modrm 5022 byte).*/ 5023 UChar first_opcode = getUChar(delta-1); 5024 UChar modrm = getUChar(delta+0); 5025 5026 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 5027 5028 if (first_opcode == 0xD8) { 5029 if (modrm < 0xC0) { 5030 5031 /* bits 5,4,3 are an opcode extension, and the modRM also 5032 specifies an address. */ 5033 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5034 delta += len; 5035 5036 switch (gregLO3ofRM(modrm)) { 5037 5038 case 0: /* FADD single-real */ 5039 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5040 break; 5041 5042 case 1: /* FMUL single-real */ 5043 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5044 break; 5045 5046 case 2: /* FCOM single-real */ 5047 DIP("fcoms %s\n", dis_buf); 5048 /* This forces C1 to zero, which isn't right. */ 5049 /* The AMD documentation suggests that forcing C1 to 5050 zero is correct (Eliot Moss) */ 5051 put_C3210( 5052 unop( Iop_32Uto64, 5053 binop( Iop_And32, 5054 binop(Iop_Shl32, 5055 binop(Iop_CmpF64, 5056 get_ST(0), 5057 unop(Iop_F32toF64, 5058 loadLE(Ity_F32,mkexpr(addr)))), 5059 mkU8(8)), 5060 mkU32(0x4500) 5061 ))); 5062 break; 5063 5064 case 3: /* FCOMP single-real */ 5065 /* The AMD documentation suggests that forcing C1 to 5066 zero is correct (Eliot Moss) */ 5067 DIP("fcomps %s\n", dis_buf); 5068 /* This forces C1 to zero, which isn't right. */ 5069 put_C3210( 5070 unop( Iop_32Uto64, 5071 binop( Iop_And32, 5072 binop(Iop_Shl32, 5073 binop(Iop_CmpF64, 5074 get_ST(0), 5075 unop(Iop_F32toF64, 5076 loadLE(Ity_F32,mkexpr(addr)))), 5077 mkU8(8)), 5078 mkU32(0x4500) 5079 ))); 5080 fp_pop(); 5081 break; 5082 5083 case 4: /* FSUB single-real */ 5084 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5085 break; 5086 5087 case 5: /* FSUBR single-real */ 5088 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5089 break; 5090 5091 case 6: /* FDIV single-real */ 5092 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5093 break; 5094 5095 case 7: /* FDIVR single-real */ 5096 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5097 break; 5098 5099 default: 5100 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5101 vex_printf("first_opcode == 0xD8\n"); 5102 goto decode_fail; 5103 } 5104 } else { 5105 delta++; 5106 switch (modrm) { 5107 5108 case 0xC0 ... 
0xC7: /* FADD %st(?),%st(0) */ 5109 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5110 break; 5111 5112 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5113 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5114 break; 5115 5116 /* Dunno if this is right */ 5117 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5118 r_dst = (UInt)modrm - 0xD0; 5119 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 5120 /* This forces C1 to zero, which isn't right. */ 5121 put_C3210( 5122 unop(Iop_32Uto64, 5123 binop( Iop_And32, 5124 binop(Iop_Shl32, 5125 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5126 mkU8(8)), 5127 mkU32(0x4500) 5128 ))); 5129 break; 5130 5131 /* Dunno if this is right */ 5132 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5133 r_dst = (UInt)modrm - 0xD8; 5134 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 5135 /* This forces C1 to zero, which isn't right. */ 5136 put_C3210( 5137 unop(Iop_32Uto64, 5138 binop( Iop_And32, 5139 binop(Iop_Shl32, 5140 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5141 mkU8(8)), 5142 mkU32(0x4500) 5143 ))); 5144 fp_pop(); 5145 break; 5146 5147 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5148 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5149 break; 5150 5151 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5152 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5153 break; 5154 5155 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5156 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5157 break; 5158 5159 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5160 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5161 break; 5162 5163 default: 5164 goto decode_fail; 5165 } 5166 } 5167 } 5168 5169 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5170 else 5171 if (first_opcode == 0xD9) { 5172 if (modrm < 0xC0) { 5173 5174 /* bits 5,4,3 are an opcode extension, and the modRM also 5175 specifies an address. 
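The /r extensions handled below for 0xD9 are: /0 FLD single-real, /2 FST single-real, /3 FSTP single-real, /4 FLDENV m28, /5 FLDCW, /6 FNSTENV m28 and /7 FNSTCW.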
*/ 5176 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5177 delta += len; 5178 5179 switch (gregLO3ofRM(modrm)) { 5180 5181 case 0: /* FLD single-real */ 5182 DIP("flds %s\n", dis_buf); 5183 fp_push(); 5184 put_ST(0, unop(Iop_F32toF64, 5185 loadLE(Ity_F32, mkexpr(addr)))); 5186 break; 5187 5188 case 2: /* FST single-real */ 5189 DIP("fsts %s\n", dis_buf); 5190 storeLE(mkexpr(addr), 5191 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5192 break; 5193 5194 case 3: /* FSTP single-real */ 5195 DIP("fstps %s\n", dis_buf); 5196 storeLE(mkexpr(addr), 5197 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5198 fp_pop(); 5199 break; 5200 5201 case 4: { /* FLDENV m28 */ 5202 /* Uses dirty helper: 5203 VexEmWarn amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */ 5204 IRTemp ew = newTemp(Ity_I32); 5205 IRTemp w64 = newTemp(Ity_I64); 5206 IRDirty* d = unsafeIRDirty_0_N ( 5207 0/*regparms*/, 5208 "amd64g_dirtyhelper_FLDENV", 5209 &amd64g_dirtyhelper_FLDENV, 5210 mkIRExprVec_1( mkexpr(addr) ) 5211 ); 5212 d->needsBBP = True; 5213 d->tmp = w64; 5214 /* declare we're reading memory */ 5215 d->mFx = Ifx_Read; 5216 d->mAddr = mkexpr(addr); 5217 d->mSize = 28; 5218 5219 /* declare we're writing guest state */ 5220 d->nFxState = 4; 5221 vex_bzero(&d->fxState, sizeof(d->fxState)); 5222 5223 d->fxState[0].fx = Ifx_Write; 5224 d->fxState[0].offset = OFFB_FTOP; 5225 d->fxState[0].size = sizeof(UInt); 5226 5227 d->fxState[1].fx = Ifx_Write; 5228 d->fxState[1].offset = OFFB_FPTAGS; 5229 d->fxState[1].size = 8 * sizeof(UChar); 5230 5231 d->fxState[2].fx = Ifx_Write; 5232 d->fxState[2].offset = OFFB_FPROUND; 5233 d->fxState[2].size = sizeof(ULong); 5234 5235 d->fxState[3].fx = Ifx_Write; 5236 d->fxState[3].offset = OFFB_FC3210; 5237 d->fxState[3].size = sizeof(ULong); 5238 5239 stmt( IRStmt_Dirty(d) ); 5240 5241 /* ew contains any emulation warning we may need to 5242 issue. If needed, side-exit to the next insn, 5243 reporting the warning, so that Valgrind's dispatcher 5244 sees the warning. */ 5245 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5246 put_emwarn( mkexpr(ew) ); 5247 stmt( 5248 IRStmt_Exit( 5249 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5250 Ijk_EmWarn, 5251 IRConst_U64( guest_RIP_bbstart+delta ), 5252 OFFB_RIP 5253 ) 5254 ); 5255 5256 DIP("fldenv %s\n", dis_buf); 5257 break; 5258 } 5259 5260 case 5: { /* FLDCW */ 5261 /* The only thing we observe in the control word is the 5262 rounding mode. Therefore, pass the 16-bit value 5263 (x87 native-format control word) to a clean helper, 5264 getting back a 64-bit value, the lower half of which 5265 is the FPROUND value to store, and the upper half of 5266 which is the emulation-warning token which may be 5267 generated. 5268 */ 5269 /* ULong amd64g_check_fldcw ( ULong ); */ 5270 IRTemp t64 = newTemp(Ity_I64); 5271 IRTemp ew = newTemp(Ity_I32); 5272 DIP("fldcw %s\n", dis_buf); 5273 assign( t64, mkIRExprCCall( 5274 Ity_I64, 0/*regparms*/, 5275 "amd64g_check_fldcw", 5276 &amd64g_check_fldcw, 5277 mkIRExprVec_1( 5278 unop( Iop_16Uto64, 5279 loadLE(Ity_I16, mkexpr(addr))) 5280 ) 5281 ) 5282 ); 5283 5284 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5285 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5286 put_emwarn( mkexpr(ew) ); 5287 /* Finally, if an emulation warning was reported, 5288 side-exit to the next insn, reporting the warning, 5289 so that Valgrind's dispatcher sees the warning.
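Illustrative encoding note: the rounding-mode field is CW[11:10], and its x87 encoding (0 = nearest, 1 = down, 2 = up, 3 = toward zero) happens to coincide with IRRoundingMode, which is why FPROUND needs no further translation before being stored.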
*/ 5290 stmt( 5291 IRStmt_Exit( 5292 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5293 Ijk_EmWarn, 5294 IRConst_U64( guest_RIP_bbstart+delta ), 5295 OFFB_RIP 5296 ) 5297 ); 5298 break; 5299 } 5300 5301 case 6: { /* FNSTENV m28 */ 5302 /* Uses dirty helper: 5303 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5304 IRDirty* d = unsafeIRDirty_0_N ( 5305 0/*regparms*/, 5306 "amd64g_dirtyhelper_FSTENV", 5307 &amd64g_dirtyhelper_FSTENV, 5308 mkIRExprVec_1( mkexpr(addr) ) 5309 ); 5310 d->needsBBP = True; 5311 /* declare we're writing memory */ 5312 d->mFx = Ifx_Write; 5313 d->mAddr = mkexpr(addr); 5314 d->mSize = 28; 5315 5316 /* declare we're reading guest state */ 5317 d->nFxState = 4; 5318 vex_bzero(&d->fxState, sizeof(d->fxState)); 5319 5320 d->fxState[0].fx = Ifx_Read; 5321 d->fxState[0].offset = OFFB_FTOP; 5322 d->fxState[0].size = sizeof(UInt); 5323 5324 d->fxState[1].fx = Ifx_Read; 5325 d->fxState[1].offset = OFFB_FPTAGS; 5326 d->fxState[1].size = 8 * sizeof(UChar); 5327 5328 d->fxState[2].fx = Ifx_Read; 5329 d->fxState[2].offset = OFFB_FPROUND; 5330 d->fxState[2].size = sizeof(ULong); 5331 5332 d->fxState[3].fx = Ifx_Read; 5333 d->fxState[3].offset = OFFB_FC3210; 5334 d->fxState[3].size = sizeof(ULong); 5335 5336 stmt( IRStmt_Dirty(d) ); 5337 5338 DIP("fnstenv %s\n", dis_buf); 5339 break; 5340 } 5341 5342 case 7: /* FNSTCW */ 5343 /* Fake up a native x87 FPU control word. The only 5344 thing it depends on is FPROUND[1:0], so call a clean 5345 helper to cook it up. */ 5346 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5347 DIP("fnstcw %s\n", dis_buf); 5348 storeLE( 5349 mkexpr(addr), 5350 unop( Iop_64to16, 5351 mkIRExprCCall( 5352 Ity_I64, 0/*regp*/, 5353 "amd64g_create_fpucw", &amd64g_create_fpucw, 5354 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5355 ) 5356 ) 5357 ); 5358 break; 5359 5360 default: 5361 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5362 vex_printf("first_opcode == 0xD9\n"); 5363 goto decode_fail; 5364 } 5365 5366 } else { 5367 delta++; 5368 switch (modrm) { 5369 5370 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5371 r_src = (UInt)modrm - 0xC0; 5372 DIP("fld %%st(%u)\n", r_src); 5373 t1 = newTemp(Ity_F64); 5374 assign(t1, get_ST(r_src)); 5375 fp_push(); 5376 put_ST(0, mkexpr(t1)); 5377 break; 5378 5379 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5380 r_src = (UInt)modrm - 0xC8; 5381 DIP("fxch %%st(%u)\n", r_src); 5382 t1 = newTemp(Ity_F64); 5383 t2 = newTemp(Ity_F64); 5384 assign(t1, get_ST(0)); 5385 assign(t2, get_ST(r_src)); 5386 put_ST_UNCHECKED(0, mkexpr(t2)); 5387 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5388 break; 5389 5390 case 0xE0: /* FCHS */ 5391 DIP("fchs\n"); 5392 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5393 break; 5394 5395 case 0xE1: /* FABS */ 5396 DIP("fabs\n"); 5397 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5398 break; 5399 5400 case 0xE5: { /* FXAM */ 5401 /* This is an interesting one. It examines %st(0), 5402 regardless of whether the tag says it's empty or not. 5403 Here, just pass both the tag (in our format) and the 5404 value (as a double, actually a ULong) to a helper 5405 function. 
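The helper is expected to produce the C3,C2,C0 classification FXAM defines (empty, zero, NaN, infinity, denormal or normal); e.g. an empty register should come back with C3:C2:C0 = 1:0:1.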
*/ 5406 IRExpr** args 5407 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5408 unop(Iop_ReinterpF64asI64, 5409 get_ST_UNCHECKED(0)) ); 5410 put_C3210(mkIRExprCCall( 5411 Ity_I64, 5412 0/*regparm*/, 5413 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5414 args 5415 )); 5416 DIP("fxam\n"); 5417 break; 5418 } 5419 5420 case 0xE8: /* FLD1 */ 5421 DIP("fld1\n"); 5422 fp_push(); 5423 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5424 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5425 break; 5426 5427 case 0xE9: /* FLDL2T */ 5428 DIP("fldl2t\n"); 5429 fp_push(); 5430 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5431 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5432 break; 5433 5434 case 0xEA: /* FLDL2E */ 5435 DIP("fldl2e\n"); 5436 fp_push(); 5437 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5438 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5439 break; 5440 5441 case 0xEB: /* FLDPI */ 5442 DIP("fldpi\n"); 5443 fp_push(); 5444 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5445 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5446 break; 5447 5448 case 0xEC: /* FLDLG2 */ 5449 DIP("fldlg2\n"); 5450 fp_push(); 5451 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5452 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5453 break; 5454 5455 case 0xED: /* FLDLN2 */ 5456 DIP("fldln2\n"); 5457 fp_push(); 5458 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5459 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5460 break; 5461 5462 case 0xEE: /* FLDZ */ 5463 DIP("fldz\n"); 5464 fp_push(); 5465 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5466 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5467 break; 5468 5469 case 0xF0: /* F2XM1 */ 5470 DIP("f2xm1\n"); 5471 put_ST_UNCHECKED(0, 5472 binop(Iop_2xm1F64, 5473 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5474 get_ST(0))); 5475 break; 5476 5477 case 0xF1: /* FYL2X */ 5478 DIP("fyl2x\n"); 5479 put_ST_UNCHECKED(1, 5480 triop(Iop_Yl2xF64, 5481 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5482 get_ST(1), 5483 get_ST(0))); 5484 fp_pop(); 5485 break; 5486 5487 case 0xF2: /* FPTAN */ 5488 DIP("ftan\n"); 5489 put_ST_UNCHECKED(0, 5490 binop(Iop_TanF64, 5491 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5492 get_ST(0))); 5493 fp_push(); 5494 put_ST(0, IRExpr_Const(IRConst_F64(1.0))); 5495 clear_C2(); /* HACK */ 5496 break; 5497 5498 case 0xF3: /* FPATAN */ 5499 DIP("fpatan\n"); 5500 put_ST_UNCHECKED(1, 5501 triop(Iop_AtanF64, 5502 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5503 get_ST(1), 5504 get_ST(0))); 5505 fp_pop(); 5506 break; 5507 5508 case 0xF4: { /* FXTRACT */ 5509 IRTemp argF = newTemp(Ity_F64); 5510 IRTemp sigF = newTemp(Ity_F64); 5511 IRTemp expF = newTemp(Ity_F64); 5512 IRTemp argI = newTemp(Ity_I64); 5513 IRTemp sigI = newTemp(Ity_I64); 5514 IRTemp expI = newTemp(Ity_I64); 5515 DIP("fxtract\n"); 5516 assign( argF, get_ST(0) ); 5517 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5518 assign( sigI, 5519 mkIRExprCCall( 5520 Ity_I64, 0/*regparms*/, 5521 "x86amd64g_calculate_FXTRACT", 5522 &x86amd64g_calculate_FXTRACT, 5523 mkIRExprVec_2( mkexpr(argI), 5524 mkIRExpr_HWord(0)/*sig*/ )) 5525 ); 5526 assign( expI, 5527 mkIRExprCCall( 5528 Ity_I64, 0/*regparms*/, 5529 "x86amd64g_calculate_FXTRACT", 5530 &x86amd64g_calculate_FXTRACT, 5531 mkIRExprVec_2( mkexpr(argI), 5532 mkIRExpr_HWord(1)/*exp*/ )) 5533 ); 5534 assign( sigF, 
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5535 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5536 /* exponent */ 5537 put_ST_UNCHECKED(0, mkexpr(expF) ); 5538 fp_push(); 5539 /* significand */ 5540 put_ST(0, mkexpr(sigF) ); 5541 break; 5542 } 5543 5544 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5545 IRTemp a1 = newTemp(Ity_F64); 5546 IRTemp a2 = newTemp(Ity_F64); 5547 DIP("fprem1\n"); 5548 /* Do FPREM1 twice, once to get the remainder, and once 5549 to get the C3210 flag values. */ 5550 assign( a1, get_ST(0) ); 5551 assign( a2, get_ST(1) ); 5552 put_ST_UNCHECKED(0, 5553 triop(Iop_PRem1F64, 5554 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5555 mkexpr(a1), 5556 mkexpr(a2))); 5557 put_C3210( 5558 unop(Iop_32Uto64, 5559 triop(Iop_PRem1C3210F64, 5560 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5561 mkexpr(a1), 5562 mkexpr(a2)) )); 5563 break; 5564 } 5565 5566 case 0xF7: /* FINCSTP */ 5567 DIP("fincstp\n"); 5568 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5569 break; 5570 5571 case 0xF8: { /* FPREM -- not IEEE compliant */ 5572 IRTemp a1 = newTemp(Ity_F64); 5573 IRTemp a2 = newTemp(Ity_F64); 5574 DIP("fprem\n"); 5575 /* Do FPREM twice, once to get the remainder, and once 5576 to get the C3210 flag values. */ 5577 assign( a1, get_ST(0) ); 5578 assign( a2, get_ST(1) ); 5579 put_ST_UNCHECKED(0, 5580 triop(Iop_PRemF64, 5581 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5582 mkexpr(a1), 5583 mkexpr(a2))); 5584 put_C3210( 5585 unop(Iop_32Uto64, 5586 triop(Iop_PRemC3210F64, 5587 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5588 mkexpr(a1), 5589 mkexpr(a2)) )); 5590 break; 5591 } 5592 5593 case 0xF9: /* FYL2XP1 */ 5594 DIP("fyl2xp1\n"); 5595 put_ST_UNCHECKED(1, 5596 triop(Iop_Yl2xp1F64, 5597 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5598 get_ST(1), 5599 get_ST(0))); 5600 fp_pop(); 5601 break; 5602 5603 case 0xFA: /* FSQRT */ 5604 DIP("fsqrt\n"); 5605 put_ST_UNCHECKED(0, 5606 binop(Iop_SqrtF64, 5607 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5608 get_ST(0))); 5609 break; 5610 5611 case 0xFB: { /* FSINCOS */ 5612 IRTemp a1 = newTemp(Ity_F64); 5613 assign( a1, get_ST(0) ); 5614 DIP("fsincos\n"); 5615 put_ST_UNCHECKED(0, 5616 binop(Iop_SinF64, 5617 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5618 mkexpr(a1))); 5619 fp_push(); 5620 put_ST(0, 5621 binop(Iop_CosF64, 5622 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5623 mkexpr(a1))); 5624 clear_C2(); /* HACK */ 5625 break; 5626 } 5627 5628 case 0xFC: /* FRNDINT */ 5629 DIP("frndint\n"); 5630 put_ST_UNCHECKED(0, 5631 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5632 break; 5633 5634 case 0xFD: /* FSCALE */ 5635 DIP("fscale\n"); 5636 put_ST_UNCHECKED(0, 5637 triop(Iop_ScaleF64, 5638 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5639 get_ST(0), 5640 get_ST(1))); 5641 break; 5642 5643 case 0xFE: /* FSIN */ 5644 DIP("fsin\n"); 5645 put_ST_UNCHECKED(0, 5646 binop(Iop_SinF64, 5647 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5648 get_ST(0))); 5649 clear_C2(); /* HACK */ 5650 break; 5651 5652 case 0xFF: /* FCOS */ 5653 DIP("fcos\n"); 5654 put_ST_UNCHECKED(0, 5655 binop(Iop_CosF64, 5656 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5657 get_ST(0))); 5658 clear_C2(); /* HACK */ 5659 break; 5660 5661 default: 5662 goto decode_fail; 5663 } 5664 } 5665 } 5666 5667 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 5668 else 5669 if (first_opcode == 0xDA) { 5670 5671 if (modrm < 0xC0) { 5672 5673 /* bits 5,4,3 are an opcode extension, and the modRM also 5674 specifies an address. 
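The /r extensions handled below for 0xDA are the m32int arithmetic forms: /0 FIADD, /1 FIMUL, /4 FISUB, /5 FISUBR, /6 FIDIV and /7 FIDIVR, each of which converts the 32-bit integer to F64 and reuses the ordinary F64 operation.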
      case 0xF5: { /* FPREM1 -- IEEE compliant */
         IRTemp a1 = newTemp(Ity_F64);
         IRTemp a2 = newTemp(Ity_F64);
         DIP("fprem1\n");
         /* Do FPREM1 twice, once to get the remainder, and once
            to get the C3210 flag values. */
         assign( a1, get_ST(0) );
         assign( a2, get_ST(1) );
         put_ST_UNCHECKED(0,
            triop(Iop_PRem1F64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1),
                  mkexpr(a2)));
         put_C3210(
            unop(Iop_32Uto64,
            triop(Iop_PRem1C3210F64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1),
                  mkexpr(a2)) ));
         break;
      }

      case 0xF7: /* FINCSTP */
         DIP("fincstp\n");
         put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
         break;

      case 0xF8: { /* FPREM -- not IEEE compliant */
         IRTemp a1 = newTemp(Ity_F64);
         IRTemp a2 = newTemp(Ity_F64);
         DIP("fprem\n");
         /* Do FPREM twice, once to get the remainder, and once
            to get the C3210 flag values. */
         assign( a1, get_ST(0) );
         assign( a2, get_ST(1) );
         put_ST_UNCHECKED(0,
            triop(Iop_PRemF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1),
                  mkexpr(a2)));
         put_C3210(
            unop(Iop_32Uto64,
            triop(Iop_PRemC3210F64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1),
                  mkexpr(a2)) ));
         break;
      }

      case 0xF9: /* FYL2XP1 */
         DIP("fyl2xp1\n");
         put_ST_UNCHECKED(1,
            triop(Iop_Yl2xp1F64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  get_ST(1),
                  get_ST(0)));
         fp_pop();
         break;

      case 0xFA: /* FSQRT */
         DIP("fsqrt\n");
         put_ST_UNCHECKED(0,
            binop(Iop_SqrtF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  get_ST(0)));
         break;

      case 0xFB: { /* FSINCOS */
         IRTemp a1 = newTemp(Ity_F64);
         assign( a1, get_ST(0) );
         DIP("fsincos\n");
         put_ST_UNCHECKED(0,
            binop(Iop_SinF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1)));
         fp_push();
         put_ST(0,
            binop(Iop_CosF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  mkexpr(a1)));
         clear_C2(); /* HACK */
         break;
      }

      case 0xFC: /* FRNDINT */
         DIP("frndint\n");
         put_ST_UNCHECKED(0,
            binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
         break;

      case 0xFD: /* FSCALE */
         DIP("fscale\n");
         put_ST_UNCHECKED(0,
            triop(Iop_ScaleF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  get_ST(0),
                  get_ST(1)));
         break;

      case 0xFE: /* FSIN */
         DIP("fsin\n");
         put_ST_UNCHECKED(0,
            binop(Iop_SinF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  get_ST(0)));
         clear_C2(); /* HACK */
         break;

      case 0xFF: /* FCOS */
         DIP("fcos\n");
         put_ST_UNCHECKED(0,
            binop(Iop_CosF64,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                  get_ST(0)));
         clear_C2(); /* HACK */
         break;

      default:
         goto decode_fail;
      }
   }
}

/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDA) {

   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IROp   fop;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      switch (gregLO3ofRM(modrm)) {

         case 0: /* FIADD m32int */ /* ST(0) += m32int */
            DIP("fiaddl %s\n", dis_buf);
            fop = Iop_AddF64;
            goto do_fop_m32;

         case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
            DIP("fimull %s\n", dis_buf);
            fop = Iop_MulF64;
            goto do_fop_m32;

         case 4: /* FISUB m32int */ /* ST(0) -= m32int */
            DIP("fisubl %s\n", dis_buf);
            fop = Iop_SubF64;
            goto do_fop_m32;

         case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
            DIP("fisubrl %s\n", dis_buf);
            fop = Iop_SubF64;
            goto do_foprev_m32;

         case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
            DIP("fidivl %s\n", dis_buf);
            fop = Iop_DivF64;
            goto do_fop_m32;

         case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
            DIP("fidivrl %s\n", dis_buf);
            fop = Iop_DivF64;
            goto do_foprev_m32;

         do_fop_m32:
            put_ST_UNCHECKED(0,
               triop(fop,
                     get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                     get_ST(0),
                     unop(Iop_I32StoF64,
                          loadLE(Ity_I32, mkexpr(addr)))));
            break;

         do_foprev_m32:
            put_ST_UNCHECKED(0,
               triop(fop,
                     get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                     unop(Iop_I32StoF64,
                          loadLE(Ity_I32, mkexpr(addr))),
                     get_ST(0)));
            break;

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDA\n");
            goto decode_fail;
      }

   } else {

      delta++;
      switch (modrm) {

         case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
            r_src = (UInt)modrm - 0xC0;
            DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(0,
                             IRExpr_Mux0X(
                                unop(Iop_1Uto8,
                                     mk_amd64g_calculate_condition(AMD64CondB)),
                                get_ST(0), get_ST(r_src)) );
            break;

         case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
            r_src = (UInt)modrm - 0xC8;
            DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(0,
                             IRExpr_Mux0X(
                                unop(Iop_1Uto8,
                                     mk_amd64g_calculate_condition(AMD64CondZ)),
                                get_ST(0), get_ST(r_src)) );
            break;

         case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
            r_src = (UInt)modrm - 0xD0;
            DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(0,
                             IRExpr_Mux0X(
                                unop(Iop_1Uto8,
                                     mk_amd64g_calculate_condition(AMD64CondBE)),
                                get_ST(0), get_ST(r_src)) );
            break;

         case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
            r_src = (UInt)modrm - 0xD8;
            DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(0,
                             IRExpr_Mux0X(
                                unop(Iop_1Uto8,
                                     mk_amd64g_calculate_condition(AMD64CondP)),
                                get_ST(0), get_ST(r_src)) );
            break;

         case 0xE9: /* FUCOMPP %st(0),%st(1) */
            DIP("fucompp %%st(0),%%st(1)\n");
            /* This forces C1 to zero, which isn't right. */
            put_C3210(
                unop(Iop_32Uto64,
                binop( Iop_And32,
                       binop(Iop_Shl32,
                             binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                             mkU8(8)),
                       mkU32(0x4500)
                )));
            fp_pop();
            fp_pop();
            break;

         default:
            goto decode_fail;
      }

   }
}
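/* Note on the (CmpF64 << 8) & 0x4500 idiom used for FUCOMPP above,
   and for FCOMP/FUCOM/FUCOMP/FCOMPP below: Iop_CmpF64 yields an
   IRCmpF64Result -- 0x00 (GT), 0x01 (LT), 0x40 (EQ), 0x45
   (unordered) -- so shifting left by 8 and masking with 0x4500 lands
   the result in the C3/C2/C0 positions of the status word: GT -> 0,
   LT -> C0, EQ -> C3, unordered -> C3|C2|C0, which is what the x87
   compare insns require. */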
/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDB) {
   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;

      switch (gregLO3ofRM(modrm)) {

         case 0: /* FILD m32int */
            DIP("fildl %s\n", dis_buf);
            fp_push();
            put_ST(0, unop(Iop_I32StoF64,
                           loadLE(Ity_I32, mkexpr(addr))));
            break;

         case 1: /* FISTTPL m32 (SSE3) */
            DIP("fisttpl %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
            fp_pop();
            break;

         case 2: /* FIST m32 */
            DIP("fistl %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
            break;

         case 3: /* FISTP m32 */
            DIP("fistpl %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
            fp_pop();
            break;

         case 5: { /* FLD extended-real */
            /* Uses dirty helper:
                  ULong amd64g_loadF80le ( ULong )
               addr holds the address.  First, do a dirty call to
               get hold of the data. */
            IRTemp   val  = newTemp(Ity_I64);
            IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );

            IRDirty* d = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "amd64g_dirtyhelper_loadF80le",
                            &amd64g_dirtyhelper_loadF80le,
                            args
                         );
            /* declare that we're reading memory */
            d->mFx   = Ifx_Read;
            d->mAddr = mkexpr(addr);
            d->mSize = 10;

            /* execute the dirty call, dumping the result in val. */
            stmt( IRStmt_Dirty(d) );
            fp_push();
            put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));

            DIP("fldt %s\n", dis_buf);
            break;
         }

         case 7: { /* FSTP extended-real */
            /* Uses dirty helper:
                  void amd64g_storeF80le ( ULong addr, ULong data )
            */
            IRExpr** args
               = mkIRExprVec_2( mkexpr(addr),
                                unop(Iop_ReinterpF64asI64, get_ST(0)) );

            IRDirty* d = unsafeIRDirty_0_N (
                            0/*regparms*/,
                            "amd64g_dirtyhelper_storeF80le",
                            &amd64g_dirtyhelper_storeF80le,
                            args
                         );
            /* declare we're writing memory */
            d->mFx   = Ifx_Write;
            d->mAddr = mkexpr(addr);
            d->mSize = 10;

            /* execute the dirty call. */
            stmt( IRStmt_Dirty(d) );
            fp_pop();

            DIP("fstpt %s\n", dis_buf);
            break;
         }

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDB\n");
            goto decode_fail;
      }

   } else {

      delta++;
      switch (modrm) {

         case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
            r_src = (UInt)modrm - 0xC0;
            DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(0,
                             IRExpr_Mux0X(
                                unop(Iop_1Uto8,
                                     mk_amd64g_calculate_condition(AMD64CondNB)),
                                get_ST(0), get_ST(r_src)) );
            break;

         case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
            r_src = (UInt)modrm - 0xC8;
            DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(
               0,
               IRExpr_Mux0X(
                  unop(Iop_1Uto8,
                       mk_amd64g_calculate_condition(AMD64CondNZ)),
                  get_ST(0),
                  get_ST(r_src)
               )
            );
            break;

         case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
            r_src = (UInt)modrm - 0xD0;
            DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(
               0,
               IRExpr_Mux0X(
                  unop(Iop_1Uto8,
                       mk_amd64g_calculate_condition(AMD64CondNBE)),
                  get_ST(0),
                  get_ST(r_src)
               )
            );
            break;

         case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
            r_src = (UInt)modrm - 0xD8;
            DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
            put_ST_UNCHECKED(
               0,
               IRExpr_Mux0X(
                  unop(Iop_1Uto8,
                       mk_amd64g_calculate_condition(AMD64CondNP)),
                  get_ST(0),
                  get_ST(r_src)
               )
            );
            break;

         case 0xE2: /* FNCLEX */
            DIP("fnclex\n");
            break;

         case 0xE3: { /* FNINIT */
            /* Uses dirty helper:
                  void amd64g_do_FINIT ( VexGuestAMD64State* ) */
            IRDirty* d  = unsafeIRDirty_0_N (
                             0/*regparms*/,
                             "amd64g_dirtyhelper_FINIT",
                             &amd64g_dirtyhelper_FINIT,
                             mkIRExprVec_0()
                          );
            d->needsBBP = True;

            /* declare we're writing guest state */
            d->nFxState = 5;
            vex_bzero(&d->fxState, sizeof(d->fxState));

            d->fxState[0].fx     = Ifx_Write;
            d->fxState[0].offset = OFFB_FTOP;
            d->fxState[0].size   = sizeof(UInt);

            d->fxState[1].fx     = Ifx_Write;
            d->fxState[1].offset = OFFB_FPREGS;
            d->fxState[1].size   = 8 * sizeof(ULong);

            d->fxState[2].fx     = Ifx_Write;
            d->fxState[2].offset = OFFB_FPTAGS;
            d->fxState[2].size   = 8 * sizeof(UChar);

            d->fxState[3].fx     = Ifx_Write;
            d->fxState[3].offset = OFFB_FPROUND;
            d->fxState[3].size   = sizeof(ULong);

            d->fxState[4].fx     = Ifx_Write;
            d->fxState[4].offset = OFFB_FC3210;
            d->fxState[4].size   = sizeof(ULong);

            stmt( IRStmt_Dirty(d) );

            DIP("fninit\n");
            break;
         }

         case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
            break;

         case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
            break;

         default:
            goto decode_fail;
      }
   }
}
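/* Note on the FCMOV translations above: IRExpr_Mux0X(cond8, e0, eX)
   yields e0 when cond8 is zero and eX otherwise.  So, for example,
   fcmovnb copies %st(r_src) into %st(0) only when the NB condition
   holds, and otherwise rewrites %st(0) with its current value. */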
/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDC) {
   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;

      switch (gregLO3ofRM(modrm)) {

         case 0: /* FADD double-real */
            fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
            break;

         case 1: /* FMUL double-real */
            fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
            break;

//..          case 2: /* FCOM double-real */
//..             DIP("fcoml %s\n", dis_buf);
//..             /* This forces C1 to zero, which isn't right. */
//..             put_C3210(
//..                 binop( Iop_And32,
//..                        binop(Iop_Shl32,
//..                              binop(Iop_CmpF64,
//..                                    get_ST(0),
//..                                    loadLE(Ity_F64,mkexpr(addr))),
//..                              mkU8(8)),
//..                        mkU32(0x4500)
//..                 ));
//..             break;

         case 3: /* FCOMP double-real */
            DIP("fcompl %s\n", dis_buf);
            /* This forces C1 to zero, which isn't right. */
            put_C3210(
                unop(Iop_32Uto64,
                binop( Iop_And32,
                       binop(Iop_Shl32,
                             binop(Iop_CmpF64,
                                   get_ST(0),
                                   loadLE(Ity_F64,mkexpr(addr))),
                             mkU8(8)),
                       mkU32(0x4500)
                )));
            fp_pop();
            break;

         case 4: /* FSUB double-real */
            fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
            break;

         case 5: /* FSUBR double-real */
            fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
            break;

         case 6: /* FDIV double-real */
            fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
            break;

         case 7: /* FDIVR double-real */
            fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
            break;

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDC\n");
            goto decode_fail;
      }

   } else {

      delta++;
      switch (modrm) {

         case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
            fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
            break;

         case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
            fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
            break;

         case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
            fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
            break;

         case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
            fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
            break;

         case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
            fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
            break;

         case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
            fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
            break;

         default:
            goto decode_fail;
      }

   }
}

/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDD) {

   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;

      switch (gregLO3ofRM(modrm)) {

         case 0: /* FLD double-real */
            DIP("fldl %s\n", dis_buf);
            fp_push();
            put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
            break;

         case 1: /* FISTTPQ m64 (SSE3) */
            DIP("fisttpll %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
            fp_pop();
            break;

         case 2: /* FST double-real */
            DIP("fstl %s\n", dis_buf);
            storeLE(mkexpr(addr), get_ST(0));
            break;

         case 3: /* FSTP double-real */
            DIP("fstpl %s\n", dis_buf);
            storeLE(mkexpr(addr), get_ST(0));
            fp_pop();
            break;

         case 4: { /* FRSTOR m94/m108 */
            IRTemp   ew = newTemp(Ity_I32);
            IRTemp  w64 = newTemp(Ity_I64);
            IRDirty*  d;
            if ( have66(pfx) ) {
               /* Uses dirty helper:
                     VexEmWarn amd64g_dirtyhelper_FRSTORS
                                  ( VexGuestAMD64State*, HWord ) */
               d = unsafeIRDirty_0_N (
                      0/*regparms*/,
                      "amd64g_dirtyhelper_FRSTORS",
                      &amd64g_dirtyhelper_FRSTORS,
                      mkIRExprVec_1( mkexpr(addr) )
                   );
               d->mSize = 94;
            } else {
               /* Uses dirty helper:
                     VexEmWarn amd64g_dirtyhelper_FRSTOR
                                  ( VexGuestAMD64State*, HWord ) */
               d = unsafeIRDirty_0_N (
                      0/*regparms*/,
                      "amd64g_dirtyhelper_FRSTOR",
                      &amd64g_dirtyhelper_FRSTOR,
                      mkIRExprVec_1( mkexpr(addr) )
                   );
               d->mSize = 108;
            }

            d->needsBBP = True;
            d->tmp      = w64;
            /* declare we're reading memory */
            d->mFx   = Ifx_Read;
            d->mAddr = mkexpr(addr);
            /* d->mSize set above */

            /* declare we're writing guest state */
            d->nFxState = 5;
            vex_bzero(&d->fxState, sizeof(d->fxState));

            d->fxState[0].fx     = Ifx_Write;
            d->fxState[0].offset = OFFB_FTOP;
            d->fxState[0].size   = sizeof(UInt);

            d->fxState[1].fx     = Ifx_Write;
            d->fxState[1].offset = OFFB_FPREGS;
            d->fxState[1].size   = 8 * sizeof(ULong);

            d->fxState[2].fx     = Ifx_Write;
            d->fxState[2].offset = OFFB_FPTAGS;
            d->fxState[2].size   = 8 * sizeof(UChar);

            d->fxState[3].fx     = Ifx_Write;
            d->fxState[3].offset = OFFB_FPROUND;
            d->fxState[3].size   = sizeof(ULong);

            d->fxState[4].fx     = Ifx_Write;
            d->fxState[4].offset = OFFB_FC3210;
            d->fxState[4].size   = sizeof(ULong);

            stmt( IRStmt_Dirty(d) );

            /* ew contains any emulation warning we may need to
               issue.  If needed, side-exit to the next insn,
               reporting the warning, so that Valgrind's dispatcher
               sees the warning. */
            assign(ew, unop(Iop_64to32,mkexpr(w64)) );
            put_emwarn( mkexpr(ew) );
            stmt(
               IRStmt_Exit(
                  binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                  Ijk_EmWarn,
                  IRConst_U64( guest_RIP_bbstart+delta ),
                  OFFB_RIP
               )
            );

            if ( have66(pfx) ) {
               DIP("frstors %s\n", dis_buf);
            } else {
               DIP("frstor %s\n", dis_buf);
            }
            break;
         }
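         /* Note on the Exit just above: in the common case ew is zero
            and the guard fails, so control simply falls through.  Only
            when the helper flags an emulation warning (ew != 0) do we
            side-exit with Ijk_EmWarn to the next insn, letting the
            dispatcher report the warning before execution resumes. */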
         case 6: { /* FNSAVE m94/m108 */
            IRDirty *d;
            if ( have66(pfx) ) {
               /* Uses dirty helper:
                     void amd64g_dirtyhelper_FNSAVES
                             ( VexGuestAMD64State*, HWord ) */
               d = unsafeIRDirty_0_N (
                      0/*regparms*/,
                      "amd64g_dirtyhelper_FNSAVES",
                      &amd64g_dirtyhelper_FNSAVES,
                      mkIRExprVec_1( mkexpr(addr) )
                   );
               d->mSize = 94;
            } else {
               /* Uses dirty helper:
                     void amd64g_dirtyhelper_FNSAVE
                             ( VexGuestAMD64State*, HWord ) */
               d = unsafeIRDirty_0_N (
                      0/*regparms*/,
                      "amd64g_dirtyhelper_FNSAVE",
                      &amd64g_dirtyhelper_FNSAVE,
                      mkIRExprVec_1( mkexpr(addr) )
                   );
               d->mSize = 108;
            }
            d->needsBBP = True;
            /* declare we're writing memory */
            d->mFx   = Ifx_Write;
            d->mAddr = mkexpr(addr);
            /* d->mSize set above */

            /* declare we're reading guest state */
            d->nFxState = 5;
            vex_bzero(&d->fxState, sizeof(d->fxState));

            d->fxState[0].fx     = Ifx_Read;
            d->fxState[0].offset = OFFB_FTOP;
            d->fxState[0].size   = sizeof(UInt);

            d->fxState[1].fx     = Ifx_Read;
            d->fxState[1].offset = OFFB_FPREGS;
            d->fxState[1].size   = 8 * sizeof(ULong);

            d->fxState[2].fx     = Ifx_Read;
            d->fxState[2].offset = OFFB_FPTAGS;
            d->fxState[2].size   = 8 * sizeof(UChar);

            d->fxState[3].fx     = Ifx_Read;
            d->fxState[3].offset = OFFB_FPROUND;
            d->fxState[3].size   = sizeof(ULong);

            d->fxState[4].fx     = Ifx_Read;
            d->fxState[4].offset = OFFB_FC3210;
            d->fxState[4].size   = sizeof(ULong);

            stmt( IRStmt_Dirty(d) );

            if ( have66(pfx) ) {
               DIP("fnsaves %s\n", dis_buf);
            } else {
               DIP("fnsave %s\n", dis_buf);
            }
            break;
         }

         case 7: { /* FNSTSW m16 */
            IRExpr* sw = get_FPU_sw();
            vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
            storeLE( mkexpr(addr), sw );
            DIP("fnstsw %s\n", dis_buf);
            break;
         }

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDD\n");
            goto decode_fail;
      }
   } else {
      delta++;
      switch (modrm) {

         case 0xC0 ... 0xC7: /* FFREE %st(?) */
            r_dst = (UInt)modrm - 0xC0;
            DIP("ffree %%st(%u)\n", r_dst);
            put_ST_TAG ( r_dst, mkU8(0) );
            break;

         case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
            r_dst = (UInt)modrm - 0xD0;
            DIP("fst %%st(0),%%st(%u)\n", r_dst);
            /* P4 manual says: "If the destination operand is a
               non-empty register, the invalid-operation exception
               is not generated."  Hence put_ST_UNCHECKED. */
            put_ST_UNCHECKED(r_dst, get_ST(0));
            break;

         case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
            r_dst = (UInt)modrm - 0xD8;
            DIP("fstp %%st(0),%%st(%u)\n", r_dst);
            /* P4 manual says: "If the destination operand is a
               non-empty register, the invalid-operation exception
               is not generated."  Hence put_ST_UNCHECKED. */
            put_ST_UNCHECKED(r_dst, get_ST(0));
            fp_pop();
            break;

         case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
            r_dst = (UInt)modrm - 0xE0;
            DIP("fucom %%st(0),%%st(%u)\n", r_dst);
            /* This forces C1 to zero, which isn't right. */
            put_C3210(
                unop(Iop_32Uto64,
                binop( Iop_And32,
                       binop(Iop_Shl32,
                             binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                             mkU8(8)),
                       mkU32(0x4500)
                )));
            break;

         case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
            r_dst = (UInt)modrm - 0xE8;
            DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
            /* This forces C1 to zero, which isn't right. */
            put_C3210(
                unop(Iop_32Uto64,
                binop( Iop_And32,
                       binop(Iop_Shl32,
                             binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                             mkU8(8)),
                       mkU32(0x4500)
                )));
            fp_pop();
            break;

         default:
            goto decode_fail;
      }
   }
}

/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDE) {

   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IROp   fop;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;

      switch (gregLO3ofRM(modrm)) {

         case 0: /* FIADD m16int */ /* ST(0) += m16int */
            DIP("fiaddw %s\n", dis_buf);
            fop = Iop_AddF64;
            goto do_fop_m16;

         case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
            DIP("fimulw %s\n", dis_buf);
            fop = Iop_MulF64;
            goto do_fop_m16;

         case 4: /* FISUB m16int */ /* ST(0) -= m16int */
            DIP("fisubw %s\n", dis_buf);
            fop = Iop_SubF64;
            goto do_fop_m16;

         case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
            DIP("fisubrw %s\n", dis_buf);
            fop = Iop_SubF64;
            goto do_foprev_m16;

         case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
            DIP("fidivw %s\n", dis_buf);
            fop = Iop_DivF64;
            goto do_fop_m16;

         case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
            DIP("fidivrw %s\n", dis_buf);
            fop = Iop_DivF64;
            goto do_foprev_m16;

         do_fop_m16:
            put_ST_UNCHECKED(0,
               triop(fop,
                     get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                     get_ST(0),
                     unop(Iop_I32StoF64,
                          unop(Iop_16Sto32,
                               loadLE(Ity_I16, mkexpr(addr))))));
            break;

         do_foprev_m16:
            put_ST_UNCHECKED(0,
               triop(fop,
                     get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                     unop(Iop_I32StoF64,
                          unop(Iop_16Sto32,
                               loadLE(Ity_I16, mkexpr(addr)))),
                     get_ST(0)));
            break;

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDE\n");
            goto decode_fail;
      }

   } else {

      delta++;
      switch (modrm) {

         case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
            fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
            break;

         case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
            fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
            break;

         case 0xD9: /* FCOMPP %st(0),%st(1) */
            DIP("fcompp %%st(0),%%st(1)\n");
            /* This forces C1 to zero, which isn't right. */
            put_C3210(
                unop(Iop_32Uto64,
                binop( Iop_And32,
                       binop(Iop_Shl32,
                             binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                             mkU8(8)),
                       mkU32(0x4500)
                )));
            fp_pop();
            fp_pop();
            break;

         case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
            fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
            break;

         case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
            fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
            break;

         case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
            fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
            break;

         case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
            fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
            break;

         default:
            goto decode_fail;
      }

   }
}

/* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
else
if (first_opcode == 0xDF) {

   if (modrm < 0xC0) {

      /* bits 5,4,3 are an opcode extension, and the modRM also
         specifies an address. */
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;

      switch (gregLO3ofRM(modrm)) {

         case 0: /* FILD m16int */
            DIP("fildw %s\n", dis_buf);
            fp_push();
            put_ST(0, unop(Iop_I32StoF64,
                           unop(Iop_16Sto32,
                                loadLE(Ity_I16, mkexpr(addr)))));
            break;

         case 1: /* FISTTPS m16 (SSE3) */
            DIP("fisttps %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     x87ishly_qnarrow_32_to_16(
                     binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
            fp_pop();
            break;

         case 2: /* FIST m16 */
            DIP("fists %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     x87ishly_qnarrow_32_to_16(
                     binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
            break;

         case 3: /* FISTP m16 */
            DIP("fistps %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     x87ishly_qnarrow_32_to_16(
                     binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
            fp_pop();
            break;

         case 5: /* FILD m64 */
            DIP("fildll %s\n", dis_buf);
            fp_push();
            put_ST(0, binop(Iop_I64StoF64,
                            get_roundingmode(),
                            loadLE(Ity_I64, mkexpr(addr))));
            break;

         case 7: /* FISTP m64 */
            DIP("fistpll %s\n", dis_buf);
            storeLE( mkexpr(addr),
                     binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
            fp_pop();
            break;

         default:
            vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
            vex_printf("first_opcode == 0xDF\n");
            goto decode_fail;
      }

   } else {

      delta++;
      switch (modrm) {

         case 0xC0: /* FFREEP %st(0) */
            DIP("ffreep %%st(%d)\n", 0);
            put_ST_TAG ( 0, mkU8(0) );
            fp_pop();
            break;

         case 0xE0: /* FNSTSW %ax */
            DIP("fnstsw %%ax\n");
            /* Invent a plausible-looking FPU status word value and
               dump it in %AX:
                  ((ftop & 7) << 11) | (c3210 & 0x4700)
            */
            putIRegRAX(
               2,
               unop(Iop_32to16,
                    binop(Iop_Or32,
                          binop(Iop_Shl32,
                                binop(Iop_And32, get_ftop(), mkU32(7)),
                                mkU8(11)),
                          binop(Iop_And32,
                                unop(Iop_64to32, get_C3210()),
                                mkU32(0x4700))
            )));
            break;
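         /* Worked example for the above: with FTOP == 5 and
            C3210 == 0x0400 (C2 set), the synthesised status word is
            ((5 & 7) << 11) | (0x0400 & 0x4700) == 0x2C00, i.e. TOP in
            bits 13..11 and C2 in bit 10. */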
         case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
            break;

         case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
            /* not really right since COMIP != UCOMIP */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
            break;

         default:
            goto decode_fail;
      }
   }

}

else
   goto decode_fail;

*decode_ok = True;
return delta;

decode_fail:
*decode_ok = False;
return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}
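/* A consequence of the above: after any non-EMMS MMX insn the
   simulated x87 stack appears full (all tags valid, FTOP == 0), so a
   subsequent x87 push will find its target slot non-empty -- the
   simulated stack-overflow case -- mirroring the way real hardware
   aliases the MMX and x87 register files.  do_EMMS_preamble marks
   everything empty again. */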
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}


/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

static
ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                HChar*      name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;
   IROp    op    = Iop_INVALID;
   void*   hAddr = NULL;
   HChar*  hName = NULL;
   Bool    eLeft = False;

#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64, mkexpr(addr));
   }

   if (eLeft) {
      argL = argE;
      argR = argG;
   } else {
      argL = argG;
      argR = argE;
   }

   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );

   return delta;
}


/* Vector by scalar shift of G by the amount specified at the bottom
   of E.  This is a straight copy of dis_SSE_shiftG_byE. */

static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  g0   = newTemp(Ity_I64);
   IRTemp  g1   = newTemp(Ity_I64);
   IRTemp  amt  = newTemp(Ity_I64);
   IRTemp  amt8 = newTemp(Ity_I8);

   if (epartIsReg(rm)) {
      assign( amt, getMMXReg(eregLO3ofRM(rm)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregLO3ofRM(rm)),
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta += alen;
   }
   assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            mkU64(0),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            binop(op, mkexpr(g0), mkU8(size-1)),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else {
      vassert(0);
   }

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm. */

static
ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_I64);
   IRTemp  e1   = newTemp(Ity_I64);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameMMXReg(eregLO3ofRM(rm)) );

   assign( e0, getMMXReg(eregLO3ofRM(rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkU64(0)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   return delta;
}
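/* Worked example for the clamping in dis_MMX_shiftE_imm: psllw $3 on
   the value 0x0001000200030004 gives 0x0008001000180020, whereas
   psllw $16 (or any amount >= the 16-bit lane size) produces all
   zeroes, as the hardware requires; for the arithmetic right shifts
   an oversize amount is clamped to size-1, so each lane fills with
   its sign bit. */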
/* Completely handle all MMX instructions except emms. */

static
ULong dis_MMX ( Bool* decode_ok,
                VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
{
   Int   len;
   UChar modrm;
   HChar dis_buf[50];
   UChar opc = getUChar(delta);
   delta++;

   /* dis_MMX handles all insns except emms. */
   do_MMX_preamble();

   switch (opc) {

      case 0x6E:
         if (sz == 4) {
            /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         getIReg32(eregOfRexRM(pfx,modrm)) ) );
               DIP("movd %s, %s\n",
                   nameIReg32(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         loadLE(Ity_I32, mkexpr(addr)) ) );
               DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg( gregLO3ofRM(modrm),
                          getIReg64(eregOfRexRM(pfx,modrm)) );
               DIP("movd %s, %s\n",
                   nameIReg64(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               putMMXReg( gregLO3ofRM(modrm),
                          loadLE(Ity_I64, mkexpr(addr)) );
               DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else {
            goto mmx_decode_failure;
         }
         break;

      case 0x7E:
         if (sz == 4) {
            /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putIReg32( eregOfRexRM(pfx,modrm),
                          unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
               DIP("movd %s, %s\n",
                   nameMMXReg(gregLO3ofRM(modrm)),
                   nameIReg32(eregOfRexRM(pfx,modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               storeLE( mkexpr(addr),
                        unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
               DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putIReg64( eregOfRexRM(pfx,modrm),
                          getMMXReg(gregLO3ofRM(modrm)) );
               DIP("movd %s, %s\n",
                   nameMMXReg(gregLO3ofRM(modrm)),
                   nameIReg64(eregOfRexRM(pfx,modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               storeLE( mkexpr(addr),
                        getMMXReg(gregLO3ofRM(modrm)) );
               DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
            }
         } else {
            goto mmx_decode_failure;
         }
         break;

      case 0x6F:
         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregLO3ofRM(modrm)),
                nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            delta += len;
            putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
         }
         break;

      case 0x7F:
         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)),
                nameMMXReg(eregLO3ofRM(modrm)));
         } else {
            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            delta += len;
            storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
         }
         break;

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
         break;

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
         break;

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
         break;

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
         break;

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
         break;

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
         break;

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
         break;

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
         break;

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         vassert(sz == 4);
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
         break;

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
         break;

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
         break;

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
         break;

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
         break;

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
         break;

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
         break;

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
         break;

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
         break;

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
         break;

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
         break;

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
         break;

#     define SHIFT_BY_REG(_name,_op)                                     \
                delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
                break;

      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);

      /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
      case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);

#     undef SHIFT_BY_REG

      case 0x71:
      case 0x72:
      case 0x73: {
         /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
         UChar byte2, subopc;
         if (sz != 4)
            goto mmx_decode_failure;
         byte2  = getUChar(delta);      /* amode / sub-opcode */
         subopc = toUChar( (byte2 >> 3) & 7 );

#        define SHIFT_BY_IMM(_name,_op)                        \
            do { delta = dis_MMX_shiftE_imm(delta,_name,_op);  \
            } while (0)

              if (subopc == 2 /*SRL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
         else if (subopc == 2 /*SRL*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
         else if (subopc == 2 /*SRL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psrlq", Iop_Shr64);

         else if (subopc == 4 /*SAR*/ && opc == 0x71)
                  SHIFT_BY_IMM("psraw", Iop_SarN16x4);
         else if (subopc == 4 /*SAR*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrad", Iop_SarN32x2);

         else if (subopc == 6 /*SHL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
         else if (subopc == 6 /*SHL*/ && opc == 0x72)
                  SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psllq", Iop_Shl64);

         else goto mmx_decode_failure;

#        undef SHIFT_BY_IMM
         break;
      }

      case 0xF7: { /* MASKMOVQ (src)mmxreg, (dst)mmxreg */
         IRTemp addr    = newTemp(Ity_I64);
         IRTemp regD    = newTemp(Ity_I64);
         IRTemp regM    = newTemp(Ity_I64);
         IRTemp mask    = newTemp(Ity_I64);
         IRTemp olddata = newTemp(Ity_I64);
         IRTemp newdata = newTemp(Ity_I64);

         modrm = getUChar(delta);
         if (sz != 4 || (!epartIsReg(modrm)))
            goto mmx_decode_failure;
         delta++;

         assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
         assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
         assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
         assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
         assign( newdata,
                 binop(Iop_Or64,
                       binop(Iop_And64,
                             mkexpr(regD),
                             mkexpr(mask) ),
                       binop(Iop_And64,
                             mkexpr(olddata),
                             unop(Iop_Not64, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );
         DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
                                 nameMMXReg( gregLO3ofRM(modrm) ) );
         break;
      }
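      /* Worked example for the masking above: Iop_SarN8x8 by 7
         replicates each byte's top bit, so regM == 0x80FF000000000001
         yields the byte mask 0xFFFF000000000000; only the two bytes of
         regD selected by the mask reach memory, while the remaining
         six bytes of the original data are preserved. */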
      /* --- MMX decode failure --- */
      default:
      mmx_decode_failure:
         *decode_ok = False;
         return delta; /* ignored */

   }

   *decode_ok = True;
   return delta;
}


/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/

/* Generate base << amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
static
IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
{
   /* if   amt == 0
      then base
      else (base << amt) | (xtra >>u (64-amt))
   */
   return
      IRExpr_Mux0X(
         mkexpr(amt),
         mkexpr(base),
         binop(Iop_Or64,
               binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shr64, mkexpr(xtra),
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
         )
      );
}

/* Generate base >>u amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
static
IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
{
   /* if   amt == 0
      then base
      else (base >>u amt) | (xtra << (64-amt))
   */
   return
      IRExpr_Mux0X(
         mkexpr(amt),
         mkexpr(base),
         binop(Iop_Or64,
               binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shl64, mkexpr(xtra),
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
         )
      );
}
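/* Worked example for the two helpers above: base == 0x1,
   xtra == 0xC000000000000000 and amt == 4 gives
   (base << 4) | (xtra >>u 60) == 0x10 | 0xC == 0x1C -- the four
   vacated low bits are filled from the top of xtra.  The Mux0X
   short-circuits the amt == 0 case to plain base, avoiding the
   ill-defined 64-bit shift by 64. */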
/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */
static
ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
                        Prefix pfx,
                        Long delta, UChar modrm,
                        Int sz,
                        IRExpr* shift_amt,
                        Bool amt_is_literal,
                        HChar* shift_amt_txt,
                        Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is
      used for printing it.  And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp gsrc  = newTemp(ty);
   IRTemp esrc  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;
   IRTemp tmpSH = newTemp(Ity_I8);
   IRTemp tmpSS = newTemp(Ity_I8);
   IRTemp tmp64 = IRTemp_INVALID;
   IRTemp res64 = IRTemp_INVALID;
   IRTemp rss64 = IRTemp_INVALID;
   IRTemp resTy = IRTemp_INVALID;
   IRTemp rssTy = IRTemp_INVALID;
   Int    mask  = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom.  */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), dis_buf);
   }

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
      value (rss64). */

   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8,
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                        mkU8(mask)));

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(32)) );
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
                       mkU8(32)) );
      }
      else
      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
      }
      else
      if (sz == 2 && left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         ));
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(48)) );
         /* subshift formed by shifting [esrc'0000'0000'0000] */
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64,
                             binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
                                              mkU8(48)),
                             mkexpr(tmpSS)),
                       mkU8(48)) );
      }
      else
      if (sz == 2 && !left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         ));
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64,
                              unop(Iop_16Uto64, mkexpr(esrc)),
                              mkexpr(tmpSS)) );
      }

   } else {

      vassert(sz == 8);
      if (left_shift) {
         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   if (amt_is_literal) delta++;
   return delta;
}
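/* Worked example for the 32-bit left-shift path above: shldl $4 with
   E == 0x00000001 and G == 0x80000000 forms tmp64 ==
   0x0000000180000000; shifting left 4 and taking the top half gives
   0x00000018, whose low 4 bits came from the top of G, as required. */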
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}


static
ULong dis_bt_G_E ( VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op )
{
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_rsp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP before at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely.  */
      t_rsp = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And64,
                              mkexpr(t_bitno0),
                              mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg.  */

   /* Now the main sequence. */
   assign( t_addr1,
           binop(Iop_Add64,
                 mkexpr(t_addr0),
                 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_64to8,
                binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(amd64)");
      }
      if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_RIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        unop(Iop_8Uto64, mkexpr(t_fetched)),
                        mkexpr(t_bitno2)),
                  mkU64(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_rsp still points at it. */
      /* only write the reg if actually modifying it; doing otherwise
         zeroes the top half erroneously when doing btl due to
         standard zero-extend rule */
      if (op != BtOpNone)
         putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
       ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );

   return delta;
}
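/* Worked example for the address arithmetic in dis_bt_G_E: for the
   memory form with bit number 19, the byte fetched is at
   addr + (19 >>s 3) == addr + 2 and the bit tested is 19 & 7 == 3.
   The arithmetic (signed) shift matters because the bit offset is
   signed in the memory form: bit number -1 denotes bit 7 of the byte
   at addr - 1. */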
*/ 7754 static 7755 ULong dis_bs_E_G ( VexAbiInfo* vbi, 7756 Prefix pfx, Int sz, Long delta, Bool fwds ) 7757 { 7758 Bool isReg; 7759 UChar modrm; 7760 HChar dis_buf[50]; 7761 7762 IRType ty = szToITy(sz); 7763 IRTemp src = newTemp(ty); 7764 IRTemp dst = newTemp(ty); 7765 IRTemp src64 = newTemp(Ity_I64); 7766 IRTemp dst64 = newTemp(Ity_I64); 7767 IRTemp src8 = newTemp(Ity_I8); 7768 7769 vassert(sz == 8 || sz == 4 || sz == 2); 7770 7771 modrm = getUChar(delta); 7772 isReg = epartIsReg(modrm); 7773 if (isReg) { 7774 delta++; 7775 assign( src, getIRegE(sz, pfx, modrm) ); 7776 } else { 7777 Int len; 7778 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7779 delta += len; 7780 assign( src, loadLE(ty, mkexpr(addr)) ); 7781 } 7782 7783 DIP("bs%c%c %s, %s\n", 7784 fwds ? 'f' : 'r', nameISize(sz), 7785 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 7786 nameIRegG(sz, pfx, modrm)); 7787 7788 /* First, widen src to 64 bits if it is not already. */ 7789 assign( src64, widenUto64(mkexpr(src)) ); 7790 7791 /* Generate an 8-bit expression which is zero iff the 7792 original is zero, and nonzero otherwise */ 7793 assign( src8, 7794 unop(Iop_1Uto8, 7795 binop(Iop_CmpNE64, 7796 mkexpr(src64), mkU64(0))) ); 7797 7798 /* Flags: Z is 1 iff source value is zero. All others 7799 are undefined -- we force them to zero. */ 7800 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7801 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7802 stmt( IRStmt_Put( 7803 OFFB_CC_DEP1, 7804 IRExpr_Mux0X( mkexpr(src8), 7805 /* src==0 */ 7806 mkU64(AMD64G_CC_MASK_Z), 7807 /* src!=0 */ 7808 mkU64(0) 7809 ) 7810 )); 7811 /* Set NDEP even though it isn't used. This makes redundant-PUT 7812 elimination of previous stores to this field work better. */ 7813 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7814 7815 /* Result: iff source value is zero, we can't use 7816 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. 7817 But anyway, amd64 semantics say the result is undefined in 7818 such situations. Hence handle the zero case specially. */ 7819 7820 /* Bleh. What we compute: 7821 7822 bsf64: if src == 0 then {dst is unchanged} 7823 else Ctz64(src) 7824 7825 bsr64: if src == 0 then {dst is unchanged} 7826 else 63 - Clz64(src) 7827 7828 bsf32: if src == 0 then {dst is unchanged} 7829 else Ctz64(32Uto64(src)) 7830 7831 bsr32: if src == 0 then {dst is unchanged} 7832 else 63 - Clz64(32Uto64(src)) 7833 7834 bsf16: if src == 0 then {dst is unchanged} 7835 else Ctz64(32Uto64(16Uto32(src))) 7836 7837 bsr16: if src == 0 then {dst is unchanged} 7838 else 63 - Clz64(32Uto64(16Uto32(src))) 7839 */ 7840 7841 /* The main computation, guarding against zero. */ 7842 assign( dst64, 7843 IRExpr_Mux0X( 7844 mkexpr(src8), 7845 /* src == 0 -- leave dst unchanged */ 7846 widenUto64( getIRegG( sz, pfx, modrm ) ), 7847 /* src != 0 */ 7848 fwds ? 
unop(Iop_Ctz64, mkexpr(src64))
                 : binop(Iop_Sub64,
                         mkU64(63),
                         unop(Iop_Clz64, mkexpr(src64)))
           )
   );

   if (sz == 2)
      assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
   else
   if (sz == 4)
      assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
   else
      assign( dst, mkexpr(dst64) );

   /* dump result back */
   putIRegG( sz, pfx, modrm, mkexpr(dst) );

   return delta;
}


/* swap rAX with the reg specified by reg and REX.B */
static
void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4 || sz == 8);
   vassert(regLo3 < 8);
   if (sz == 8) {
      assign( t1, getIReg64(R_RAX) );
      assign( t2, getIRegRexB(8, pfx, regLo3) );
      putIReg64( R_RAX, mkexpr(t2) );
      putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
   } else if (sz == 4) {
      assign( t1, getIReg32(R_RAX) );
      assign( t2, getIRegRexB(4, pfx, regLo3) );
      putIReg32( R_RAX, mkexpr(t2) );
      putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
   } else {
      assign( t1, getIReg16(R_RAX) );
      assign( t2, getIRegRexB(2, pfx, regLo3) );
      putIReg16( R_RAX, mkexpr(t2) );
      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
   }
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIRegRAX(sz),
       nameIRegRexB(sz,pfx, regLo3));
}


static
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
                                    -- retain the old O flag
      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C))
   */
   ULong  mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                       |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I64);
   assign( oldflags, mk_amd64g_calculate_rflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or64,
               binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
               binop(Iop_And64,
                     binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
                     mkU64(mask_SZACP))
              )
   ));
}


static
void codegen_LAHF ( void )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* rax_with_hole;
   IRExpr* new_byte;
   IRExpr* new_rax;
   ULong   mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                        |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;

   IRTemp  flags = newTemp(Ity_I64);
   assign( flags, mk_amd64g_calculate_rflags_all() );

   rax_with_hole
      = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
   new_byte
      = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
                        mkU64(1<<1));
   new_rax
      = binop(Iop_Or64, rax_with_hole,
                        binop(Iop_Shl64, new_byte, mkU8(8)));
   putIReg64(R_RAX, new_rax);
}


static
ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
                        VexAbiInfo*  vbi,
                        Prefix       pfx,
                        Int          size,
                        Long         delta0 )
{
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond8 = newTemp(Ity_I8);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);
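
   /* As an informal sketch (not the authoritative semantics; see the
      Intel/AMD manuals for that), CMPXCHG behaves like this, where E
      is the reg-or-mem operand and G the reg operand:

         if (rAX == E) { ZF = 1;  E  = G; }
         else          { ZF = 0; rAX = E; }

      with the remaining arithmetic flags set as if by CMP rAX,E.
      All three cases below implement this same logic; they differ
      only in whether E lives in a register or in memory, and in
      whether the update must be atomic. */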
7972 7973 /* There are 3 cases to consider: 7974 7975 reg-reg: ignore any lock prefix, generate sequence based 7976 on Mux0X 7977 7978 reg-mem, not locked: ignore any lock prefix, generate sequence 7979 based on Mux0X 7980 7981 reg-mem, locked: use IRCAS 7982 */ 7983 7984 if (epartIsReg(rm)) { 7985 /* case 1 */ 7986 assign( dest, getIRegE(size, pfx, rm) ); 7987 delta0++; 7988 assign( src, getIRegG(size, pfx, rm) ); 7989 assign( acc, getIRegRAX(size) ); 7990 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7991 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7992 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 7993 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7994 putIRegRAX(size, mkexpr(acc2)); 7995 putIRegE(size, pfx, rm, mkexpr(dest2)); 7996 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7997 nameIRegG(size,pfx,rm), 7998 nameIRegE(size,pfx,rm) ); 7999 } 8000 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 8001 /* case 2 */ 8002 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8003 assign( dest, loadLE(ty, mkexpr(addr)) ); 8004 delta0 += len; 8005 assign( src, getIRegG(size, pfx, rm) ); 8006 assign( acc, getIRegRAX(size) ); 8007 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8008 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 8009 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 8010 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 8011 putIRegRAX(size, mkexpr(acc2)); 8012 storeLE( mkexpr(addr), mkexpr(dest2) ); 8013 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8014 nameIRegG(size,pfx,rm), dis_buf); 8015 } 8016 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 8017 /* case 3 */ 8018 /* src is new value. acc is expected value. dest is old value. 8019 Compute success from the output of the IRCAS, and steer the 8020 new value for RAX accordingly: in case of success, RAX is 8021 unchanged. */ 8022 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8023 delta0 += len; 8024 assign( src, getIRegG(size, pfx, rm) ); 8025 assign( acc, getIRegRAX(size) ); 8026 stmt( IRStmt_CAS( 8027 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 8028 NULL, mkexpr(acc), NULL, mkexpr(src) ) 8029 )); 8030 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8031 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 8032 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 8033 putIRegRAX(size, mkexpr(acc2)); 8034 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8035 nameIRegG(size,pfx,rm), dis_buf); 8036 } 8037 else vassert(0); 8038 8039 *ok = True; 8040 return delta0; 8041 } 8042 8043 8044 /* Handle conditional move instructions of the form 8045 cmovcc E(reg-or-mem), G(reg) 8046 8047 E(src) is reg-or-mem 8048 G(dst) is reg. 
8049 8050 If E is reg, --> GET %E, tmps 8051 GET %G, tmpd 8052 CMOVcc tmps, tmpd 8053 PUT tmpd, %G 8054 8055 If E is mem --> (getAddr E) -> tmpa 8056 LD (tmpa), tmps 8057 GET %G, tmpd 8058 CMOVcc tmps, tmpd 8059 PUT tmpd, %G 8060 */ 8061 static 8062 ULong dis_cmov_E_G ( VexAbiInfo* vbi, 8063 Prefix pfx, 8064 Int sz, 8065 AMD64Condcode cond, 8066 Long delta0 ) 8067 { 8068 UChar rm = getUChar(delta0); 8069 HChar dis_buf[50]; 8070 Int len; 8071 8072 IRType ty = szToITy(sz); 8073 IRTemp tmps = newTemp(ty); 8074 IRTemp tmpd = newTemp(ty); 8075 8076 if (epartIsReg(rm)) { 8077 assign( tmps, getIRegE(sz, pfx, rm) ); 8078 assign( tmpd, getIRegG(sz, pfx, rm) ); 8079 8080 putIRegG( sz, pfx, rm, 8081 IRExpr_Mux0X( unop(Iop_1Uto8, 8082 mk_amd64g_calculate_condition(cond)), 8083 mkexpr(tmpd), 8084 mkexpr(tmps) ) 8085 ); 8086 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8087 nameIRegE(sz,pfx,rm), 8088 nameIRegG(sz,pfx,rm)); 8089 return 1+delta0; 8090 } 8091 8092 /* E refers to memory */ 8093 { 8094 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8095 assign( tmps, loadLE(ty, mkexpr(addr)) ); 8096 assign( tmpd, getIRegG(sz, pfx, rm) ); 8097 8098 putIRegG( sz, pfx, rm, 8099 IRExpr_Mux0X( unop(Iop_1Uto8, 8100 mk_amd64g_calculate_condition(cond)), 8101 mkexpr(tmpd), 8102 mkexpr(tmps) ) 8103 ); 8104 8105 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8106 dis_buf, 8107 nameIRegG(sz,pfx,rm)); 8108 return len+delta0; 8109 } 8110 } 8111 8112 8113 static 8114 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, 8115 VexAbiInfo* vbi, 8116 Prefix pfx, Int sz, Long delta0 ) 8117 { 8118 Int len; 8119 UChar rm = getUChar(delta0); 8120 HChar dis_buf[50]; 8121 8122 IRType ty = szToITy(sz); 8123 IRTemp tmpd = newTemp(ty); 8124 IRTemp tmpt0 = newTemp(ty); 8125 IRTemp tmpt1 = newTemp(ty); 8126 8127 /* There are 3 cases to consider: 8128 8129 reg-reg: ignore any lock prefix, 8130 generate 'naive' (non-atomic) sequence 8131 8132 reg-mem, not locked: ignore any lock prefix, generate 'naive' 8133 (non-atomic) sequence 8134 8135 reg-mem, locked: use IRCAS 8136 */ 8137 8138 if (epartIsReg(rm)) { 8139 /* case 1 */ 8140 assign( tmpd, getIRegE(sz, pfx, rm) ); 8141 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8142 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8143 mkexpr(tmpd), mkexpr(tmpt0)) ); 8144 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8145 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8146 putIRegE(sz, pfx, rm, mkexpr(tmpt1)); 8147 DIP("xadd%c %s, %s\n", 8148 nameISize(sz), nameIRegG(sz,pfx,rm), 8149 nameIRegE(sz,pfx,rm)); 8150 *decode_ok = True; 8151 return 1+delta0; 8152 } 8153 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 8154 /* case 2 */ 8155 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8156 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8157 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8158 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8159 mkexpr(tmpd), mkexpr(tmpt0)) ); 8160 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8161 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 8162 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8163 DIP("xadd%c %s, %s\n", 8164 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8165 *decode_ok = True; 8166 return len+delta0; 8167 } 8168 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 8169 /* case 3 */ 8170 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8171 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8172 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8173 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8174 mkexpr(tmpd), mkexpr(tmpt0)) ); 8175 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 
8176 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr ); 8177 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8178 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8179 DIP("xadd%c %s, %s\n", 8180 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8181 *decode_ok = True; 8182 return len+delta0; 8183 } 8184 /*UNREACHED*/ 8185 vassert(0); 8186 } 8187 8188 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 8189 //.. 8190 //.. static 8191 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) 8192 //.. { 8193 //.. Int len; 8194 //.. IRTemp addr; 8195 //.. UChar rm = getUChar(delta0); 8196 //.. HChar dis_buf[50]; 8197 //.. 8198 //.. if (epartIsReg(rm)) { 8199 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 8200 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 8201 //.. return 1+delta0; 8202 //.. } else { 8203 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8204 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 8205 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 8206 //.. return len+delta0; 8207 //.. } 8208 //.. } 8209 //.. 8210 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 8211 //.. dst is ireg and sz==4, zero out top half of it. */ 8212 //.. 8213 //.. static 8214 //.. UInt dis_mov_Sw_Ew ( UChar sorb, 8215 //.. Int sz, 8216 //.. UInt delta0 ) 8217 //.. { 8218 //.. Int len; 8219 //.. IRTemp addr; 8220 //.. UChar rm = getUChar(delta0); 8221 //.. HChar dis_buf[50]; 8222 //.. 8223 //.. vassert(sz == 2 || sz == 4); 8224 //.. 8225 //.. if (epartIsReg(rm)) { 8226 //.. if (sz == 4) 8227 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 8228 //.. else 8229 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 8230 //.. 8231 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 8232 //.. return 1+delta0; 8233 //.. } else { 8234 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8235 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 8236 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 8237 //.. return len+delta0; 8238 //.. } 8239 //.. } 8240 //.. 8241 //.. 8242 //.. static 8243 //.. void dis_push_segreg ( UInt sreg, Int sz ) 8244 //.. { 8245 //.. IRTemp t1 = newTemp(Ity_I16); 8246 //.. IRTemp ta = newTemp(Ity_I32); 8247 //.. vassert(sz == 2 || sz == 4); 8248 //.. 8249 //.. assign( t1, getSReg(sreg) ); 8250 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 8251 //.. putIReg(4, R_ESP, mkexpr(ta)); 8252 //.. storeLE( mkexpr(ta), mkexpr(t1) ); 8253 //.. 8254 //.. DIP("pushw %s\n", nameSReg(sreg)); 8255 //.. } 8256 //.. 8257 //.. static 8258 //.. void dis_pop_segreg ( UInt sreg, Int sz ) 8259 //.. { 8260 //.. IRTemp t1 = newTemp(Ity_I16); 8261 //.. IRTemp ta = newTemp(Ity_I32); 8262 //.. vassert(sz == 2 || sz == 4); 8263 //.. 8264 //.. assign( ta, getIReg(4, R_ESP) ); 8265 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 8266 //.. 8267 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 8268 //.. putSReg( sreg, mkexpr(t1) ); 8269 //.. DIP("pop %s\n", nameSReg(sreg)); 8270 //.. 
} 8271 8272 static 8273 void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 ) 8274 { 8275 IRTemp t1 = newTemp(Ity_I64); 8276 IRTemp t2 = newTemp(Ity_I64); 8277 IRTemp t3 = newTemp(Ity_I64); 8278 assign(t1, getIReg64(R_RSP)); 8279 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8280 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8281 putIReg64(R_RSP, mkexpr(t3)); 8282 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8283 jmp_treg(dres, Ijk_Ret, t2); 8284 vassert(dres->whatNext == Dis_StopHere); 8285 } 8286 8287 8288 /*------------------------------------------------------------*/ 8289 /*--- SSE/SSE2/SSE3 helpers ---*/ 8290 /*------------------------------------------------------------*/ 8291 8292 /* Worker function; do not call directly. 8293 Handles full width G = G `op` E and G = (not G) `op` E. 8294 */ 8295 8296 static ULong dis_SSE_E_to_G_all_wrk ( 8297 VexAbiInfo* vbi, 8298 Prefix pfx, Long delta, 8299 HChar* opname, IROp op, 8300 Bool invertG 8301 ) 8302 { 8303 HChar dis_buf[50]; 8304 Int alen; 8305 IRTemp addr; 8306 UChar rm = getUChar(delta); 8307 IRExpr* gpart 8308 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8309 : getXMMReg(gregOfRexRM(pfx,rm)); 8310 if (epartIsReg(rm)) { 8311 putXMMReg( gregOfRexRM(pfx,rm), 8312 binop(op, gpart, 8313 getXMMReg(eregOfRexRM(pfx,rm))) ); 8314 DIP("%s %s,%s\n", opname, 8315 nameXMMReg(eregOfRexRM(pfx,rm)), 8316 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8317 return delta+1; 8318 } else { 8319 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8320 putXMMReg( gregOfRexRM(pfx,rm), 8321 binop(op, gpart, 8322 loadLE(Ity_V128, mkexpr(addr))) ); 8323 DIP("%s %s,%s\n", opname, 8324 dis_buf, 8325 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8326 return delta+alen; 8327 } 8328 } 8329 8330 8331 /* All lanes SSE binary operation, G = G `op` E. */ 8332 8333 static 8334 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi, 8335 Prefix pfx, Long delta, 8336 HChar* opname, IROp op ) 8337 { 8338 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8339 } 8340 8341 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8342 8343 static 8344 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi, 8345 Prefix pfx, Long delta, 8346 HChar* opname, IROp op ) 8347 { 8348 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8349 } 8350 8351 8352 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 8353 8354 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi, 8355 Prefix pfx, Long delta, 8356 HChar* opname, IROp op ) 8357 { 8358 HChar dis_buf[50]; 8359 Int alen; 8360 IRTemp addr; 8361 UChar rm = getUChar(delta); 8362 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8363 if (epartIsReg(rm)) { 8364 putXMMReg( gregOfRexRM(pfx,rm), 8365 binop(op, gpart, 8366 getXMMReg(eregOfRexRM(pfx,rm))) ); 8367 DIP("%s %s,%s\n", opname, 8368 nameXMMReg(eregOfRexRM(pfx,rm)), 8369 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8370 return delta+1; 8371 } else { 8372 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8373 E operand needs to be made simply of zeroes. */ 8374 IRTemp epart = newTemp(Ity_V128); 8375 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8376 assign( epart, unop( Iop_32UtoV128, 8377 loadLE(Ity_I32, mkexpr(addr))) ); 8378 putXMMReg( gregOfRexRM(pfx,rm), 8379 binop(op, gpart, mkexpr(epart)) ); 8380 DIP("%s %s,%s\n", opname, 8381 dis_buf, 8382 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8383 return delta+alen; 8384 } 8385 } 8386 8387 8388 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
*/ 8389 8390 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi, 8391 Prefix pfx, Long delta, 8392 HChar* opname, IROp op ) 8393 { 8394 HChar dis_buf[50]; 8395 Int alen; 8396 IRTemp addr; 8397 UChar rm = getUChar(delta); 8398 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8399 if (epartIsReg(rm)) { 8400 putXMMReg( gregOfRexRM(pfx,rm), 8401 binop(op, gpart, 8402 getXMMReg(eregOfRexRM(pfx,rm))) ); 8403 DIP("%s %s,%s\n", opname, 8404 nameXMMReg(eregOfRexRM(pfx,rm)), 8405 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8406 return delta+1; 8407 } else { 8408 /* We can only do a 64-bit memory read, so the upper half of the 8409 E operand needs to be made simply of zeroes. */ 8410 IRTemp epart = newTemp(Ity_V128); 8411 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8412 assign( epart, unop( Iop_64UtoV128, 8413 loadLE(Ity_I64, mkexpr(addr))) ); 8414 putXMMReg( gregOfRexRM(pfx,rm), 8415 binop(op, gpart, mkexpr(epart)) ); 8416 DIP("%s %s,%s\n", opname, 8417 dis_buf, 8418 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8419 return delta+alen; 8420 } 8421 } 8422 8423 8424 /* All lanes unary SSE operation, G = op(E). */ 8425 8426 static ULong dis_SSE_E_to_G_unary_all ( 8427 VexAbiInfo* vbi, 8428 Prefix pfx, Long delta, 8429 HChar* opname, IROp op 8430 ) 8431 { 8432 HChar dis_buf[50]; 8433 Int alen; 8434 IRTemp addr; 8435 UChar rm = getUChar(delta); 8436 if (epartIsReg(rm)) { 8437 putXMMReg( gregOfRexRM(pfx,rm), 8438 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) ); 8439 DIP("%s %s,%s\n", opname, 8440 nameXMMReg(eregOfRexRM(pfx,rm)), 8441 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8442 return delta+1; 8443 } else { 8444 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8445 putXMMReg( gregOfRexRM(pfx,rm), 8446 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 8447 DIP("%s %s,%s\n", opname, 8448 dis_buf, 8449 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8450 return delta+alen; 8451 } 8452 } 8453 8454 8455 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 8456 8457 static ULong dis_SSE_E_to_G_unary_lo32 ( 8458 VexAbiInfo* vbi, 8459 Prefix pfx, Long delta, 8460 HChar* opname, IROp op 8461 ) 8462 { 8463 /* First we need to get the old G value and patch the low 32 bits 8464 of the E operand into it. Then apply op and write back to G. */ 8465 HChar dis_buf[50]; 8466 Int alen; 8467 IRTemp addr; 8468 UChar rm = getUChar(delta); 8469 IRTemp oldG0 = newTemp(Ity_V128); 8470 IRTemp oldG1 = newTemp(Ity_V128); 8471 8472 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8473 8474 if (epartIsReg(rm)) { 8475 assign( oldG1, 8476 binop( Iop_SetV128lo32, 8477 mkexpr(oldG0), 8478 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8479 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8480 DIP("%s %s,%s\n", opname, 8481 nameXMMReg(eregOfRexRM(pfx,rm)), 8482 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8483 return delta+1; 8484 } else { 8485 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8486 assign( oldG1, 8487 binop( Iop_SetV128lo32, 8488 mkexpr(oldG0), 8489 loadLE(Ity_I32, mkexpr(addr)) )); 8490 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8491 DIP("%s %s,%s\n", opname, 8492 dis_buf, 8493 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8494 return delta+alen; 8495 } 8496 } 8497 8498 8499 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 8500 8501 static ULong dis_SSE_E_to_G_unary_lo64 ( 8502 VexAbiInfo* vbi, 8503 Prefix pfx, Long delta, 8504 HChar* opname, IROp op 8505 ) 8506 { 8507 /* First we need to get the old G value and patch the low 64 bits 8508 of the E operand into it. Then apply op and write back to G. 
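   (Illustrative note, based on how callers appear to use this
   helper: a typical client is a scalar op such as SQRTSD, where the
   IR op nominally processes the whole vector but only the low
   lane's result matters; patching E's low 64 bits into the old G
   value first keeps the untouched upper lane well-defined.)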
*/ 8509 HChar dis_buf[50]; 8510 Int alen; 8511 IRTemp addr; 8512 UChar rm = getUChar(delta); 8513 IRTemp oldG0 = newTemp(Ity_V128); 8514 IRTemp oldG1 = newTemp(Ity_V128); 8515 8516 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8517 8518 if (epartIsReg(rm)) { 8519 assign( oldG1, 8520 binop( Iop_SetV128lo64, 8521 mkexpr(oldG0), 8522 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 8523 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8524 DIP("%s %s,%s\n", opname, 8525 nameXMMReg(eregOfRexRM(pfx,rm)), 8526 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8527 return delta+1; 8528 } else { 8529 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8530 assign( oldG1, 8531 binop( Iop_SetV128lo64, 8532 mkexpr(oldG0), 8533 loadLE(Ity_I64, mkexpr(addr)) )); 8534 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8535 DIP("%s %s,%s\n", opname, 8536 dis_buf, 8537 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8538 return delta+alen; 8539 } 8540 } 8541 8542 8543 /* SSE integer binary operation: 8544 G = G `op` E (eLeft == False) 8545 G = E `op` G (eLeft == True) 8546 */ 8547 static ULong dis_SSEint_E_to_G( 8548 VexAbiInfo* vbi, 8549 Prefix pfx, Long delta, 8550 HChar* opname, IROp op, 8551 Bool eLeft 8552 ) 8553 { 8554 HChar dis_buf[50]; 8555 Int alen; 8556 IRTemp addr; 8557 UChar rm = getUChar(delta); 8558 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8559 IRExpr* epart = NULL; 8560 if (epartIsReg(rm)) { 8561 epart = getXMMReg(eregOfRexRM(pfx,rm)); 8562 DIP("%s %s,%s\n", opname, 8563 nameXMMReg(eregOfRexRM(pfx,rm)), 8564 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8565 delta += 1; 8566 } else { 8567 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8568 epart = loadLE(Ity_V128, mkexpr(addr)); 8569 DIP("%s %s,%s\n", opname, 8570 dis_buf, 8571 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8572 delta += alen; 8573 } 8574 putXMMReg( gregOfRexRM(pfx,rm), 8575 eLeft ? binop(op, epart, gpart) 8576 : binop(op, gpart, epart) ); 8577 return delta; 8578 } 8579 8580 8581 /* Helper for doing SSE FP comparisons. False return ==> unhandled. 8582 This is all a bit of a kludge in that it ignores the subtleties of 8583 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel 8584 spec. */ 8585 static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP, 8586 /*OUT*/IROp* opP, 8587 /*OUT*/Bool* postNotP, 8588 UInt imm8, Bool all_lanes, Int sz ) 8589 { 8590 if (imm8 >= 32) return False; 8591 8592 /* First, compute a (preSwap, op, postNot) triple from 8593 the supplied imm8. */ 8594 Bool pre = False; 8595 IROp op = Iop_INVALID; 8596 Bool not = False; 8597 8598 # define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; } 8599 // If you add a case here, add a corresponding test for both VCMPSD_128 8600 // and VCMPSS_128 in avx-1.c. 8601 switch (imm8) { 8602 // "O" = ordered, "U" = unordered 8603 // "Q" = non-signalling (quiet), "S" = signalling 8604 // 8605 // swap operands? 8606 // | 8607 // | cmp op invert after? 
8608 // | | | 8609 // v v v 8610 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ 8611 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS 8612 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS 8613 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q 8614 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ 8615 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US 8616 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US 8617 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q 8618 // 0x8 EQ_UQ 8619 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US 8620 /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns] */ 8621 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US 8622 // 0xB FALSE_OQ 8623 // 0xC: this isn't really right because it returns all-1s when 8624 // either operand is a NaN, and it should return all-0s. 8625 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ 8626 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS 8627 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS 8628 // 0xF TRUE_UQ 8629 // 0x10 EQ_OS 8630 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ 8631 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ 8632 // 0x13 UNORD_S 8633 // 0x14 NEQ_US 8634 // 0x15 NLT_UQ 8635 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ 8636 // 0x17 ORD_S 8637 // 0x18 EQ_US 8638 // 0x19 NGE_UQ 8639 // 0x1A NGT_UQ 8640 // 0x1B FALSE_OS 8641 // 0x1C NEQ_OS 8642 // 0x1D GE_OQ 8643 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ 8644 // 0x1F TRUE_US 8645 /* Don't forget to add test cases to VCMPSS_128_<imm8> in 8646 avx-1.c if new cases turn up. */ 8647 default: break; 8648 } 8649 # undef XXX 8650 if (op == Iop_INVALID) return False; 8651 8652 /* Now convert the op into one with the same arithmetic but that is 8653 correct for the width and laneage requirements. */ 8654 8655 /**/ if (sz == 4 && all_lanes) { 8656 switch (op) { 8657 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break; 8658 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break; 8659 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break; 8660 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break; 8661 default: vassert(0); 8662 } 8663 } 8664 else if (sz == 4 && !all_lanes) { 8665 switch (op) { 8666 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break; 8667 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break; 8668 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break; 8669 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break; 8670 default: vassert(0); 8671 } 8672 } 8673 else if (sz == 8 && all_lanes) { 8674 switch (op) { 8675 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break; 8676 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break; 8677 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break; 8678 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break; 8679 default: vassert(0); 8680 } 8681 } 8682 else if (sz == 8 && !all_lanes) { 8683 switch (op) { 8684 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break; 8685 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break; 8686 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break; 8687 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break; 8688 default: vassert(0); 8689 } 8690 } 8691 else { 8692 vpanic("findSSECmpOp(amd64,guest)"); 8693 } 8694 8695 *preSwapP = pre; *opP = op; *postNotP = not; 8696 return True; 8697 } 8698 8699 8700 /* Handles SSE 32F/64F comparisons. It can fail, in which case it 8701 returns the original delta to indicate failure. 
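   As a worked example of the decode (following the findSSECmpOp
   table above): CMPSD with imm8 == 1 (cmpltsd) maps to
   Iop_CmpLT64F0x2 with no post-processing, while imm8 == 5
   (cmpnltsd) uses the same IR op followed by inverting just the
   low 64-bit lane, via the 0x00FF byte-mask below.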
*/ 8702 8703 static Long dis_SSE_cmp_E_to_G ( VexAbiInfo* vbi, 8704 Prefix pfx, Long delta, 8705 HChar* opname, Bool all_lanes, Int sz ) 8706 { 8707 Long delta0 = delta; 8708 HChar dis_buf[50]; 8709 Int alen; 8710 UInt imm8; 8711 IRTemp addr; 8712 Bool preSwap = False; 8713 IROp op = Iop_INVALID; 8714 Bool postNot = False; 8715 IRTemp plain = newTemp(Ity_V128); 8716 UChar rm = getUChar(delta); 8717 UShort mask = 0; 8718 vassert(sz == 4 || sz == 8); 8719 if (epartIsReg(rm)) { 8720 imm8 = getUChar(delta+1); 8721 if (imm8 >= 8) return delta0; /* FAIL */ 8722 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 8723 if (!ok) return delta0; /* FAIL */ 8724 vassert(!preSwap); /* never needed for imm8 < 8 */ 8725 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 8726 getXMMReg(eregOfRexRM(pfx,rm))) ); 8727 delta += 2; 8728 DIP("%s $%d,%s,%s\n", opname, 8729 (Int)imm8, 8730 nameXMMReg(eregOfRexRM(pfx,rm)), 8731 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8732 } else { 8733 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 8734 imm8 = getUChar(delta+alen); 8735 if (imm8 >= 8) return delta0; /* FAIL */ 8736 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 8737 if (!ok) return delta0; /* FAIL */ 8738 vassert(!preSwap); /* never needed for imm8 < 8 */ 8739 assign( plain, 8740 binop( 8741 op, 8742 getXMMReg(gregOfRexRM(pfx,rm)), 8743 all_lanes 8744 ? loadLE(Ity_V128, mkexpr(addr)) 8745 : sz == 8 8746 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 8747 : /*sz==4*/ 8748 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 8749 ) 8750 ); 8751 delta += alen+1; 8752 DIP("%s $%d,%s,%s\n", opname, 8753 (Int)imm8, 8754 dis_buf, 8755 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8756 } 8757 8758 if (postNot && all_lanes) { 8759 putXMMReg( gregOfRexRM(pfx,rm), 8760 unop(Iop_NotV128, mkexpr(plain)) ); 8761 } 8762 else 8763 if (postNot && !all_lanes) { 8764 mask = toUShort(sz==4 ? 0x000F : 0x00FF); 8765 putXMMReg( gregOfRexRM(pfx,rm), 8766 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 8767 } 8768 else { 8769 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 8770 } 8771 8772 return delta; 8773 } 8774 8775 8776 /* Vector by scalar shift of G by the amount specified at the bottom 8777 of E. 
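   Note on out-of-range amounts, as implemented below: the amount is
   read as a full 64-bit value from E's low lane; amounts >= the
   lane size produce all-zero lanes for logical shifts, and for
   arithmetic shifts act as a shift by (lanesize - 1), i.e. each
   lane fills with its sign bit.  This matches the hardware's
   saturating treatment of oversize shift counts.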
*/

static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  g0   = newTemp(Ity_V128);
   IRTemp  g1   = newTemp(Ity_V128);
   IRTemp  amt  = newTemp(Ity_I64);
   IRTemp  amt8 = newTemp(Ity_I8);
   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRexRM(pfx,rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
     assign(
        g1,
        IRExpr_Mux0X(
           unop(Iop_1Uto8,
                binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
           mkV128(0x0000),
           binop(op, mkexpr(g0), mkexpr(amt8))
        )
     );
   } else
   if (sar) {
     assign(
        g1,
        IRExpr_Mux0X(
           unop(Iop_1Uto8,
                binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))),
           binop(op, mkexpr(g0), mkU8(size-1)),
           binop(op, mkexpr(g0), mkexpr(amt8))
        )
     );
   } else {
      vassert(0);
   }

   putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte. */

static
ULong dis_SSE_shiftE_imm ( Prefix pfx,
                           Long delta, HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm = getUChar(delta);
   IRTemp  e0 = newTemp(Ity_V128);
   IRTemp  e1 = newTemp(Ity_V128);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRexRM(pfx,rm)) );
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
     assign( e1, amt >= size
                    ? mkV128(0x0000)
                    : binop(op, mkexpr(e0), mkU8(amt))
     );
   } else
   if (sar) {
     assign( e1, amt >= size
                    ?
binop(op, mkexpr(e0), mkU8(size-1)) 8897 : binop(op, mkexpr(e0), mkU8(amt)) 8898 ); 8899 } else { 8900 vassert(0); 8901 } 8902 8903 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 8904 return delta; 8905 } 8906 8907 8908 /* Get the current SSE rounding mode. */ 8909 8910 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 8911 { 8912 return 8913 unop( Iop_64to32, 8914 binop( Iop_And64, 8915 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 8916 mkU64(3) )); 8917 } 8918 8919 static void put_sse_roundingmode ( IRExpr* sseround ) 8920 { 8921 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 8922 stmt( IRStmt_Put( OFFB_SSEROUND, 8923 unop(Iop_32Uto64,sseround) ) ); 8924 } 8925 8926 /* Break a V128-bit value up into four 32-bit ints. */ 8927 8928 static void breakupV128to32s ( IRTemp t128, 8929 /*OUTs*/ 8930 IRTemp* t3, IRTemp* t2, 8931 IRTemp* t1, IRTemp* t0 ) 8932 { 8933 IRTemp hi64 = newTemp(Ity_I64); 8934 IRTemp lo64 = newTemp(Ity_I64); 8935 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 8936 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 8937 8938 vassert(t0 && *t0 == IRTemp_INVALID); 8939 vassert(t1 && *t1 == IRTemp_INVALID); 8940 vassert(t2 && *t2 == IRTemp_INVALID); 8941 vassert(t3 && *t3 == IRTemp_INVALID); 8942 8943 *t0 = newTemp(Ity_I32); 8944 *t1 = newTemp(Ity_I32); 8945 *t2 = newTemp(Ity_I32); 8946 *t3 = newTemp(Ity_I32); 8947 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 8948 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 8949 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 8950 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 8951 } 8952 8953 /* Construct a V128-bit value from four 32-bit ints. */ 8954 8955 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2, 8956 IRTemp t1, IRTemp t0 ) 8957 { 8958 return 8959 binop( Iop_64HLtoV128, 8960 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 8961 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 8962 ); 8963 } 8964 8965 /* Break a 64-bit value up into four 16-bit ints. */ 8966 8967 static void breakup64to16s ( IRTemp t64, 8968 /*OUTs*/ 8969 IRTemp* t3, IRTemp* t2, 8970 IRTemp* t1, IRTemp* t0 ) 8971 { 8972 IRTemp hi32 = newTemp(Ity_I32); 8973 IRTemp lo32 = newTemp(Ity_I32); 8974 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 8975 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 8976 8977 vassert(t0 && *t0 == IRTemp_INVALID); 8978 vassert(t1 && *t1 == IRTemp_INVALID); 8979 vassert(t2 && *t2 == IRTemp_INVALID); 8980 vassert(t3 && *t3 == IRTemp_INVALID); 8981 8982 *t0 = newTemp(Ity_I16); 8983 *t1 = newTemp(Ity_I16); 8984 *t2 = newTemp(Ity_I16); 8985 *t3 = newTemp(Ity_I16); 8986 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 8987 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 8988 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 8989 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 8990 } 8991 8992 /* Construct a 64-bit value from four 16-bit ints. */ 8993 8994 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 8995 IRTemp t1, IRTemp t0 ) 8996 { 8997 return 8998 binop( Iop_32HLto64, 8999 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 9000 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 9001 ); 9002 } 9003 9004 /* Break a V256-bit value up into four 64-bit ints. 
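   Lane numbering is little-endian: t0 receives bits 63:0 and t3
   bits 255:192, so (t3 << 192) | (t2 << 128) | (t1 << 64) | t0
   would reassemble the original 256-bit value.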
*/ 9005 9006 static void breakupV256to64s ( IRTemp t256, 9007 /*OUTs*/ 9008 IRTemp* t3, IRTemp* t2, 9009 IRTemp* t1, IRTemp* t0 ) 9010 { 9011 vassert(t0 && *t0 == IRTemp_INVALID); 9012 vassert(t1 && *t1 == IRTemp_INVALID); 9013 vassert(t2 && *t2 == IRTemp_INVALID); 9014 vassert(t3 && *t3 == IRTemp_INVALID); 9015 *t0 = newTemp(Ity_I64); 9016 *t1 = newTemp(Ity_I64); 9017 *t2 = newTemp(Ity_I64); 9018 *t3 = newTemp(Ity_I64); 9019 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) ); 9020 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) ); 9021 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) ); 9022 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) ); 9023 } 9024 9025 /* Break a V256-bit value up into two V128s. */ 9026 9027 static void breakupV256toV128s ( IRTemp t256, 9028 /*OUTs*/ 9029 IRTemp* t1, IRTemp* t0 ) 9030 { 9031 vassert(t0 && *t0 == IRTemp_INVALID); 9032 vassert(t1 && *t1 == IRTemp_INVALID); 9033 *t0 = newTemp(Ity_V128); 9034 *t1 = newTemp(Ity_V128); 9035 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256))); 9036 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256))); 9037 } 9038 9039 /* Break a V256-bit value up into eight 32-bit ints. */ 9040 9041 static void breakupV256to32s ( IRTemp t256, 9042 /*OUTs*/ 9043 IRTemp* t7, IRTemp* t6, 9044 IRTemp* t5, IRTemp* t4, 9045 IRTemp* t3, IRTemp* t2, 9046 IRTemp* t1, IRTemp* t0 ) 9047 { 9048 IRTemp t128_1 = IRTemp_INVALID; 9049 IRTemp t128_0 = IRTemp_INVALID; 9050 breakupV256toV128s( t256, &t128_1, &t128_0 ); 9051 breakupV128to32s( t128_1, t7, t6, t5, t4 ); 9052 breakupV128to32s( t128_0, t3, t2, t1, t0 ); 9053 } 9054 9055 /* Break a V128-bit value up into two 64-bit ints. */ 9056 9057 static void breakupV128to64s ( IRTemp t128, 9058 /*OUTs*/ 9059 IRTemp* t1, IRTemp* t0 ) 9060 { 9061 vassert(t0 && *t0 == IRTemp_INVALID); 9062 vassert(t1 && *t1 == IRTemp_INVALID); 9063 *t0 = newTemp(Ity_I64); 9064 *t1 = newTemp(Ity_I64); 9065 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) ); 9066 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) ); 9067 } 9068 9069 /* Construct a V256-bit value from eight 32-bit ints. */ 9070 9071 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6, 9072 IRTemp t5, IRTemp t4, 9073 IRTemp t3, IRTemp t2, 9074 IRTemp t1, IRTemp t0 ) 9075 { 9076 return 9077 binop( Iop_V128HLtoV256, 9078 binop( Iop_64HLtoV128, 9079 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)), 9080 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ), 9081 binop( Iop_64HLtoV128, 9082 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9083 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) ) 9084 ); 9085 } 9086 9087 /* Construct a V256-bit value from four 64-bit ints. */ 9088 9089 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2, 9090 IRTemp t1, IRTemp t0 ) 9091 { 9092 return 9093 binop( Iop_V128HLtoV256, 9094 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)), 9095 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0)) 9096 ); 9097 } 9098 9099 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. 
Given two 64-bit 9100 values (aa,bb), computes, for each of the 4 16-bit lanes: 9101 9102 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 9103 */ 9104 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 9105 { 9106 IRTemp aa = newTemp(Ity_I64); 9107 IRTemp bb = newTemp(Ity_I64); 9108 IRTemp aahi32s = newTemp(Ity_I64); 9109 IRTemp aalo32s = newTemp(Ity_I64); 9110 IRTemp bbhi32s = newTemp(Ity_I64); 9111 IRTemp bblo32s = newTemp(Ity_I64); 9112 IRTemp rHi = newTemp(Ity_I64); 9113 IRTemp rLo = newTemp(Ity_I64); 9114 IRTemp one32x2 = newTemp(Ity_I64); 9115 assign(aa, aax); 9116 assign(bb, bbx); 9117 assign( aahi32s, 9118 binop(Iop_SarN32x2, 9119 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 9120 mkU8(16) )); 9121 assign( aalo32s, 9122 binop(Iop_SarN32x2, 9123 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 9124 mkU8(16) )); 9125 assign( bbhi32s, 9126 binop(Iop_SarN32x2, 9127 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 9128 mkU8(16) )); 9129 assign( bblo32s, 9130 binop(Iop_SarN32x2, 9131 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 9132 mkU8(16) )); 9133 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 9134 assign( 9135 rHi, 9136 binop( 9137 Iop_ShrN32x2, 9138 binop( 9139 Iop_Add32x2, 9140 binop( 9141 Iop_ShrN32x2, 9142 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 9143 mkU8(14) 9144 ), 9145 mkexpr(one32x2) 9146 ), 9147 mkU8(1) 9148 ) 9149 ); 9150 assign( 9151 rLo, 9152 binop( 9153 Iop_ShrN32x2, 9154 binop( 9155 Iop_Add32x2, 9156 binop( 9157 Iop_ShrN32x2, 9158 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 9159 mkU8(14) 9160 ), 9161 mkexpr(one32x2) 9162 ), 9163 mkU8(1) 9164 ) 9165 ); 9166 return 9167 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 9168 } 9169 9170 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit 9171 values (aa,bb), computes, for each lane: 9172 9173 if aa_lane < 0 then - bb_lane 9174 else if aa_lane > 0 then bb_lane 9175 else 0 9176 */ 9177 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 9178 { 9179 IRTemp aa = newTemp(Ity_I64); 9180 IRTemp bb = newTemp(Ity_I64); 9181 IRTemp zero = newTemp(Ity_I64); 9182 IRTemp bbNeg = newTemp(Ity_I64); 9183 IRTemp negMask = newTemp(Ity_I64); 9184 IRTemp posMask = newTemp(Ity_I64); 9185 IROp opSub = Iop_INVALID; 9186 IROp opCmpGTS = Iop_INVALID; 9187 9188 switch (laneszB) { 9189 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 9190 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 9191 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 9192 default: vassert(0); 9193 } 9194 9195 assign( aa, aax ); 9196 assign( bb, bbx ); 9197 assign( zero, mkU64(0) ); 9198 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 9199 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 9200 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 9201 9202 return 9203 binop(Iop_Or64, 9204 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 9205 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 9206 9207 } 9208 9209 9210 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 9211 value aa, computes, for each lane 9212 9213 if aa < 0 then -aa else aa 9214 9215 Note that the result is interpreted as unsigned, so that the 9216 absolute value of the most negative signed input can be 9217 represented. 
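   For example, with byte lanes, an input lane holding 0x80 (-128
   signed) yields 0x80, which read as unsigned is the correct
   absolute value, 128.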
9218 */ 9219 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB ) 9220 { 9221 IRTemp res = newTemp(Ity_I64); 9222 IRTemp zero = newTemp(Ity_I64); 9223 IRTemp aaNeg = newTemp(Ity_I64); 9224 IRTemp negMask = newTemp(Ity_I64); 9225 IRTemp posMask = newTemp(Ity_I64); 9226 IROp opSub = Iop_INVALID; 9227 IROp opSarN = Iop_INVALID; 9228 9229 switch (laneszB) { 9230 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 9231 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 9232 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 9233 default: vassert(0); 9234 } 9235 9236 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 9237 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 9238 assign( zero, mkU64(0) ); 9239 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 9240 assign( res, 9241 binop(Iop_Or64, 9242 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 9243 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) )); 9244 return res; 9245 } 9246 9247 /* XMM version of math_PABS_MMX. */ 9248 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB ) 9249 { 9250 IRTemp res = newTemp(Ity_V128); 9251 IRTemp aaHi = newTemp(Ity_I64); 9252 IRTemp aaLo = newTemp(Ity_I64); 9253 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa))); 9254 assign(aaLo, unop(Iop_V128to64, mkexpr(aa))); 9255 assign(res, binop(Iop_64HLtoV128, 9256 mkexpr(math_PABS_MMX(aaHi, laneszB)), 9257 mkexpr(math_PABS_MMX(aaLo, laneszB)))); 9258 return res; 9259 } 9260 9261 /* Specialisations of math_PABS_XMM, since there's no easy way to do 9262 partial applications in C :-( */ 9263 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) { 9264 return math_PABS_XMM(aa, 4); 9265 } 9266 9267 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) { 9268 return math_PABS_XMM(aa, 2); 9269 } 9270 9271 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) { 9272 return math_PABS_XMM(aa, 1); 9273 } 9274 9275 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 9276 IRTemp lo64, Long byteShift ) 9277 { 9278 vassert(byteShift >= 1 && byteShift <= 7); 9279 return 9280 binop(Iop_Or64, 9281 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 9282 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 9283 ); 9284 } 9285 9286 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 ) 9287 { 9288 IRTemp res = newTemp(Ity_V128); 9289 IRTemp sHi = newTemp(Ity_I64); 9290 IRTemp sLo = newTemp(Ity_I64); 9291 IRTemp dHi = newTemp(Ity_I64); 9292 IRTemp dLo = newTemp(Ity_I64); 9293 IRTemp rHi = newTemp(Ity_I64); 9294 IRTemp rLo = newTemp(Ity_I64); 9295 9296 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 9297 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 9298 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 9299 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 9300 9301 if (imm8 == 0) { 9302 assign( rHi, mkexpr(sHi) ); 9303 assign( rLo, mkexpr(sLo) ); 9304 } 9305 else if (imm8 >= 1 && imm8 <= 7) { 9306 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) ); 9307 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) ); 9308 } 9309 else if (imm8 == 8) { 9310 assign( rHi, mkexpr(dLo) ); 9311 assign( rLo, mkexpr(sHi) ); 9312 } 9313 else if (imm8 >= 9 && imm8 <= 15) { 9314 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) ); 9315 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) ); 9316 } 9317 else if (imm8 == 16) { 9318 assign( rHi, mkexpr(dHi) ); 9319 assign( rLo, mkexpr(dLo) ); 9320 } 9321 else if (imm8 >= 17 && imm8 <= 23) { 9322 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) ); 9323 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) 
);
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly! */
static
void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
               mkU64(0)),
         Ijk_SigSEGV,
         IRConst_U64(guest_RIP_curr_instr),
         OFFB_RIP
      )
   );
}

static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
}

static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
}

/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0  =  addb $imm8,  rm8
   81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
   82 /0  =  addb $imm8,  rm8
   83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

   00     =  addb r8,  rm8
   01     =  addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1  = dec rm8
   FF /1  = dec rm32  and  dec rm16

   FE /0  = inc rm8
   FF /0  = inc rm32  and  inc rm16

   F6 /3  = neg rm8
   F7 /3  = neg rm32  and  neg rm16

   F6 /2  = not rm8
   F7 /2  = not rm32  and  not rm16

   0F BB     = btcw r16, rm16    and  btcl r32, rm32
   0F BA /7  = btcw $imm8, rm16  and  btcl $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 &&
gregLO3ofRM(opc[1]) <= 1 9450 && !epartIsReg(opc[1])) 9451 return True; 9452 break; 9453 9454 case 0xF6: case 0xF7: 9455 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3 9456 && !epartIsReg(opc[1])) 9457 return True; 9458 break; 9459 9460 case 0x86: case 0x87: 9461 if (!epartIsReg(opc[1])) 9462 return True; 9463 break; 9464 9465 case 0x0F: { 9466 switch (opc[1]) { 9467 case 0xBB: case 0xB3: case 0xAB: 9468 if (!epartIsReg(opc[2])) 9469 return True; 9470 break; 9471 case 0xBA: 9472 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7 9473 && !epartIsReg(opc[2])) 9474 return True; 9475 break; 9476 case 0xB0: case 0xB1: 9477 if (!epartIsReg(opc[2])) 9478 return True; 9479 break; 9480 case 0xC7: 9481 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 9482 return True; 9483 break; 9484 case 0xC0: case 0xC1: 9485 if (!epartIsReg(opc[2])) 9486 return True; 9487 break; 9488 default: 9489 break; 9490 } /* switch (opc[1]) */ 9491 break; 9492 } 9493 9494 default: 9495 break; 9496 } /* switch (opc[0]) */ 9497 9498 return False; 9499 } 9500 9501 9502 /*------------------------------------------------------------*/ 9503 /*--- ---*/ 9504 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/ 9505 /*--- ---*/ 9506 /*------------------------------------------------------------*/ 9507 9508 static Long dis_COMISD ( VexAbiInfo* vbi, Prefix pfx, 9509 Long delta, Bool isAvx, UChar opc ) 9510 { 9511 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/); 9512 Int alen = 0; 9513 HChar dis_buf[50]; 9514 IRTemp argL = newTemp(Ity_F64); 9515 IRTemp argR = newTemp(Ity_F64); 9516 UChar modrm = getUChar(delta); 9517 IRTemp addr = IRTemp_INVALID; 9518 if (epartIsReg(modrm)) { 9519 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), 9520 0/*lowest lane*/ ) ); 9521 delta += 1; 9522 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9523 opc==0x2E ? "u" : "", 9524 nameXMMReg(eregOfRexRM(pfx,modrm)), 9525 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9526 } else { 9527 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9528 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9529 delta += alen; 9530 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9531 opc==0x2E ? "u" : "", 9532 dis_buf, 9533 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9534 } 9535 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 9536 0/*lowest lane*/ ) ); 9537 9538 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9539 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9540 stmt( IRStmt_Put( 9541 OFFB_CC_DEP1, 9542 binop( Iop_And64, 9543 unop( Iop_32Uto64, 9544 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 9545 mkU64(0x45) 9546 ))); 9547 return delta; 9548 } 9549 9550 9551 static Long dis_COMISS ( VexAbiInfo* vbi, Prefix pfx, 9552 Long delta, Bool isAvx, UChar opc ) 9553 { 9554 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/); 9555 Int alen = 0; 9556 HChar dis_buf[50]; 9557 IRTemp argL = newTemp(Ity_F32); 9558 IRTemp argR = newTemp(Ity_F32); 9559 UChar modrm = getUChar(delta); 9560 IRTemp addr = IRTemp_INVALID; 9561 if (epartIsReg(modrm)) { 9562 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 9563 0/*lowest lane*/ ) ); 9564 delta += 1; 9565 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 9566 opc==0x2E ? "u" : "", 9567 nameXMMReg(eregOfRexRM(pfx,modrm)), 9568 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9569 } else { 9570 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9571 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 9572 delta += alen; 9573 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 9574 opc==0x2E ? 
"u" : "", 9575 dis_buf, 9576 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9577 } 9578 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 9579 0/*lowest lane*/ ) ); 9580 9581 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9582 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9583 stmt( IRStmt_Put( 9584 OFFB_CC_DEP1, 9585 binop( Iop_And64, 9586 unop( Iop_32Uto64, 9587 binop(Iop_CmpF64, 9588 unop(Iop_F32toF64,mkexpr(argL)), 9589 unop(Iop_F32toF64,mkexpr(argR)))), 9590 mkU64(0x45) 9591 ))); 9592 return delta; 9593 } 9594 9595 9596 static Long dis_PSHUFD_32x4 ( VexAbiInfo* vbi, Prefix pfx, 9597 Long delta, Bool writesYmm ) 9598 { 9599 Int order; 9600 Int alen = 0; 9601 HChar dis_buf[50]; 9602 IRTemp sV = newTemp(Ity_V128); 9603 UChar modrm = getUChar(delta); 9604 HChar* strV = writesYmm ? "v" : ""; 9605 IRTemp addr = IRTemp_INVALID; 9606 if (epartIsReg(modrm)) { 9607 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 9608 order = (Int)getUChar(delta+1); 9609 delta += 1+1; 9610 DIP("%spshufd $%d,%s,%s\n", strV, order, 9611 nameXMMReg(eregOfRexRM(pfx,modrm)), 9612 nameXMMReg(gregOfRexRM(pfx,modrm))); 9613 } else { 9614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 9615 1/*byte after the amode*/ ); 9616 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9617 order = (Int)getUChar(delta+alen); 9618 delta += alen+1; 9619 DIP("%spshufd $%d,%s,%s\n", strV, order, 9620 dis_buf, 9621 nameXMMReg(gregOfRexRM(pfx,modrm))); 9622 } 9623 9624 IRTemp s3, s2, s1, s0; 9625 s3 = s2 = s1 = s0 = IRTemp_INVALID; 9626 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 9627 9628 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 9629 IRTemp dV = newTemp(Ity_V128); 9630 assign(dV, 9631 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 9632 SEL((order>>2)&3), SEL((order>>0)&3) ) 9633 ); 9634 # undef SEL 9635 9636 (writesYmm ? 

static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, mkexpr(hi64) );
   }
   else
   if (imm > 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
   } else {
      assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
      assign( lo64r,
              binop( Iop_Or64,
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * imm)),
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * (8 - imm)) )
              )
      );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}


static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, mkexpr(lo64) );
   }
   else
   if (imm > 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
   } else {
      assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
      assign( hi64r,
              binop( Iop_Or64,
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * imm)),
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * (8 - imm)) )
              )
      );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}
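
/* A sanity check on the two byte-shift helpers above, taking PSRLDQ
   with imm == 3 as an example: both 64-bit halves are shifted right
   by 24 bits, and the bottom 3 bytes of hi64 (hi64 << 40) refill the
   top of lo64 -- which is just the whole 128-bit value moved down by
   3 byte positions with zero fill, as architected.  imm >= 16
   correctly produces all zeroes. */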
"t" : "", 9760 dis_buf, 9761 nameIReg(sz, gregOfRexRM(pfx,modrm), 9762 False)); 9763 } 9764 9765 if (r2zero) { 9766 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9767 } else { 9768 assign( rmode, get_sse_roundingmode() ); 9769 } 9770 9771 if (sz == 4) { 9772 putIReg32( gregOfRexRM(pfx,modrm), 9773 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 9774 } else { 9775 vassert(sz == 8); 9776 putIReg64( gregOfRexRM(pfx,modrm), 9777 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 9778 } 9779 9780 return delta; 9781 } 9782 9783 9784 static Long dis_CVTxSS2SI ( VexAbiInfo* vbi, Prefix pfx, 9785 Long delta, Bool isAvx, UChar opc, Int sz ) 9786 { 9787 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/); 9788 HChar dis_buf[50]; 9789 Int alen = 0; 9790 UChar modrm = getUChar(delta); 9791 IRTemp addr = IRTemp_INVALID; 9792 IRTemp rmode = newTemp(Ity_I32); 9793 IRTemp f32lo = newTemp(Ity_F32); 9794 Bool r2zero = toBool(opc == 0x2C); 9795 9796 if (epartIsReg(modrm)) { 9797 delta += 1; 9798 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 9799 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 9800 nameXMMReg(eregOfRexRM(pfx,modrm)), 9801 nameIReg(sz, gregOfRexRM(pfx,modrm), 9802 False)); 9803 } else { 9804 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9805 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9806 delta += alen; 9807 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 9808 dis_buf, 9809 nameIReg(sz, gregOfRexRM(pfx,modrm), 9810 False)); 9811 } 9812 9813 if (r2zero) { 9814 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9815 } else { 9816 assign( rmode, get_sse_roundingmode() ); 9817 } 9818 9819 if (sz == 4) { 9820 putIReg32( gregOfRexRM(pfx,modrm), 9821 binop( Iop_F64toI32S, 9822 mkexpr(rmode), 9823 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9824 } else { 9825 vassert(sz == 8); 9826 putIReg64( gregOfRexRM(pfx,modrm), 9827 binop( Iop_F64toI64S, 9828 mkexpr(rmode), 9829 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9830 } 9831 9832 return delta; 9833 } 9834 9835 9836 static Long dis_CVTPS2PD_128 ( VexAbiInfo* vbi, Prefix pfx, 9837 Long delta, Bool isAvx ) 9838 { 9839 IRTemp addr = IRTemp_INVALID; 9840 Int alen = 0; 9841 HChar dis_buf[50]; 9842 IRTemp f32lo = newTemp(Ity_F32); 9843 IRTemp f32hi = newTemp(Ity_F32); 9844 UChar modrm = getUChar(delta); 9845 UInt rG = gregOfRexRM(pfx,modrm); 9846 if (epartIsReg(modrm)) { 9847 UInt rE = eregOfRexRM(pfx,modrm); 9848 assign( f32lo, getXMMRegLane32F(rE, 0) ); 9849 assign( f32hi, getXMMRegLane32F(rE, 1) ); 9850 delta += 1; 9851 DIP("%scvtps2pd %s,%s\n", 9852 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 9853 } else { 9854 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9855 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 9856 assign( f32hi, loadLE(Ity_F32, 9857 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 9858 delta += alen; 9859 DIP("%scvtps2pd %s,%s\n", 9860 isAvx ? 
"v" : "", dis_buf, nameXMMReg(rG)); 9861 } 9862 9863 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) ); 9864 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) ); 9865 if (isAvx) 9866 putYMMRegLane128( rG, 1, mkV128(0)); 9867 return delta; 9868 } 9869 9870 9871 static Long dis_CVTPS2PD_256 ( VexAbiInfo* vbi, Prefix pfx, 9872 Long delta ) 9873 { 9874 IRTemp addr = IRTemp_INVALID; 9875 Int alen = 0; 9876 HChar dis_buf[50]; 9877 IRTemp f32_0 = newTemp(Ity_F32); 9878 IRTemp f32_1 = newTemp(Ity_F32); 9879 IRTemp f32_2 = newTemp(Ity_F32); 9880 IRTemp f32_3 = newTemp(Ity_F32); 9881 UChar modrm = getUChar(delta); 9882 UInt rG = gregOfRexRM(pfx,modrm); 9883 if (epartIsReg(modrm)) { 9884 UInt rE = eregOfRexRM(pfx,modrm); 9885 assign( f32_0, getXMMRegLane32F(rE, 0) ); 9886 assign( f32_1, getXMMRegLane32F(rE, 1) ); 9887 assign( f32_2, getXMMRegLane32F(rE, 2) ); 9888 assign( f32_3, getXMMRegLane32F(rE, 3) ); 9889 delta += 1; 9890 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 9891 } else { 9892 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9893 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) ); 9894 assign( f32_1, loadLE(Ity_F32, 9895 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 9896 assign( f32_2, loadLE(Ity_F32, 9897 binop(Iop_Add64,mkexpr(addr),mkU64(8))) ); 9898 assign( f32_3, loadLE(Ity_F32, 9899 binop(Iop_Add64,mkexpr(addr),mkU64(12))) ); 9900 delta += alen; 9901 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG)); 9902 } 9903 9904 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) ); 9905 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) ); 9906 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) ); 9907 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) ); 9908 return delta; 9909 } 9910 9911 9912 static Long dis_CVTPD2PS_128 ( VexAbiInfo* vbi, Prefix pfx, 9913 Long delta, Bool isAvx ) 9914 { 9915 IRTemp addr = IRTemp_INVALID; 9916 Int alen = 0; 9917 HChar dis_buf[50]; 9918 UChar modrm = getUChar(delta); 9919 UInt rG = gregOfRexRM(pfx,modrm); 9920 IRTemp argV = newTemp(Ity_V128); 9921 IRTemp rmode = newTemp(Ity_I32); 9922 if (epartIsReg(modrm)) { 9923 UInt rE = eregOfRexRM(pfx,modrm); 9924 assign( argV, getXMMReg(rE) ); 9925 delta += 1; 9926 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 9927 nameXMMReg(rE), nameXMMReg(rG)); 9928 } else { 9929 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9930 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9931 delta += alen; 9932 DIP("%scvtpd2ps %s,%s\n", isAvx ? 
"v" : "", 9933 dis_buf, nameXMMReg(rG) ); 9934 } 9935 9936 assign( rmode, get_sse_roundingmode() ); 9937 IRTemp t0 = newTemp(Ity_F64); 9938 IRTemp t1 = newTemp(Ity_F64); 9939 assign( t0, unop(Iop_ReinterpI64asF64, 9940 unop(Iop_V128to64, mkexpr(argV))) ); 9941 assign( t1, unop(Iop_ReinterpI64asF64, 9942 unop(Iop_V128HIto64, mkexpr(argV))) ); 9943 9944 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) ) 9945 putXMMRegLane32( rG, 3, mkU32(0) ); 9946 putXMMRegLane32( rG, 2, mkU32(0) ); 9947 putXMMRegLane32F( rG, 1, CVT(t1) ); 9948 putXMMRegLane32F( rG, 0, CVT(t0) ); 9949 # undef CVT 9950 if (isAvx) 9951 putYMMRegLane128( rG, 1, mkV128(0) ); 9952 9953 return delta; 9954 } 9955 9956 9957 static Long dis_CVTxPS2DQ_128 ( VexAbiInfo* vbi, Prefix pfx, 9958 Long delta, Bool isAvx, Bool r2zero ) 9959 { 9960 IRTemp addr = IRTemp_INVALID; 9961 Int alen = 0; 9962 HChar dis_buf[50]; 9963 UChar modrm = getUChar(delta); 9964 IRTemp argV = newTemp(Ity_V128); 9965 IRTemp rmode = newTemp(Ity_I32); 9966 UInt rG = gregOfRexRM(pfx,modrm); 9967 IRTemp t0, t1, t2, t3; 9968 9969 if (epartIsReg(modrm)) { 9970 UInt rE = eregOfRexRM(pfx,modrm); 9971 assign( argV, getXMMReg(rE) ); 9972 delta += 1; 9973 DIP("%scvt%sps2dq %s,%s\n", 9974 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 9975 } else { 9976 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9977 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9978 delta += alen; 9979 DIP("%scvt%sps2dq %s,%s\n", 9980 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 9981 } 9982 9983 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 9984 : get_sse_roundingmode() ); 9985 t0 = t1 = t2 = t3 = IRTemp_INVALID; 9986 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 9987 /* This is less than ideal. If it turns out to be a performance 9988 bottleneck it can be improved. */ 9989 # define CVT(_t) \ 9990 binop( Iop_F64toI32S, \ 9991 mkexpr(rmode), \ 9992 unop( Iop_F32toF64, \ 9993 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9994 9995 putXMMRegLane32( rG, 3, CVT(t3) ); 9996 putXMMRegLane32( rG, 2, CVT(t2) ); 9997 putXMMRegLane32( rG, 1, CVT(t1) ); 9998 putXMMRegLane32( rG, 0, CVT(t0) ); 9999 # undef CVT 10000 if (isAvx) 10001 putYMMRegLane128( rG, 1, mkV128(0) ); 10002 10003 return delta; 10004 } 10005 10006 10007 static Long dis_CVTxPS2DQ_256 ( VexAbiInfo* vbi, Prefix pfx, 10008 Long delta, Bool r2zero ) 10009 { 10010 IRTemp addr = IRTemp_INVALID; 10011 Int alen = 0; 10012 HChar dis_buf[50]; 10013 UChar modrm = getUChar(delta); 10014 IRTemp argV = newTemp(Ity_V256); 10015 IRTemp rmode = newTemp(Ity_I32); 10016 UInt rG = gregOfRexRM(pfx,modrm); 10017 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10018 10019 if (epartIsReg(modrm)) { 10020 UInt rE = eregOfRexRM(pfx,modrm); 10021 assign( argV, getYMMReg(rE) ); 10022 delta += 1; 10023 DIP("vcvt%sps2dq %s,%s\n", 10024 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG)); 10025 } else { 10026 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10027 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10028 delta += alen; 10029 DIP("vcvt%sps2dq %s,%s\n", 10030 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) ); 10031 } 10032 10033 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10034 : get_sse_roundingmode() ); 10035 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID; 10036 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10037 /* This is less than ideal. If it turns out to be a performance 10038 bottleneck it can be improved. 

static Long dis_CVTxPS2DQ_256 ( VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3, t4, t5, t6, t7;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
   breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
   /* This is less than ideal.  If it turns out to be a performance
      bottleneck it can be improved. */
#  define CVT(_t)                             \
      binop( Iop_F64toI32S,                   \
             mkexpr(rmode),                   \
             unop( Iop_F32toF64,              \
                   unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

   putYMMRegLane32( rG, 7, CVT(t7) );
   putYMMRegLane32( rG, 6, CVT(t6) );
   putYMMRegLane32( rG, 5, CVT(t5) );
   putYMMRegLane32( rG, 4, CVT(t4) );
   putYMMRegLane32( rG, 3, CVT(t3) );
   putYMMRegLane32( rG, 2, CVT(t2) );
   putYMMRegLane32( rG, 1, CVT(t1) );
   putYMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT

   return delta;
}


static Long dis_CVTxPD2DQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%spd2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvt%spd2dqx %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = newTemp(Ity_F64);
   t1 = newTemp(Ity_F64);
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t)  binop( Iop_F64toI32S,      \
                          mkexpr(rmode),      \
                          mkexpr(_t) )

   putXMMRegLane32( rG, 3, mkU32(0) );
   putXMMRegLane32( rG, 2, mkU32(0) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
"t" : "", dis_buf, nameXMMReg(rG) ); 10138 } 10139 10140 if (r2zero) { 10141 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10142 } else { 10143 assign( rmode, get_sse_roundingmode() ); 10144 } 10145 10146 t0 = IRTemp_INVALID; 10147 t1 = IRTemp_INVALID; 10148 t2 = IRTemp_INVALID; 10149 t3 = IRTemp_INVALID; 10150 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 10151 10152 # define CVT(_t) binop( Iop_F64toI32S, \ 10153 mkexpr(rmode), \ 10154 unop( Iop_ReinterpI64asF64, \ 10155 mkexpr(_t) ) ) 10156 10157 putXMMRegLane32( rG, 3, CVT(t3) ); 10158 putXMMRegLane32( rG, 2, CVT(t2) ); 10159 putXMMRegLane32( rG, 1, CVT(t1) ); 10160 putXMMRegLane32( rG, 0, CVT(t0) ); 10161 # undef CVT 10162 putYMMRegLane128( rG, 1, mkV128(0) ); 10163 10164 return delta; 10165 } 10166 10167 10168 static Long dis_CVTDQ2PS_128 ( VexAbiInfo* vbi, Prefix pfx, 10169 Long delta, Bool isAvx ) 10170 { 10171 IRTemp addr = IRTemp_INVALID; 10172 Int alen = 0; 10173 HChar dis_buf[50]; 10174 UChar modrm = getUChar(delta); 10175 IRTemp argV = newTemp(Ity_V128); 10176 IRTemp rmode = newTemp(Ity_I32); 10177 UInt rG = gregOfRexRM(pfx,modrm); 10178 IRTemp t0, t1, t2, t3; 10179 10180 if (epartIsReg(modrm)) { 10181 UInt rE = eregOfRexRM(pfx,modrm); 10182 assign( argV, getXMMReg(rE) ); 10183 delta += 1; 10184 DIP("%scvtdq2ps %s,%s\n", 10185 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10186 } else { 10187 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10188 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10189 delta += alen; 10190 DIP("%scvtdq2ps %s,%s\n", 10191 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) ); 10192 } 10193 10194 assign( rmode, get_sse_roundingmode() ); 10195 t0 = IRTemp_INVALID; 10196 t1 = IRTemp_INVALID; 10197 t2 = IRTemp_INVALID; 10198 t3 = IRTemp_INVALID; 10199 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10200 10201 # define CVT(_t) binop( Iop_F64toF32, \ 10202 mkexpr(rmode), \ 10203 unop(Iop_I32StoF64,mkexpr(_t))) 10204 10205 putXMMRegLane32F( rG, 3, CVT(t3) ); 10206 putXMMRegLane32F( rG, 2, CVT(t2) ); 10207 putXMMRegLane32F( rG, 1, CVT(t1) ); 10208 putXMMRegLane32F( rG, 0, CVT(t0) ); 10209 # undef CVT 10210 if (isAvx) 10211 putYMMRegLane128( rG, 1, mkV128(0) ); 10212 10213 return delta; 10214 } 10215 10216 static Long dis_CVTDQ2PS_256 ( VexAbiInfo* vbi, Prefix pfx, 10217 Long delta ) 10218 { 10219 IRTemp addr = IRTemp_INVALID; 10220 Int alen = 0; 10221 HChar dis_buf[50]; 10222 UChar modrm = getUChar(delta); 10223 IRTemp argV = newTemp(Ity_V256); 10224 IRTemp rmode = newTemp(Ity_I32); 10225 UInt rG = gregOfRexRM(pfx,modrm); 10226 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10227 10228 if (epartIsReg(modrm)) { 10229 UInt rE = eregOfRexRM(pfx,modrm); 10230 assign( argV, getYMMReg(rE) ); 10231 delta += 1; 10232 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 10233 } else { 10234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10235 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10236 delta += alen; 10237 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) ); 10238 } 10239 10240 assign( rmode, get_sse_roundingmode() ); 10241 t0 = IRTemp_INVALID; 10242 t1 = IRTemp_INVALID; 10243 t2 = IRTemp_INVALID; 10244 t3 = IRTemp_INVALID; 10245 t4 = IRTemp_INVALID; 10246 t5 = IRTemp_INVALID; 10247 t6 = IRTemp_INVALID; 10248 t7 = IRTemp_INVALID; 10249 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10250 10251 # define CVT(_t) binop( Iop_F64toF32, \ 10252 mkexpr(rmode), \ 10253 unop(Iop_I32StoF64,mkexpr(_t))) 10254 10255 putYMMRegLane32F( rG, 7, CVT(t7) ); 10256 putYMMRegLane32F( rG, 6, CVT(t6) ); 10257 

static Long dis_CVTDQ2PS_256 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3, t4, t5, t6, t7;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   t4 = IRTemp_INVALID;
   t5 = IRTemp_INVALID;
   t6 = IRTemp_INVALID;
   t7 = IRTemp_INVALID;
   breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );

#  define CVT(_t)  binop( Iop_F64toF32,            \
                          mkexpr(rmode),           \
                          unop(Iop_I32StoF64,mkexpr(_t)))

   putYMMRegLane32F( rG, 7, CVT(t7) );
   putYMMRegLane32F( rG, 6, CVT(t6) );
   putYMMRegLane32F( rG, 5, CVT(t5) );
   putYMMRegLane32F( rG, 4, CVT(t4) );
   putYMMRegLane32F( rG, 3, CVT(t3) );
   putYMMRegLane32F( rG, 2, CVT(t2) );
   putYMMRegLane32F( rG, 1, CVT(t1) );
   putYMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT

   return delta;
}


static Long dis_PMOVMSKB_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt   rE = eregOfRexRM(pfx,modrm);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_I64);
   IRTemp t1 = newTemp(Ity_I64);
   IRTemp t5 = newTemp(Ity_I64);
   assign(t0, getXMMRegLane64(rE, 0));
   assign(t1, getXMMRegLane64(rE, 1));
   assign(t5, mkIRExprCCall( Ity_I64, 0/*regparms*/,
                             "amd64g_calculate_sse_pmovmskb",
                             &amd64g_calculate_sse_pmovmskb,
                             mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
   putIReg32(rG, unop(Iop_64to32,mkexpr(t5)));
   DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
       nameIReg32(rG));
   delta += 1;
   return delta;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI?  I think the
   relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res,  xIsH ? mkV128from32s( s3, d3, s2, d2 )
                     : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}


/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                                            mkexpr(s0), mkexpr(d0)));
   return res;
}
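
/* A concrete picture for the unpack helpers above, using the 128-bit
   PS variant: with xIsH == False (UNPCKLPS) the result lanes, high
   to low, are s1:d1:s0:d0 -- the low halves of source and
   destination interleaved -- and with xIsH == True (UNPCKHPS) they
   are s3:d3:s2:d2.  The PD variants do the same on 64-bit lanes. */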

/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}


/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   twice. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
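
/* Worked example for the SHUFPS selector above.  The two low result
   lanes come from dst and the two high ones from src, two imm8 bits
   each:

      shufps $0x4E, %xmm1, %xmm0   -- 0x4E = 01:00:11:10, giving
                                      s1:s0:d3:d2 (high to low)

   so with src == dst this swaps the two 64-bit halves. */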

static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}


static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
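
/* The blend helpers above (and PBLENDW below) all reduce to the same
   select-by-mask pattern, res = (s & m) | (d & ~m).  Note mkV128
   expands each of its 16 mask bits to a whole byte; e.g. BLENDPS
   with imm8 == 5 (binary 0101) uses imm8_perms[5] == 0x0F0F, so
   32-bit lanes 0 and 2 come from the source and lanes 1 and 3 from
   the destination. */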

static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make imm16 be a 16-bit version of imm8, formed by duplicating
      each bit in imm8. */
   Int i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}


static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
   return res;
}


static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1   = newTemp(Ity_I64);
   IRTemp s0   = newTemp(Ity_I64);

   assign( addV, binop(Iop_Add64Fx2, mkexpr(dV), mkexpr(sV)) );
   assign( subV, binop(Iop_Sub64Fx2, mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}
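
/* Lane picture for ADDSUBPD above: result lane 0 is d0 - s0 and
   lane 1 is d1 + s1 -- subtract in the even lanes, add in the odd
   ones.  The 256-bit and PS variants below follow the same
   even-subtract / odd-add pattern. */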

static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( addV, binop(Iop_Add64Fx4, mkexpr(dV), mkexpr(sV)) );
   assign( subV, binop(Iop_Sub64Fx4, mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}


static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( addV, binop(Iop_Add32Fx4, mkexpr(dV), mkexpr(sV)) );
   assign( subV, binop(Iop_Sub32Fx4, mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}


static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( addV, binop(Iop_Add32Fx8, mkexpr(dV), mkexpr(sV)) );
   assign( subV, binop(Iop_Sub32Fx8, mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}

/* Handle 128 bit PSHUFLW and PSHUFHW. */
static Long dis_PSHUFxW_128 ( VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV    = newTemp(Ity_V128);
   dV    = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64,   mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64   : Iop_V128HIto64, mkexpr(sV)) );

   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
             ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}


static Long dis_PEXTRW_128_EregOnly_toG ( VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long   deltaIN = delta;
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   IRTemp sV      = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   UInt   imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "",
          (Int)imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}
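
/* For instance, pextrw $3, %xmm2, %eax selects 16-bit lane 3 (bits
   63:48, the high half of s1 above) and zero-extends it into %eax.
   Note the memory-operand form is rejected by handing back deltaIN
   unchanged, which the caller treats as a decode failure. */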
"v" : "", dis_buf); 10819 storeLE( 10820 mkexpr(addr), 10821 unop(Iop_64to32, 10822 mkIRExprCCall( 10823 Ity_I64, 0/*regp*/, 10824 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 10825 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 10826 ) 10827 ) 10828 ); 10829 return delta; 10830 } 10831 10832 10833 static Long dis_LDMXCSR ( VexAbiInfo* vbi, Prefix pfx, 10834 Long delta, Bool isAvx ) 10835 { 10836 IRTemp addr = IRTemp_INVALID; 10837 Int alen = 0; 10838 HChar dis_buf[50]; 10839 UChar modrm = getUChar(delta); 10840 vassert(!epartIsReg(modrm)); /* ensured by caller */ 10841 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */ 10842 10843 IRTemp t64 = newTemp(Ity_I64); 10844 IRTemp ew = newTemp(Ity_I32); 10845 10846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10847 delta += alen; 10848 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf); 10849 10850 /* The only thing we observe in %mxcsr is the rounding mode. 10851 Therefore, pass the 32-bit value (SSE native-format control 10852 word) to a clean helper, getting back a 64-bit value, the 10853 lower half of which is the SSEROUND value to store, and the 10854 upper half of which is the emulation-warning token which may 10855 be generated. 10856 */ 10857 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 10858 assign( t64, mkIRExprCCall( 10859 Ity_I64, 0/*regparms*/, 10860 "amd64g_check_ldmxcsr", 10861 &amd64g_check_ldmxcsr, 10862 mkIRExprVec_1( 10863 unop(Iop_32Uto64, 10864 loadLE(Ity_I32, mkexpr(addr)) 10865 ) 10866 ) 10867 ) 10868 ); 10869 10870 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 10871 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 10872 put_emwarn( mkexpr(ew) ); 10873 /* Finally, if an emulation warning was reported, side-exit to 10874 the next insn, reporting the warning, so that Valgrind's 10875 dispatcher sees the warning. */ 10876 stmt( 10877 IRStmt_Exit( 10878 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), 10879 Ijk_EmWarn, 10880 IRConst_U64(guest_RIP_bbstart+delta), 10881 OFFB_RIP 10882 ) 10883 ); 10884 return delta; 10885 } 10886 10887 10888 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 ) 10889 { 10890 vassert(imm8 >= 0 && imm8 <= 7); 10891 10892 // Create a V128 value which has the selected word in the 10893 // specified lane, and zeroes everywhere else. 

static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 7);

   // Create a V128 value which has the selected word in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_16Uto64, mkexpr(u16)),
                           mkU8(16 * (imm8 & 3))));
   if (imm8 < 4) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(3 << (imm8 * 2));
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
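
/* Example of the mask logic above, with imm8 == 5: halfshift puts
   the new word at bits 31:16 of the upper 64-bit half (overall bits
   95:80), and mask == (UShort)~(3 << 10) == 0xF3FF knocks out bytes
   10 and 11 -- exactly 16-bit lane 5 -- before the OR merges the new
   word in. */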
"v" : "", 10975 nameXMMReg(rE), nameXMMReg(rG) ); 10976 return delta; 10977 } 10978 10979 10980 static Long dis_MOVMSKPS_128 ( VexAbiInfo* vbi, Prefix pfx, 10981 Long delta, Bool isAvx ) 10982 { 10983 UChar modrm = getUChar(delta); 10984 UInt rG = gregOfRexRM(pfx,modrm); 10985 UInt rE = eregOfRexRM(pfx,modrm); 10986 IRTemp t0 = newTemp(Ity_I32); 10987 IRTemp t1 = newTemp(Ity_I32); 10988 IRTemp t2 = newTemp(Ity_I32); 10989 IRTemp t3 = newTemp(Ity_I32); 10990 delta += 1; 10991 assign( t0, binop( Iop_And32, 10992 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)), 10993 mkU32(1) )); 10994 assign( t1, binop( Iop_And32, 10995 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)), 10996 mkU32(2) )); 10997 assign( t2, binop( Iop_And32, 10998 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)), 10999 mkU32(4) )); 11000 assign( t3, binop( Iop_And32, 11001 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)), 11002 mkU32(8) )); 11003 putIReg32( rG, binop(Iop_Or32, 11004 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11005 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 11006 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "", 11007 nameXMMReg(rE), nameIReg32(rG)); 11008 return delta; 11009 } 11010 11011 11012 static Long dis_MOVMSKPS_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta ) 11013 { 11014 UChar modrm = getUChar(delta); 11015 UInt rG = gregOfRexRM(pfx,modrm); 11016 UInt rE = eregOfRexRM(pfx,modrm); 11017 IRTemp t0 = newTemp(Ity_I32); 11018 IRTemp t1 = newTemp(Ity_I32); 11019 IRTemp t2 = newTemp(Ity_I32); 11020 IRTemp t3 = newTemp(Ity_I32); 11021 IRTemp t4 = newTemp(Ity_I32); 11022 IRTemp t5 = newTemp(Ity_I32); 11023 IRTemp t6 = newTemp(Ity_I32); 11024 IRTemp t7 = newTemp(Ity_I32); 11025 delta += 1; 11026 assign( t0, binop( Iop_And32, 11027 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)), 11028 mkU32(1) )); 11029 assign( t1, binop( Iop_And32, 11030 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)), 11031 mkU32(2) )); 11032 assign( t2, binop( Iop_And32, 11033 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)), 11034 mkU32(4) )); 11035 assign( t3, binop( Iop_And32, 11036 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)), 11037 mkU32(8) )); 11038 assign( t4, binop( Iop_And32, 11039 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)), 11040 mkU32(16) )); 11041 assign( t5, binop( Iop_And32, 11042 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)), 11043 mkU32(32) )); 11044 assign( t6, binop( Iop_And32, 11045 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)), 11046 mkU32(64) )); 11047 assign( t7, binop( Iop_And32, 11048 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)), 11049 mkU32(128) )); 11050 putIReg32( rG, binop(Iop_Or32, 11051 binop(Iop_Or32, 11052 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11053 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ), 11054 binop(Iop_Or32, 11055 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)), 11056 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) ); 11057 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 11058 return delta; 11059 } 11060 11061 11062 static Long dis_MOVMSKPD_128 ( VexAbiInfo* vbi, Prefix pfx, 11063 Long delta, Bool isAvx ) 11064 { 11065 UChar modrm = getUChar(delta); 11066 UInt rG = gregOfRexRM(pfx,modrm); 11067 UInt rE = eregOfRexRM(pfx,modrm); 11068 IRTemp t0 = newTemp(Ity_I32); 11069 IRTemp t1 = newTemp(Ity_I32); 11070 delta += 1; 11071 assign( t0, binop( Iop_And32, 11072 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 11073 mkU32(1) )); 11074 assign( t1, binop( Iop_And32, 11075 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 11076 mkU32(2) )); 11077 putIReg32( rG, binop(Iop_Or32, 

static Long dis_MOVMSKPD_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rE    = eregOfRexRM(pfx,modrm);
   IRTemp t0    = newTemp(Ity_I32);
   IRTemp t1    = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
   DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}


static Long dis_MOVMSKPD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rE    = eregOfRexRM(pfx,modrm);
   IRTemp t0    = newTemp(Ity_I32);
   IRTemp t1    = newTemp(Ity_I32);
   IRTemp t2    = newTemp(Ity_I32);
   IRTemp t3    = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}


/* Note, this also handles SSE(1) insns. */
__attribute__((noinline))
static
Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
                        VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN,
                        DisResult* dres )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   IRTemp t4    = IRTemp_INVALID;
   IRTemp t5    = IRTemp_INVALID;
   IRTemp t6    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x10:
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movupd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
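      /* Note the dispatch pattern used here and below: opcode byte
         0x10 alone does not determine the instruction -- the
         mandatory prefix does.  66 -> MOVUPD, F2 -> MOVSD, F3 ->
         MOVSS, and no prefix -> MOVUPS, as the four guarded tests in
         this case show. */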
      /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
         G (lo half xmm).  If E is mem, upper half of G is zeroed out.
         If E is reg, upper half of G is unchanged. */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
            DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movsd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
         (lo 1/4 xmm).  If E is mem, upper 3/4 of G is zeroed out. */
      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
            DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I32, mkexpr(addr)) );
            DIP("movss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movups %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      break;

   case 0x11:
      /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
         or lo half xmm). */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
            DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
            DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
         or lo 1/4 xmm). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through, we don't yet have a test case */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
            DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ) );
            DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; awaiting test case */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
            goto decode_success;
         }
      }
      break;

   case 0x12:
      /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
      /* Identical to MOVLPS ? */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; apparently reg-reg is not possible */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlpd %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
            goto decode_success;
         }
      }
      /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
      /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
            DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlps %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
         }
         goto decode_success;
      }
      break;
   case 0x13:
      /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      0/*lower lane*/ ) );
            DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                   dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
      /* Identical to MOVLPS ? */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      0/*lower lane*/ ) );
            DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                   dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      break;

   case 0x14:
   case 0x15:
      /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
      /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
      /* These just appear to be special cases of SHUFPS */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Bool   hi = toBool(opc == 0x15);
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt   rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
         putXMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
      /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
      /* These just appear to be special cases of SHUFPD */
      if (have66noF2noF3(pfx)
          && sz == 2 /* could be 8 if rex also present */) {
         Bool   hi = toBool(opc == 0x15);
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt   rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
         putXMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      break;
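   /* Worked example for the 0x14/0x15 cases above (lane 0 lowest,
      written here high lane first): per the usual x86 definition, if
      dV = [d3 d2 d1 d0] and sV = [s3 s2 s1 s0] as 32-bit lanes, then
      UNPCKLPS produces [s1 d1 s0 d0] and UNPCKHPS produces
      [s3 d3 s2 d2].  The 64-bit forms do the same on 2-lane vectors:
      UNPCKLPD gives [s0 d0] and UNPCKHPD gives [s1 d1]. */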
   case 0x16:
      /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
      /* This seems identical to MOVHPS.  This instruction encoding is
         completely crazy. */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; apparently reg-reg is not possible */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movhpd %s,%s\n", dis_buf,
                                  nameXMMReg( gregOfRexRM(pfx,modrm) ));
            goto decode_success;
         }
      }
      /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
      /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
            DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movhps %s,%s\n", dis_buf,
                                  nameXMMReg( gregOfRexRM(pfx,modrm) ));
         }
         goto decode_success;
      }
      break;

   case 0x17:
      /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      1/*upper lane*/ ) );
            DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                  dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
      /* Again, this seems identical to MOVHPS. */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      1/*upper lane*/ ) );
            DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                  dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      break;
   case 0x18:
      /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
      /* 0F 18 /1 = PREFETCH0   -- with various different hints */
      /* 0F 18 /2 = PREFETCH1 */
      /* 0F 18 /3 = PREFETCH2 */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) >= 0
          && gregLO3ofRM(getUChar(delta)) <= 3) {
         HChar* hintstr = "??";

         modrm = getUChar(delta);
         vassert(!epartIsReg(modrm));

         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;

         switch (gregLO3ofRM(modrm)) {
            case 0: hintstr = "nta"; break;
            case 1: hintstr = "t0"; break;
            case 2: hintstr = "t1"; break;
            case 3: hintstr = "t2"; break;
            default: vassert(0);
         }

         DIP("prefetch%s %s\n", hintstr, dis_buf);
         goto decode_success;
      }
      break;

   case 0x28:
      /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movapd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movaps %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      break;
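   /* Note on MOVAPD/MOVAPS above: unlike their MOVUPx counterparts,
      the aligned forms must fault if the effective address is not
      16-aligned, which is why the memory cases call
      gen_SEGV_if_not_16_aligned(addr) -- in effect a guard along the
      lines of "if (addr & 15) deliver SIGSEGV" -- before the 128-bit
      access is issued. */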
   case 0x29:
      /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ));
            DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
         }
         goto decode_success;
      }
      /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ) );
            DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
         }
         goto decode_success;
      }
      break;

   case 0x2A:
      /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
         half xmm */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp arg64 = newTemp(Ity_I64);
         IRTemp rmode = newTemp(Ity_I32);

         modrm = getUChar(delta);
         do_MMX_preamble();
         if (epartIsReg(modrm)) {
            assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtpi2ps %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }

         assign( rmode, get_sse_roundingmode() );

         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  unop(Iop_I32StoF64,
                       unop(Iop_64to32, mkexpr(arg64)) )) );

         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 1,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  unop(Iop_I32StoF64,
                       unop(Iop_64HIto32, mkexpr(arg64)) )) );

         goto decode_success;
      }
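      /* Note on CVTPI2PS above: each I32 is first widened exactly to
         F64 (Iop_I32StoF64 cannot lose precision) and only the final
         F64->F32 narrowing consults the prevailing SSE rounding mode,
         so rounding happens exactly once. */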
      /* F3 0F 2A = CVTSI2SS
         -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
         -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
      if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
         IRTemp rmode = newTemp(Ity_I32);
         assign( rmode, get_sse_roundingmode() );
         modrm = getUChar(delta);
         if (sz == 4) {
            IRTemp arg32 = newTemp(Ity_I32);
            if (epartIsReg(modrm)) {
               assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2ss %s,%s\n", dis_buf,
                                       nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane32F(
               gregOfRexRM(pfx,modrm), 0,
               binop(Iop_F64toF32,
                     mkexpr(rmode),
                     unop(Iop_I32StoF64, mkexpr(arg32)) ) );
         } else {
            /* sz == 8 */
            IRTemp arg64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2ssq %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane32F(
               gregOfRexRM(pfx,modrm), 0,
               binop(Iop_F64toF32,
                     mkexpr(rmode),
                     binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
         }
         goto decode_success;
      }
      /* F2 0F 2A = CVTSI2SD
         when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
         when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
      */
      if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
         modrm = getUChar(delta);
         if (sz == 4) {
            IRTemp arg32 = newTemp(Ity_I32);
            if (epartIsReg(modrm)) {
               assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2sdl %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                              unop(Iop_I32StoF64, mkexpr(arg32))
            );
         } else {
            /* sz == 8 */
            IRTemp arg64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2sdq %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane64F(
               gregOfRexRM(pfx,modrm),
               0,
               binop( Iop_I64StoF64,
                      get_sse_roundingmode(),
                      mkexpr(arg64)
               )
            );
         }
         goto decode_success;
      }
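      /* Note on CVTSI2SD above: the 32-bit form needs no rounding
         mode, since every I32 is exactly representable as an F64,
         hence the unary Iop_I32StoF64.  The 64-bit form can lose
         precision (an I64 has more significant bits than an F64
         mantissa), so there Iop_I64StoF64 takes the SSE rounding mode
         as its first operand. */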
      /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
         xmm(G) */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp arg64 = newTemp(Ity_I64);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* Only switch to MMX mode if the source is a MMX register.
               This is inconsistent with all other instructions which
               convert between XMM and (M64 or MMX), which always switch
               to MMX mode even if 64-bit operand is M64 and not MMX.  At
               least, that's what the Intel docs seem to me to say.
               Fixes #210264. */
            do_MMX_preamble();
            assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtpi2pd %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }

         putXMMRegLane64F(
            gregOfRexRM(pfx,modrm), 0,
            unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
         );

         putXMMRegLane64F(
            gregOfRexRM(pfx,modrm), 1,
            unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
         );

         goto decode_success;
      }
      break;

   case 0x2B:
      /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
      /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
      if ( (haveNo66noF2noF3(pfx) && sz == 4)
           || (have66noF2noF3(pfx) && sz == 2) ) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
                                    dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      break;
"t" : "", 11867 dis_buf, 11868 nameMMXReg(gregLO3ofRM(modrm))); 11869 } 11870 11871 if (r2zero) { 11872 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 11873 } else { 11874 assign( rmode, get_sse_roundingmode() ); 11875 } 11876 11877 assign( 11878 dst64, 11879 binop( Iop_32HLto64, 11880 binop( Iop_F64toI32S, 11881 mkexpr(rmode), 11882 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 11883 binop( Iop_F64toI32S, 11884 mkexpr(rmode), 11885 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 11886 ) 11887 ); 11888 11889 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 11890 goto decode_success; 11891 } 11892 /* F3 0F 2D = CVTSS2SI 11893 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 11894 according to prevailing SSE rounding mode 11895 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 11896 according to prevailing SSE rounding mode 11897 */ 11898 /* F3 0F 2C = CVTTSS2SI 11899 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 11900 truncating towards zero 11901 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 11902 truncating towards zero 11903 */ 11904 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 11905 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 11906 goto decode_success; 11907 } 11908 /* F2 0F 2D = CVTSD2SI 11909 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 11910 according to prevailing SSE rounding mode 11911 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 11912 according to prevailing SSE rounding mode 11913 */ 11914 /* F2 0F 2C = CVTTSD2SI 11915 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 11916 truncating towards zero 11917 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 11918 truncating towards zero 11919 */ 11920 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 11921 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 11922 goto decode_success; 11923 } 11924 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 11925 I32 in mmx, according to prevailing SSE rounding mode */ 11926 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 11927 I32 in mmx, rounding towards zero */ 11928 if (have66noF2noF3(pfx) && sz == 2) { 11929 IRTemp dst64 = newTemp(Ity_I64); 11930 IRTemp rmode = newTemp(Ity_I32); 11931 IRTemp f64lo = newTemp(Ity_F64); 11932 IRTemp f64hi = newTemp(Ity_F64); 11933 Bool r2zero = toBool(opc == 0x2C); 11934 11935 do_MMX_preamble(); 11936 modrm = getUChar(delta); 11937 11938 if (epartIsReg(modrm)) { 11939 delta += 1; 11940 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 11941 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 11942 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 11943 nameXMMReg(eregOfRexRM(pfx,modrm)), 11944 nameMMXReg(gregLO3ofRM(modrm))); 11945 } else { 11946 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11947 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 11948 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 11949 mkexpr(addr), 11950 mkU64(8) ))); 11951 delta += alen; 11952 DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", 11953 dis_buf, 11954 nameMMXReg(gregLO3ofRM(modrm))); 11955 } 11956 11957 if (r2zero) { 11958 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 11959 } else { 11960 assign( rmode, get_sse_roundingmode() ); 11961 } 11962 11963 assign( 11964 dst64, 11965 binop( Iop_32HLto64, 11966 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 11967 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 11968 ) 11969 ); 11970 11971 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 11972 goto decode_success; 11973 } 11974 break; 11975 11976 case 0x2E: 11977 case 0x2F: 11978 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 11979 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 11980 if (have66noF2noF3(pfx) && sz == 2) { 11981 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 11982 goto decode_success; 11983 } 11984 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 11985 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 11986 if (haveNo66noF2noF3(pfx) && sz == 4) { 11987 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 11988 goto decode_success; 11989 } 11990 break; 11991 11992 case 0x50: 11993 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 11994 to 4 lowest bits of ireg(G) */ 11995 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 11996 && epartIsReg(getUChar(delta))) { 11997 /* sz == 8 is a kludge to handle insns with REX.W redundantly 11998 set to 1, which has been known to happen: 11999 12000 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 12001 12002 20071106: Intel docs say that REX.W isn't redundant: when 12003 present, a 64-bit register is written; when not present, only 12004 the 32-bit half is written. However, testing on a Core2 12005 machine suggests the entire 64 bit register is written 12006 irrespective of the status of REX.W. That could be because 12007 of the default rule that says "if the lower half of a 32-bit 12008 register is written, the upper half is zeroed". By using 12009 putIReg32 here we inadvertantly produce the same behaviour as 12010 the Core2, for the same reason -- putIReg32 implements said 12011 rule. 12012 12013 AMD docs give no indication that REX.W is even valid for this 12014 insn. */ 12015 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12016 goto decode_success; 12017 } 12018 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 12019 2 lowest bits of ireg(G) */ 12020 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 12021 /* sz == 8 is a kludge to handle insns with REX.W redundantly 12022 set to 1, which has been known to happen: 12023 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 12024 20071106: see further comments on MOVMSKPS implementation above. 
      /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
         2 lowest bits of ireg(G) */
      if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
         /* sz == 8 is a kludge to handle insns with REX.W redundantly
            set to 1, which has been known to happen:
            66 4c 0f 50 d9          rex64X movmskpd %xmm1,%r11d
            20071106: see further comments on MOVMSKPS implementation
            above. */
         delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x51:
      /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "sqrtss", Iop_Sqrt32F0x4 );
         goto decode_success;
      }
      /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "sqrtps", Iop_Sqrt32Fx4 );
         goto decode_success;
      }
      /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
                                            "sqrtsd", Iop_Sqrt64F0x2 );
         goto decode_success;
      }
      /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "sqrtpd", Iop_Sqrt64Fx2 );
         goto decode_success;
      }
      break;

   case 0x52:
      /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "rsqrtss", Iop_RSqrt32F0x4 );
         goto decode_success;
      }
      /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "rsqrtps", Iop_RSqrt32Fx4 );
         goto decode_success;
      }
      break;

   case 0x53:
      /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "rcpss", Iop_Recip32F0x4 );
         goto decode_success;
      }
      /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "rcpps", Iop_Recip32Fx4 );
         goto decode_success;
      }
      break;

   case 0x54:
      /* 0F 54 = ANDPS -- G = G and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 54 = ANDPD -- G = G and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
         goto decode_success;
      }
      break;

   case 0x55:
      /* 0F 55 = ANDNPS -- G = (not G) and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
                                          Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 55 = ANDNPD -- G = (not G) and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
                                          Iop_AndV128 );
         goto decode_success;
      }
      break;
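   /* Note on ANDNPS/ANDNPD above: dis_SSE_E_to_G_all_invG computes
      G = (~G) & E, with the *destination* operand inverted.  For
      example, per bit position, G = 0b1100 and E = 0b1010 gives
      0b0010; in other words andnps clears exactly those bits already
      set in the destination. */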
   case 0x56:
      /* 0F 56 = ORPS -- G = G or E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
         goto decode_success;
      }
      /* 66 0F 56 = ORPD -- G = G or E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
         goto decode_success;
      }
      break;

   case 0x57:
      /* 66 0F 57 = XORPD -- G = G xor E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
         goto decode_success;
      }
      /* 0F 57 = XORPS -- G = G xor E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
         goto decode_success;
      }
      break;

   case 0x58:
      /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
         goto decode_success;
      }
      /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
         goto decode_success;
      }
      /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
         goto decode_success;
      }
      /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
         goto decode_success;
      }
      break;

   case 0x59:
      /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
         goto decode_success;
      }
      /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
         goto decode_success;
      }
      /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
         goto decode_success;
      }
      /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
         goto decode_success;
      }
      break;
   case 0x5A:
      /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
         F64 in xmm(G). */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
         low half xmm(G) */
      if (haveF3no66noF2(pfx) && sz == 4) {
         IRTemp f32lo = newTemp(Ity_F32);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
            DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
            delta += alen;
            DIP("cvtss2sd %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                           unop( Iop_F32toF64, mkexpr(f32lo) ) );

         goto decode_success;
      }
      /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
         low 1/4 xmm(G), according to prevailing SSE rounding mode */
      if (haveF2no66noF3(pfx) && sz == 4) {
         IRTemp rmode = newTemp(Ity_I32);
         IRTemp f64lo = newTemp(Ity_F64);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
            DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
            delta += alen;
            DIP("cvtsd2ss %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( rmode, get_sse_roundingmode() );
         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
         );

         goto decode_success;
      }
      /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
         lo half xmm(G), rounding according to prevailing SSE rounding
         mode, and zero upper half */
      /* Note, this is practically identical to CVTPD2DQ.  It would be
         nice to merge them together. */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;
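   /* Note on the two scalar conversions above: CVTSS2SD is exact
      (every F32 is representable as an F64) and so uses the unary
      Iop_F32toF64, whereas CVTSD2SS can lose precision and therefore
      performs the binop Iop_F64toF32 under the rounding mode fetched
      from MXCSR by get_sse_roundingmode(). */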
   case 0x5B:
      /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
         xmm(G), rounding towards zero */
      /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
         xmm(G), as per the prevailing rounding mode */
      if ( (have66noF2noF3(pfx) && sz == 2)
           || (haveF3no66noF2(pfx) && sz == 4) ) {
         Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
         goto decode_success;
      }
      /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
         xmm(G) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x5C:
      /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
         goto decode_success;
      }
      /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
         goto decode_success;
      }
      /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
         goto decode_success;
      }
      break;

   case 0x5D:
      /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
         goto decode_success;
      }
      /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
         goto decode_success;
      }
      break;

   case 0x5E:
      /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
         goto decode_success;
      }
      /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
         goto decode_success;
      }
      /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
         goto decode_success;
      }
      break;

   case 0x5F:
      /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
         goto decode_success;
      }
      /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
         goto decode_success;
      }
      break;
   case 0x60:
      /* 66 0F 60 = PUNPCKLBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpcklbw",
                                    Iop_InterleaveLO8x16, True );
         goto decode_success;
      }
      break;

   case 0x61:
      /* 66 0F 61 = PUNPCKLWD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpcklwd",
                                    Iop_InterleaveLO16x8, True );
         goto decode_success;
      }
      break;

   case 0x62:
      /* 66 0F 62 = PUNPCKLDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckldq",
                                    Iop_InterleaveLO32x4, True );
         goto decode_success;
      }
      break;

   case 0x63:
      /* 66 0F 63 = PACKSSWB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packsswb",
                                    Iop_QNarrowBin16Sto8Sx16, True );
         goto decode_success;
      }
      break;

   case 0x64:
      /* 66 0F 64 = PCMPGTB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtb", Iop_CmpGT8Sx16, False );
         goto decode_success;
      }
      break;

   case 0x65:
      /* 66 0F 65 = PCMPGTW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtw", Iop_CmpGT16Sx8, False );
         goto decode_success;
      }
      break;

   case 0x66:
      /* 66 0F 66 = PCMPGTD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtd", Iop_CmpGT32Sx4, False );
         goto decode_success;
      }
      break;

   case 0x67:
      /* 66 0F 67 = PACKUSWB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packuswb",
                                    Iop_QNarrowBin16Sto8Ux16, True );
         goto decode_success;
      }
      break;

   case 0x68:
      /* 66 0F 68 = PUNPCKHBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhbw",
                                    Iop_InterleaveHI8x16, True );
         goto decode_success;
      }
      break;

   case 0x69:
      /* 66 0F 69 = PUNPCKHWD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhwd",
                                    Iop_InterleaveHI16x8, True );
         goto decode_success;
      }
      break;

   case 0x6A:
      /* 66 0F 6A = PUNPCKHDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhdq",
                                    Iop_InterleaveHI32x4, True );
         goto decode_success;
      }
      break;

   case 0x6B:
      /* 66 0F 6B = PACKSSDW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packssdw",
                                    Iop_QNarrowBin32Sto16Sx8, True );
         goto decode_success;
      }
      break;
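   /* Note on the PACK cases above: the QNarrowBin ops narrow with
      saturation, not truncation.  For instance packsswb
      (Iop_QNarrowBin16Sto8Sx16) maps 0x1234 to 0x7f and 0x8000 to
      0x80, clamping to the signed 8-bit range, while packuswb
      (Iop_QNarrowBin16Sto8Ux16) maps any negative 16-bit lane to 0x00
      and anything above 255 to 0xff. */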
"punpcklqdq", 12496 Iop_InterleaveLO64x2, True ); 12497 goto decode_success; 12498 } 12499 break; 12500 12501 case 0x6D: 12502 /* 66 0F 6D = PUNPCKHQDQ */ 12503 if (have66noF2noF3(pfx) && sz == 2) { 12504 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 12505 "punpckhqdq", 12506 Iop_InterleaveHI64x2, True ); 12507 goto decode_success; 12508 } 12509 break; 12510 12511 case 0x6E: 12512 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 12513 zeroing high 3/4 of xmm. */ 12514 /* or from ireg64/m64 to xmm lo 1/2, 12515 zeroing high 1/2 of xmm. */ 12516 if (have66noF2noF3(pfx)) { 12517 vassert(sz == 2 || sz == 8); 12518 if (sz == 2) sz = 4; 12519 modrm = getUChar(delta); 12520 if (epartIsReg(modrm)) { 12521 delta += 1; 12522 if (sz == 4) { 12523 putXMMReg( 12524 gregOfRexRM(pfx,modrm), 12525 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 12526 ); 12527 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12528 nameXMMReg(gregOfRexRM(pfx,modrm))); 12529 } else { 12530 putXMMReg( 12531 gregOfRexRM(pfx,modrm), 12532 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 12533 ); 12534 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12535 nameXMMReg(gregOfRexRM(pfx,modrm))); 12536 } 12537 } else { 12538 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 12539 delta += alen; 12540 putXMMReg( 12541 gregOfRexRM(pfx,modrm), 12542 sz == 4 12543 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 12544 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 12545 ); 12546 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 12547 nameXMMReg(gregOfRexRM(pfx,modrm))); 12548 } 12549 goto decode_success; 12550 } 12551 break; 12552 12553 case 0x6F: 12554 if (have66noF2noF3(pfx) 12555 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12556 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 12557 modrm = getUChar(delta); 12558 if (epartIsReg(modrm)) { 12559 putXMMReg( gregOfRexRM(pfx,modrm), 12560 getXMMReg( eregOfRexRM(pfx,modrm) )); 12561 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12562 nameXMMReg(gregOfRexRM(pfx,modrm))); 12563 delta += 1; 12564 } else { 12565 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12566 gen_SEGV_if_not_16_aligned( addr ); 12567 putXMMReg( gregOfRexRM(pfx,modrm), 12568 loadLE(Ity_V128, mkexpr(addr)) ); 12569 DIP("movdqa %s,%s\n", dis_buf, 12570 nameXMMReg(gregOfRexRM(pfx,modrm))); 12571 delta += alen; 12572 } 12573 goto decode_success; 12574 } 12575 if (haveF3no66noF2(pfx) && sz == 4) { 12576 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). 
   case 0x70:
      /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
         goto decode_success;
      }
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Int order;
         IRTemp sV, dV, s3, s2, s1, s0;
         s3 = s2 = s1 = s0 = IRTemp_INVALID;
         sV = newTemp(Ity_I64);
         dV = newTemp(Ity_I64);
         do_MMX_preamble();
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            order = (Int)getUChar(delta+1);
            delta += 1+1;
            DIP("pshufw $%d,%s,%s\n", order,
                                      nameMMXReg(eregLO3ofRM(modrm)),
                                      nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                              1/*extra byte after amode*/ );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            order = (Int)getUChar(delta+alen);
            delta += 1+alen;
            DIP("pshufw $%d,%s,%s\n", order,
                                      dis_buf,
                                      nameMMXReg(gregLO3ofRM(modrm)));
         }
         breakup64to16s( sV, &s3, &s2, &s1, &s0 );
#        define SEL(n) \
                  ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
         assign(dV,
                mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                             SEL((order>>2)&3), SEL((order>>0)&3) )
         );
         putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
#        undef SEL
         goto decode_success;
      }
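      /* Worked example for the PSHUFW case above: the immediate
         "order" byte holds four 2-bit source-lane selectors, lowest
         bits first.  E.g. order == 0x1B == 0b00011011 selects, from
         bits 1:0 upwards, source lanes 3,2,1,0 for result lanes
         0,1,2,3 -- i.e. it reverses the four 16-bit lanes. */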
      /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
         mem) to G(xmm), and copy upper half */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  False/*!isAvx*/, False/*!xIsH*/ );
         goto decode_success;
      }
      /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
         mem) to G(xmm), and copy lower half */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  False/*!isAvx*/, True/*xIsH*/ );
         goto decode_success;
      }
      break;

   case 0x71:
      /* 66 0F 71 /2 ib = PSRLW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
         goto decode_success;
      }
      /* 66 0F 71 /4 ib = PSRAW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 4) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
         goto decode_success;
      }
      /* 66 0F 71 /6 ib = PSLLW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
         goto decode_success;
      }
      break;

   case 0x72:
      /* 66 0F 72 /2 ib = PSRLD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
         goto decode_success;
      }
      /* 66 0F 72 /4 ib = PSRAD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 4) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
         goto decode_success;
      }
      /* 66 0F 72 /6 ib = PSLLD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
         goto decode_success;
      }
      break;

   case 0x73:
      /* 66 0F 73 /3 ib = PSRLDQ by immediate */
      /* note, if mem case ever filled in, 1 byte after amode */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 3) {
         Int imm = (Int)getUChar(delta+1);
         Int reg = eregOfRexRM(pfx,getUChar(delta));
         DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
         delta += 2;
         IRTemp sV = newTemp(Ity_V128);
         assign( sV, getXMMReg(reg) );
         putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
         goto decode_success;
      }
      /* 66 0F 73 /7 ib = PSLLDQ by immediate */
      /* note, if mem case ever filled in, 1 byte after amode */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 7) {
         Int imm = (Int)getUChar(delta+1);
         Int reg = eregOfRexRM(pfx,getUChar(delta));
         DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
         vassert(imm >= 0 && imm <= 255);
         delta += 2;
         IRTemp sV = newTemp(Ity_V128);
         assign( sV, getXMMReg(reg) );
         putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
         goto decode_success;
      }
      /* 66 0F 73 /2 ib = PSRLQ by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
         goto decode_success;
      }
      /* 66 0F 73 /6 ib = PSLLQ by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
         goto decode_success;
      }
      break;
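   /* Note on the 0x71/0x72/0x73 groups above: for these shift-by-
      immediate forms the reg field of the ModRM byte is not a
      register number but an opcode extension -- /2 selects the
      logical right shift, /4 the arithmetic right shift, /6 the left
      shift (plus /3 and /7 for the whole-register byte shifts under
      0x73).  Hence the gregLO3ofRM(...) == N guards in the decode
      conditions. */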
   case 0x74:
      /* 66 0F 74 = PCMPEQB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqb", Iop_CmpEQ8x16, False );
         goto decode_success;
      }
      break;

   case 0x75:
      /* 66 0F 75 = PCMPEQW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqw", Iop_CmpEQ16x8, False );
         goto decode_success;
      }
      break;

   case 0x76:
      /* 66 0F 76 = PCMPEQD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqd", Iop_CmpEQ32x4, False );
         goto decode_success;
      }
      break;
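   /* Note on the PCMPEQ cases above: Iop_CmpEQ{8x16,16x8,32x4}
      produce all-ones in each lane that compares equal and all-zeros
      otherwise, e.g. equal 16-bit lanes yield 0xFFFF.  This is what
      makes the common "pcmpeqd %xmm1,%xmm1" idiom a cheap way to
      materialise an all-ones register. */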
   case 0x7E:
      /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
         G (lo half xmm).  Upper half of G is zeroed out. */
      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
            /* zero bits 127:64 */
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
            DIP("movq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
      /*              or from xmm low 1/2 to ireg64 or m64. */
      if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
         if (sz == 2) sz = 4;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            if (sz == 4) {
               putIReg32( eregOfRexRM(pfx,modrm),
                          getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
               DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                    nameIReg32(eregOfRexRM(pfx,modrm)));
            } else {
               putIReg64( eregOfRexRM(pfx,modrm),
                          getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
               DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                    nameIReg64(eregOfRexRM(pfx,modrm)));
            }
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     sz == 4
                        ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
                        : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
            DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
                nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }
      break;

   case 0x7F:
      /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            goto decode_failure; /* awaiting test case */
            delta += 1;
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                   nameXMMReg(eregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }
      /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                   nameXMMReg(eregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            delta += alen;
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }
      break;

   case 0xAE:
      /* 0F AE /7 = SFENCE -- flush pending operations to memory */
      if (haveNo66noF2noF3(pfx)
          && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
          && sz == 4) {
         delta += 1;
         /* Insert a memory fence.  It's sometimes important that these
            are carried through to the generated code. */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("sfence\n");
         goto decode_success;
      }
      /* mindless duplication follows .. */
      /* 0F AE /5 = LFENCE -- flush pending operations to memory */
      /* 0F AE /6 = MFENCE -- flush pending operations to memory */
      if (haveNo66noF2noF3(pfx)
          && epartIsReg(getUChar(delta))
          && (gregLO3ofRM(getUChar(delta)) == 5
              || gregLO3ofRM(getUChar(delta)) == 6)
          && sz == 4) {
         delta += 1;
         /* Insert a memory fence.  It's sometimes important that these
            are carried through to the generated code. */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
         goto decode_success;
      }
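      /* Note on the fence cases above: sfence, lfence and mfence are
         all rendered as the single IR construct IRStmt_MBE(Imbe_Fence),
         i.e. a full barrier.  Over-serialising this way appears safe --
         merely stronger than strictly necessary for the weaker
         fences. */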
      /* 0F AE /7 = CLFLUSH -- flush cache line */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
          && sz == 4) {

         /* This is something of a hack.  We need to know the size of
            the cache line containing addr.  Since we don't (easily),
            assume 256 on the basis that no real cache would have a
            line that big.  It's safe to invalidate more stuff than we
            need, just inefficient. */
         ULong lineszB = 256ULL;

         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;

         /* Round addr down to the start of the containing block. */
         stmt( IRStmt_Put(
                  OFFB_TISTART,
                  binop( Iop_And64,
                         mkexpr(addr),
                         mkU64( ~(lineszB-1) ))) );

         stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );

         jmp_lit(dres, Ijk_TInval, (Addr64)(guest_RIP_bbstart+delta));

         DIP("clflush %s\n", dis_buf);
         goto decode_success;
      }
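      /* Worked example for the clflush case above: rounding down to
         the containing line uses addr & ~(lineszB-1); with the assumed
         256-byte line, addr = 0x1234 yields TISTART = 0x1200 and
         TILEN = 256, so translations overlapping [0x1200, 0x1300) get
         invalidated when the Ijk_TInval exit is taken. */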
*/ 13018 vassert(32 == sizeof(U256)); 13019 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32)); 13020 13021 stmt( IRStmt_Dirty(d) ); 13022 13023 goto decode_success; 13024 } 13025 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. 13026 As with FXSAVE above we ignore the value of REX.W since we're 13027 not bothering with the FPU DP and IP fields. */ 13028 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13029 && !epartIsReg(getUChar(delta)) 13030 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 13031 IRDirty* d; 13032 modrm = getUChar(delta); 13033 vassert(!epartIsReg(modrm)); 13034 13035 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13036 delta += alen; 13037 gen_SEGV_if_not_16_aligned(addr); 13038 13039 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 13040 13041 /* Uses dirty helper: 13042 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, ULong ) 13043 NOTE: 13044 the VexEmWarn value is simply ignored 13045 */ 13046 d = unsafeIRDirty_0_N ( 13047 0/*regparms*/, 13048 "amd64g_dirtyhelper_FXRSTOR", 13049 &amd64g_dirtyhelper_FXRSTOR, 13050 mkIRExprVec_1( mkexpr(addr) ) 13051 ); 13052 d->needsBBP = True; 13053 13054 /* declare we're reading memory */ 13055 d->mFx = Ifx_Read; 13056 d->mAddr = mkexpr(addr); 13057 d->mSize = 464; /* according to recent Intel docs */ 13058 13059 /* declare we're writing guest state */ 13060 d->nFxState = 7; 13061 vex_bzero(&d->fxState, sizeof(d->fxState)); 13062 13063 d->fxState[0].fx = Ifx_Write; 13064 d->fxState[0].offset = OFFB_FTOP; 13065 d->fxState[0].size = sizeof(UInt); 13066 13067 d->fxState[1].fx = Ifx_Write; 13068 d->fxState[1].offset = OFFB_FPREGS; 13069 d->fxState[1].size = 8 * sizeof(ULong); 13070 13071 d->fxState[2].fx = Ifx_Write; 13072 d->fxState[2].offset = OFFB_FPTAGS; 13073 d->fxState[2].size = 8 * sizeof(UChar); 13074 13075 d->fxState[3].fx = Ifx_Write; 13076 d->fxState[3].offset = OFFB_FPROUND; 13077 d->fxState[3].size = sizeof(ULong); 13078 13079 d->fxState[4].fx = Ifx_Write; 13080 d->fxState[4].offset = OFFB_FC3210; 13081 d->fxState[4].size = sizeof(ULong); 13082 13083 d->fxState[5].fx = Ifx_Write; 13084 d->fxState[5].offset = OFFB_YMM0; 13085 d->fxState[5].size = sizeof(U128); 13086 /* plus 15 more of the above, spaced out in YMM sized steps */ 13087 d->fxState[5].nRepeats = 15; 13088 d->fxState[5].repeatLen = sizeof(U256); 13089 13090 d->fxState[6].fx = Ifx_Write; 13091 d->fxState[6].offset = OFFB_SSEROUND; 13092 d->fxState[6].size = sizeof(ULong); 13093 13094 /* Be paranoid ... this assertion tries to ensure the 16 %ymm 13095 images are packed back-to-back. If not, the settings for 13096 d->fxState[5] are wrong.
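(The same back-to-back layout argument as in the FXSAVE case above applies here, with the descriptors marked Ifx_Write rather than Ifx_Read.)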
*/ 13097 vassert(32 == sizeof(U256)); 13098 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32)); 13099 13100 stmt( IRStmt_Dirty(d) ); 13101 13102 goto decode_success; 13103 } 13104 break; 13105 13106 case 0xC2: 13107 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 13108 if (haveNo66noF2noF3(pfx) && sz == 4) { 13109 Long delta0 = delta; 13110 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 13111 if (delta > delta0) goto decode_success; 13112 } 13113 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 13114 if (haveF3no66noF2(pfx) && sz == 4) { 13115 Long delta0 = delta; 13116 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 13117 if (delta > delta0) goto decode_success; 13118 } 13119 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 13120 if (haveF2no66noF3(pfx) && sz == 4) { 13121 Long delta0 = delta; 13122 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 13123 if (delta > delta0) goto decode_success; 13124 } 13125 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 13126 if (have66noF2noF3(pfx) && sz == 2) { 13127 Long delta0 = delta; 13128 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 13129 if (delta > delta0) goto decode_success; 13130 } 13131 break; 13132 13133 case 0xC3: 13134 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 13135 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13136 modrm = getUChar(delta); 13137 if (!epartIsReg(modrm)) { 13138 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13139 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 13140 DIP("movnti %s,%s\n", nameIRegG(sz, pfx, modrm), 13141 dis_buf); 13142 delta += alen; 13143 goto decode_success; 13144 } 13145 /* else fall through */ 13146 } 13147 break; 13148 13149 case 0xC4: 13150 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13151 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13152 put it into the specified lane of mmx(G). */ 13153 if (haveNo66noF2noF3(pfx) 13154 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13155 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 13156 mmx reg. t4 is the new lane value. t5 is the original 13157 mmx value. t6 is the new mmx value.
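For example (illustrative only): pinsrw $2, %ax, %mm0 takes lane & 3 == 2, so the register is rebuilt as mk64from16s(t3, t4, t1, t0), replacing just 16-bit lane 2 with the new value t4.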
*/ 13158 Int lane; 13159 t4 = newTemp(Ity_I16); 13160 t5 = newTemp(Ity_I64); 13161 t6 = newTemp(Ity_I64); 13162 modrm = getUChar(delta); 13163 do_MMX_preamble(); 13164 13165 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 13166 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 13167 13168 if (epartIsReg(modrm)) { 13169 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 13170 delta += 1+1; 13171 lane = getUChar(delta-1); 13172 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13173 nameIReg16(eregOfRexRM(pfx,modrm)), 13174 nameMMXReg(gregLO3ofRM(modrm))); 13175 } else { 13176 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13177 delta += 1+alen; 13178 lane = getUChar(delta-1); 13179 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13180 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13181 dis_buf, 13182 nameMMXReg(gregLO3ofRM(modrm))); 13183 } 13184 13185 switch (lane & 3) { 13186 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 13187 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 13188 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 13189 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 13190 default: vassert(0); 13191 } 13192 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 13193 goto decode_success; 13194 } 13195 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13196 put it into the specified lane of xmm(G). */ 13197 if (have66noF2noF3(pfx) 13198 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13199 Int lane; 13200 t4 = newTemp(Ity_I16); 13201 modrm = getUChar(delta); 13202 UInt rG = gregOfRexRM(pfx,modrm); 13203 if (epartIsReg(modrm)) { 13204 UInt rE = eregOfRexRM(pfx,modrm); 13205 assign(t4, getIReg16(rE)); 13206 delta += 1+1; 13207 lane = getUChar(delta-1); 13208 DIP("pinsrw $%d,%s,%s\n", 13209 (Int)lane, nameIReg16(rE), nameXMMReg(rG)); 13210 } else { 13211 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13212 1/*byte after the amode*/ ); 13213 delta += 1+alen; 13214 lane = getUChar(delta-1); 13215 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13216 DIP("pinsrw $%d,%s,%s\n", 13217 (Int)lane, dis_buf, nameXMMReg(rG)); 13218 } 13219 IRTemp src_vec = newTemp(Ity_V128); 13220 assign(src_vec, getXMMReg(rG)); 13221 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 13222 putXMMReg(rG, mkexpr(res_vec)); 13223 goto decode_success; 13224 } 13225 break; 13226 13227 case 0xC5: 13228 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13229 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 13230 zero-extend of it in ireg(G). */ 13231 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13232 modrm = getUChar(delta); 13233 if (epartIsReg(modrm)) { 13234 IRTemp sV = newTemp(Ity_I64); 13235 t5 = newTemp(Ity_I16); 13236 do_MMX_preamble(); 13237 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 13238 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 13239 switch (getUChar(delta+1) & 3) { 13240 case 0: assign(t5, mkexpr(t0)); break; 13241 case 1: assign(t5, mkexpr(t1)); break; 13242 case 2: assign(t5, mkexpr(t2)); break; 13243 case 3: assign(t5, mkexpr(t3)); break; 13244 default: vassert(0); 13245 } 13246 if (sz == 8) 13247 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 13248 else 13249 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 13250 DIP("pextrw $%d,%s,%s\n", 13251 (Int)getUChar(delta+1), 13252 nameMMXReg(eregLO3ofRM(modrm)), 13253 sz==8 ? 
nameIReg64(gregOfRexRM(pfx,modrm)) 13254 : nameIReg32(gregOfRexRM(pfx,modrm)) 13255 ); 13256 delta += 2; 13257 goto decode_success; 13258 } 13259 /* else fall through */ 13260 /* note, for anyone filling in the mem case: this insn has one 13261 byte after the amode and therefore you must pass 1 as the 13262 last arg to disAMode */ 13263 } 13264 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 13265 zero-extend of it in ireg(G). */ 13266 if (have66noF2noF3(pfx) 13267 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13268 Long delta0 = delta; 13269 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 13270 False/*!isAvx*/ ); 13271 if (delta > delta0) goto decode_success; 13272 /* else fall through -- decoding has failed */ 13273 } 13274 break; 13275 13276 case 0xC6: 13277 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 13278 if (haveNo66noF2noF3(pfx) && sz == 4) { 13279 Int imm8 = 0; 13280 IRTemp sV = newTemp(Ity_V128); 13281 IRTemp dV = newTemp(Ity_V128); 13282 modrm = getUChar(delta); 13283 UInt rG = gregOfRexRM(pfx,modrm); 13284 assign( dV, getXMMReg(rG) ); 13285 if (epartIsReg(modrm)) { 13286 UInt rE = eregOfRexRM(pfx,modrm); 13287 assign( sV, getXMMReg(rE) ); 13288 imm8 = (Int)getUChar(delta+1); 13289 delta += 1+1; 13290 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 13291 } else { 13292 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13293 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13294 imm8 = (Int)getUChar(delta+alen); 13295 delta += 1+alen; 13296 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 13297 } 13298 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 13299 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13300 goto decode_success; 13301 } 13302 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 13303 if (have66noF2noF3(pfx) && sz == 2) { 13304 Int select; 13305 IRTemp sV = newTemp(Ity_V128); 13306 IRTemp dV = newTemp(Ity_V128); 13307 13308 modrm = getUChar(delta); 13309 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13310 13311 if (epartIsReg(modrm)) { 13312 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13313 select = (Int)getUChar(delta+1); 13314 delta += 1+1; 13315 DIP("shufpd $%d,%s,%s\n", select, 13316 nameXMMReg(eregOfRexRM(pfx,modrm)), 13317 nameXMMReg(gregOfRexRM(pfx,modrm))); 13318 } else { 13319 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13320 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13321 select = getUChar(delta+alen); 13322 delta += 1+alen; 13323 DIP("shufpd $%d,%s,%s\n", select, 13324 dis_buf, 13325 nameXMMReg(gregOfRexRM(pfx,modrm))); 13326 } 13327 13328 IRTemp res = math_SHUFPD_128( sV, dV, select ); 13329 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13330 goto decode_success; 13331 } 13332 break; 13333 13334 case 0xD1: 13335 /* 66 0F D1 = PSRLW by E */ 13336 if (have66noF2noF3(pfx) && sz == 2) { 13337 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 13338 goto decode_success; 13339 } 13340 break; 13341 13342 case 0xD2: 13343 /* 66 0F D2 = PSRLD by E */ 13344 if (have66noF2noF3(pfx) && sz == 2) { 13345 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 13346 goto decode_success; 13347 } 13348 break; 13349 13350 case 0xD3: 13351 /* 66 0F D3 = PSRLQ by E */ 13352 if (have66noF2noF3(pfx) && sz == 2) { 13353 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 13354 goto decode_success; 13355 } 13356 break; 13357 13358 case 0xD4: 13359 /* 66 0F D4 = PADDQ */ 13360 if (have66noF2noF3(pfx) && sz == 2) { 13361 delta = 
dis_SSEint_E_to_G( vbi, pfx, delta, 13362 "paddq", Iop_Add64x2, False ); 13363 goto decode_success; 13364 } 13365 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 13366 /* 0F D4 = PADDQ -- add 64x1 */ 13367 if (haveNo66noF2noF3(pfx) && sz == 4) { 13368 do_MMX_preamble(); 13369 delta = dis_MMXop_regmem_to_reg ( 13370 vbi, pfx, delta, opc, "paddq", False ); 13371 goto decode_success; 13372 } 13373 break; 13374 13375 case 0xD5: 13376 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 13377 if (have66noF2noF3(pfx) && sz == 2) { 13378 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13379 "pmullw", Iop_Mul16x8, False ); 13380 goto decode_success; 13381 } 13382 break; 13383 13384 case 0xD6: 13385 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 13386 hi half). */ 13387 if (haveF3no66noF2(pfx) && sz == 4) { 13388 modrm = getUChar(delta); 13389 if (epartIsReg(modrm)) { 13390 do_MMX_preamble(); 13391 putXMMReg( gregOfRexRM(pfx,modrm), 13392 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 13393 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13394 nameXMMReg(gregOfRexRM(pfx,modrm))); 13395 delta += 1; 13396 goto decode_success; 13397 } 13398 /* apparently no mem case for this insn */ 13399 } 13400 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 13401 or lo half xmm). */ 13402 if (have66noF2noF3(pfx) 13403 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13404 modrm = getUChar(delta); 13405 if (epartIsReg(modrm)) { 13406 /* fall through, awaiting test case */ 13407 /* dst: lo half copied, hi half zeroed */ 13408 } else { 13409 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13410 storeLE( mkexpr(addr), 13411 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 13412 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 13413 delta += alen; 13414 goto decode_success; 13415 } 13416 } 13417 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 13418 if (haveF2no66noF3(pfx) && sz == 4) { 13419 modrm = getUChar(delta); 13420 if (epartIsReg(modrm)) { 13421 do_MMX_preamble(); 13422 putMMXReg( gregLO3ofRM(modrm), 13423 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 13424 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13425 nameMMXReg(gregLO3ofRM(modrm))); 13426 delta += 1; 13427 goto decode_success; 13428 } 13429 /* apparently no mem case for this insn */ 13430 } 13431 break; 13432 13433 case 0xD7: 13434 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 13435 lanes in xmm(E), turn them into a byte, and put 13436 zero-extend of it in ireg(G). Doing this directly is just 13437 too cumbersome; give up therefore and call a helper. */ 13438 if (have66noF2noF3(pfx) 13439 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 13440 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 13441 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 13442 goto decode_success; 13443 } 13444 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13445 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 13446 mmx(E), turn them into a byte, and put zero-extend of it in 13447 ireg(G).
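As a sketch of the semantics: bit i of the result byte is the sign (top) bit of 8-bit lane i of mmx(E), so a source of 0xFF00FF00FF00FF00 would yield 0xAA.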
*/ 13448 if (haveNo66noF2noF3(pfx) 13449 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13450 modrm = getUChar(delta); 13451 if (epartIsReg(modrm)) { 13452 do_MMX_preamble(); 13453 t0 = newTemp(Ity_I64); 13454 t1 = newTemp(Ity_I64); 13455 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 13456 assign(t1, mkIRExprCCall( 13457 Ity_I64, 0/*regparms*/, 13458 "amd64g_calculate_mmx_pmovmskb", 13459 &amd64g_calculate_mmx_pmovmskb, 13460 mkIRExprVec_1(mkexpr(t0)))); 13461 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1))); 13462 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13463 nameIReg32(gregOfRexRM(pfx,modrm))); 13464 delta += 1; 13465 goto decode_success; 13466 } 13467 /* else fall through */ 13468 } 13469 break; 13470 13471 case 0xD8: 13472 /* 66 0F D8 = PSUBUSB */ 13473 if (have66noF2noF3(pfx) && sz == 2) { 13474 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13475 "psubusb", Iop_QSub8Ux16, False ); 13476 goto decode_success; 13477 } 13478 break; 13479 13480 case 0xD9: 13481 /* 66 0F D9 = PSUBUSW */ 13482 if (have66noF2noF3(pfx) && sz == 2) { 13483 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13484 "psubusw", Iop_QSub16Ux8, False ); 13485 goto decode_success; 13486 } 13487 break; 13488 13489 case 0xDA: 13490 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13491 /* 0F DA = PMINUB -- 8x8 unsigned min */ 13492 if (haveNo66noF2noF3(pfx) && sz == 4) { 13493 do_MMX_preamble(); 13494 delta = dis_MMXop_regmem_to_reg ( 13495 vbi, pfx, delta, opc, "pminub", False ); 13496 goto decode_success; 13497 } 13498 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 13499 if (have66noF2noF3(pfx) && sz == 2) { 13500 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13501 "pminub", Iop_Min8Ux16, False ); 13502 goto decode_success; 13503 } 13504 break; 13505 13506 case 0xDB: 13507 /* 66 0F DB = PAND */ 13508 if (have66noF2noF3(pfx) && sz == 2) { 13509 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 13510 goto decode_success; 13511 } 13512 break; 13513 13514 case 0xDC: 13515 /* 66 0F DC = PADDUSB */ 13516 if (have66noF2noF3(pfx) && sz == 2) { 13517 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13518 "paddusb", Iop_QAdd8Ux16, False ); 13519 goto decode_success; 13520 } 13521 break; 13522 13523 case 0xDD: 13524 /* 66 0F DD = PADDUSW */ 13525 if (have66noF2noF3(pfx) && sz == 2) { 13526 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13527 "paddusw", Iop_QAdd16Ux8, False ); 13528 goto decode_success; 13529 } 13530 break; 13531 13532 case 0xDE: 13533 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13534 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 13535 if (haveNo66noF2noF3(pfx) && sz == 4) { 13536 do_MMX_preamble(); 13537 delta = dis_MMXop_regmem_to_reg ( 13538 vbi, pfx, delta, opc, "pmaxub", False ); 13539 goto decode_success; 13540 } 13541 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 13542 if (have66noF2noF3(pfx) && sz == 2) { 13543 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13544 "pmaxub", Iop_Max8Ux16, False ); 13545 goto decode_success; 13546 } 13547 break; 13548 13549 case 0xDF: 13550 /* 66 0F DF = PANDN */ 13551 if (have66noF2noF3(pfx) && sz == 2) { 13552 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 ); 13553 goto decode_success; 13554 } 13555 break; 13556 13557 case 0xE0: 13558 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13559 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 13560 if (haveNo66noF2noF3(pfx) && sz == 4) { 13561 do_MMX_preamble(); 13562 delta = dis_MMXop_regmem_to_reg ( 13563 vbi, 
pfx, delta, opc, "pavgb", False ); 13564 goto decode_success; 13565 } 13566 /* 66 0F E0 = PAVGB */ 13567 if (have66noF2noF3(pfx) && sz == 2) { 13568 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13569 "pavgb", Iop_Avg8Ux16, False ); 13570 goto decode_success; 13571 } 13572 break; 13573 13574 case 0xE1: 13575 /* 66 0F E1 = PSRAW by E */ 13576 if (have66noF2noF3(pfx) && sz == 2) { 13577 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 13578 goto decode_success; 13579 } 13580 break; 13581 13582 case 0xE2: 13583 /* 66 0F E2 = PSRAD by E */ 13584 if (have66noF2noF3(pfx) && sz == 2) { 13585 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 13586 goto decode_success; 13587 } 13588 break; 13589 13590 case 0xE3: 13591 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13592 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 13593 if (haveNo66noF2noF3(pfx) && sz == 4) { 13594 do_MMX_preamble(); 13595 delta = dis_MMXop_regmem_to_reg ( 13596 vbi, pfx, delta, opc, "pavgw", False ); 13597 goto decode_success; 13598 } 13599 /* 66 0F E3 = PAVGW */ 13600 if (have66noF2noF3(pfx) && sz == 2) { 13601 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13602 "pavgw", Iop_Avg16Ux8, False ); 13603 goto decode_success; 13604 } 13605 break; 13606 13607 case 0xE4: 13608 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13609 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */ 13610 if (haveNo66noF2noF3(pfx) && sz == 4) { 13611 do_MMX_preamble(); 13612 delta = dis_MMXop_regmem_to_reg ( 13613 vbi, pfx, delta, opc, "pmulhuw", False ); 13614 goto decode_success; 13615 } 13616 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 13617 if (have66noF2noF3(pfx) && sz == 2) { 13618 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13619 "pmulhuw", Iop_MulHi16Ux8, False ); 13620 goto decode_success; 13621 } 13622 break; 13623 13624 case 0xE5: 13625 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 13626 if (have66noF2noF3(pfx) && sz == 2) { 13627 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13628 "pmulhw", Iop_MulHi16Sx8, False ); 13629 goto decode_success; 13630 } 13631 break; 13632 13633 case 0xE6: 13634 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 13635 lo half xmm(G), and zero upper half, rounding towards zero */ 13636 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 13637 lo half xmm(G), according to prevailing rounding mode, and zero 13638 upper half */ 13639 if ( (haveF2no66noF3(pfx) && sz == 4) 13640 || (have66noF2noF3(pfx) && sz == 2) ) { 13641 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 13642 toBool(sz == 2)/*r2zero*/); 13643 goto decode_success; 13644 } 13645 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 13646 F64 in xmm(G) */ 13647 if (haveF3no66noF2(pfx) && sz == 4) { 13648 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 13649 goto decode_success; 13650 } 13651 break; 13652 13653 case 0xE7: 13654 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13655 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 13656 Intel manual does not say anything about the usual business of 13657 the FP reg tags getting trashed whenever an MMX insn happens. 13658 So we just leave them alone.
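The non-temporal hint itself only affects cache allocation and (weak) store ordering, neither of which the simulation models, so a plain store is believed adequate.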
13659 */ 13660 if (haveNo66noF2noF3(pfx) && sz == 4) { 13661 modrm = getUChar(delta); 13662 if (!epartIsReg(modrm)) { 13663 /* do_MMX_preamble(); Intel docs don't specify this */ 13664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13665 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 13666 DIP("movntq %s,%s\n", nameMMXReg(gregLO3ofRM(modrm)), 13667 dis_buf); 13668 delta += alen; 13669 goto decode_success; 13670 } 13671 /* else fall through */ 13672 } 13673 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 13674 if (have66noF2noF3(pfx) && sz == 2) { 13675 modrm = getUChar(delta); 13676 if (!epartIsReg(modrm)) { 13677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13678 gen_SEGV_if_not_16_aligned( addr ); 13679 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13680 DIP("movntdq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13681 dis_buf); 13682 delta += alen; 13683 goto decode_success; 13684 } 13685 /* else fall through */ 13686 } 13687 break; 13688 13689 case 0xE8: 13690 /* 66 0F E8 = PSUBSB */ 13691 if (have66noF2noF3(pfx) && sz == 2) { 13692 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13693 "psubsb", Iop_QSub8Sx16, False ); 13694 goto decode_success; 13695 } 13696 break; 13697 13698 case 0xE9: 13699 /* 66 0F E9 = PSUBSW */ 13700 if (have66noF2noF3(pfx) && sz == 2) { 13701 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13702 "psubsw", Iop_QSub16Sx8, False ); 13703 goto decode_success; 13704 } 13705 break; 13706 13707 case 0xEA: 13708 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13709 /* 0F EA = PMINSW -- 16x4 signed min */ 13710 if (haveNo66noF2noF3(pfx) && sz == 4) { 13711 do_MMX_preamble(); 13712 delta = dis_MMXop_regmem_to_reg ( 13713 vbi, pfx, delta, opc, "pminsw", False ); 13714 goto decode_success; 13715 } 13716 /* 66 0F EA = PMINSW -- 16x8 signed min */ 13717 if (have66noF2noF3(pfx) && sz == 2) { 13718 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13719 "pminsw", Iop_Min16Sx8, False ); 13720 goto decode_success; 13721 } 13722 break; 13723 13724 case 0xEB: 13725 /* 66 0F EB = POR */ 13726 if (have66noF2noF3(pfx) && sz == 2) { 13727 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 13728 goto decode_success; 13729 } 13730 break; 13731 13732 case 0xEC: 13733 /* 66 0F EC = PADDSB */ 13734 if (have66noF2noF3(pfx) && sz == 2) { 13735 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13736 "paddsb", Iop_QAdd8Sx16, False ); 13737 goto decode_success; 13738 } 13739 break; 13740 13741 case 0xED: 13742 /* 66 0F ED = PADDSW */ 13743 if (have66noF2noF3(pfx) && sz == 2) { 13744 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13745 "paddsw", Iop_QAdd16Sx8, False ); 13746 goto decode_success; 13747 } 13748 break; 13749 13750 case 0xEE: 13751 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13752 /* 0F EE = PMAXSW -- 16x4 signed max */ 13753 if (haveNo66noF2noF3(pfx) && sz == 4) { 13754 do_MMX_preamble(); 13755 delta = dis_MMXop_regmem_to_reg ( 13756 vbi, pfx, delta, opc, "pmaxsw", False ); 13757 goto decode_success; 13758 } 13759 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 13760 if (have66noF2noF3(pfx) && sz == 2) { 13761 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13762 "pmaxsw", Iop_Max16Sx8, False ); 13763 goto decode_success; 13764 } 13765 break; 13766 13767 case 0xEF: 13768 /* 66 0F EF = PXOR */ 13769 if (have66noF2noF3(pfx) && sz == 2) { 13770 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 ); 13771 goto decode_success; 13772 } 13773 break; 13774 13775 case 0xF1: 13776 /* 66 0F F1 = PSLLW by E
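-- that is, shift each 16-bit lane of G left by the single shift amount held in the low 64 bits of E (xmm or m128)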
*/ 13777 if (have66noF2noF3(pfx) && sz == 2) { 13778 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 ); 13779 goto decode_success; 13780 } 13781 break; 13782 13783 case 0xF2: 13784 /* 66 0F F2 = PSLLD by E */ 13785 if (have66noF2noF3(pfx) && sz == 2) { 13786 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 ); 13787 goto decode_success; 13788 } 13789 break; 13790 13791 case 0xF3: 13792 /* 66 0F F3 = PSLLQ by E */ 13793 if (have66noF2noF3(pfx) && sz == 2) { 13794 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 ); 13795 goto decode_success; 13796 } 13797 break; 13798 13799 case 0xF4: 13800 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 13801 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 13802 half */ 13803 if (have66noF2noF3(pfx) && sz == 2) { 13804 IRTemp sV = newTemp(Ity_V128); 13805 IRTemp dV = newTemp(Ity_V128); 13806 modrm = getUChar(delta); 13807 UInt rG = gregOfRexRM(pfx,modrm); 13808 assign( dV, getXMMReg(rG) ); 13809 if (epartIsReg(modrm)) { 13810 UInt rE = eregOfRexRM(pfx,modrm); 13811 assign( sV, getXMMReg(rE) ); 13812 delta += 1; 13813 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 13814 } else { 13815 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13816 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13817 delta += alen; 13818 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG)); 13819 } 13820 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) ); 13821 goto decode_success; 13822 } 13823 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 13824 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 13825 0 to form 64-bit result */ 13826 if (haveNo66noF2noF3(pfx) && sz == 4) { 13827 IRTemp sV = newTemp(Ity_I64); 13828 IRTemp dV = newTemp(Ity_I64); 13829 t1 = newTemp(Ity_I32); 13830 t0 = newTemp(Ity_I32); 13831 modrm = getUChar(delta); 13832 13833 do_MMX_preamble(); 13834 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13835 13836 if (epartIsReg(modrm)) { 13837 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13838 delta += 1; 13839 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13840 nameMMXReg(gregLO3ofRM(modrm))); 13841 } else { 13842 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13843 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13844 delta += alen; 13845 DIP("pmuludq %s,%s\n", dis_buf, 13846 nameMMXReg(gregLO3ofRM(modrm))); 13847 } 13848 13849 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 13850 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 13851 putMMXReg( gregLO3ofRM(modrm), 13852 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 13853 goto decode_success; 13854 } 13855 break; 13856 13857 case 0xF5: 13858 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 13859 E(xmm or mem) to G(xmm) */ 13860 if (have66noF2noF3(pfx) && sz == 2) { 13861 IRTemp sV = newTemp(Ity_V128); 13862 IRTemp dV = newTemp(Ity_V128); 13863 modrm = getUChar(delta); 13864 UInt rG = gregOfRexRM(pfx,modrm); 13865 if (epartIsReg(modrm)) { 13866 UInt rE = eregOfRexRM(pfx,modrm); 13867 assign( sV, getXMMReg(rE) ); 13868 delta += 1; 13869 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 13870 } else { 13871 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13872 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13873 delta += alen; 13874 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG)); 13875 } 13876 assign( dV, getXMMReg(rG) ); 13877 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) ); 13878 goto decode_success; 13879 } 13880 break; 13881 13882 case 0xF6: 13883 /* 
***--- this is an MMX class insn introduced in SSE1 ---*** */ 13884 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 13885 if (haveNo66noF2noF3(pfx) && sz == 4) { 13886 do_MMX_preamble(); 13887 delta = dis_MMXop_regmem_to_reg ( 13888 vbi, pfx, delta, opc, "psadbw", False ); 13889 goto decode_success; 13890 } 13891 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 13892 from E(xmm or mem) to G(xmm) */ 13893 if (have66noF2noF3(pfx) && sz == 2) { 13894 IRTemp sV = newTemp(Ity_V128); 13895 IRTemp dV = newTemp(Ity_V128); 13896 modrm = getUChar(delta); 13897 UInt rG = gregOfRexRM(pfx,modrm); 13898 if (epartIsReg(modrm)) { 13899 UInt rE = eregOfRexRM(pfx,modrm); 13900 assign( sV, getXMMReg(rE) ); 13901 delta += 1; 13902 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 13903 } else { 13904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13905 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13906 delta += alen; 13907 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG)); 13908 } 13909 assign( dV, getXMMReg(rG) ); 13910 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) ); 13911 13912 goto decode_success; 13913 } 13914 break; 13915 13916 case 0xF7: 13917 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13918 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 13919 if (haveNo66noF2noF3(pfx) && sz == 4) { 13920 Bool ok = False; 13921 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 ); 13922 if (ok) goto decode_success; 13923 } 13924 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 13925 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) { 13926 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ ); 13927 goto decode_success; 13928 } 13929 break; 13930 13931 case 0xF8: 13932 /* 66 0F F8 = PSUBB */ 13933 if (have66noF2noF3(pfx) && sz == 2) { 13934 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13935 "psubb", Iop_Sub8x16, False ); 13936 goto decode_success; 13937 } 13938 break; 13939 13940 case 0xF9: 13941 /* 66 0F F9 = PSUBW */ 13942 if (have66noF2noF3(pfx) && sz == 2) { 13943 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13944 "psubw", Iop_Sub16x8, False ); 13945 goto decode_success; 13946 } 13947 break; 13948 13949 case 0xFA: 13950 /* 66 0F FA = PSUBD */ 13951 if (have66noF2noF3(pfx) && sz == 2) { 13952 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13953 "psubd", Iop_Sub32x4, False ); 13954 goto decode_success; 13955 } 13956 break; 13957 13958 case 0xFB: 13959 /* 66 0F FB = PSUBQ */ 13960 if (have66noF2noF3(pfx) && sz == 2) { 13961 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13962 "psubq", Iop_Sub64x2, False ); 13963 goto decode_success; 13964 } 13965 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 13966 /* 0F FB = PSUBQ -- sub 64x1 */ 13967 if (haveNo66noF2noF3(pfx) && sz == 4) { 13968 do_MMX_preamble(); 13969 delta = dis_MMXop_regmem_to_reg ( 13970 vbi, pfx, delta, opc, "psubq", False ); 13971 goto decode_success; 13972 } 13973 break; 13974 13975 case 0xFC: 13976 /* 66 0F FC = PADDB */ 13977 if (have66noF2noF3(pfx) && sz == 2) { 13978 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13979 "paddb", Iop_Add8x16, False ); 13980 goto decode_success; 13981 } 13982 break; 13983 13984 case 0xFD: 13985 /* 66 0F FD = PADDW */ 13986 if (have66noF2noF3(pfx) && sz == 2) { 13987 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13988 "paddw", Iop_Add16x8, False ); 13989 goto decode_success; 13990 } 13991 break; 13992 13993 case 0xFE: 13994 /* 66 0F FE = PADDD */ 13995 if (have66noF2noF3(pfx) && sz == 2) { 13996 delta = 
dis_SSEint_E_to_G( vbi, pfx, delta, 13997 "paddd", Iop_Add32x4, False ); 13998 goto decode_success; 13999 } 14000 break; 14001 14002 default: 14003 goto decode_failure; 14004 14005 } 14006 14007 decode_failure: 14008 *decode_OK = False; 14009 return deltaIN; 14010 14011 decode_success: 14012 *decode_OK = True; 14013 return delta; 14014 } 14015 14016 14017 /*------------------------------------------------------------*/ 14018 /*--- ---*/ 14019 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/ 14020 /*--- ---*/ 14021 /*------------------------------------------------------------*/ 14022 14023 static Long dis_MOVDDUP_128 ( VexAbiInfo* vbi, Prefix pfx, 14024 Long delta, Bool isAvx ) 14025 { 14026 IRTemp addr = IRTemp_INVALID; 14027 Int alen = 0; 14028 HChar dis_buf[50]; 14029 IRTemp sV = newTemp(Ity_V128); 14030 IRTemp d0 = newTemp(Ity_I64); 14031 UChar modrm = getUChar(delta); 14032 UInt rG = gregOfRexRM(pfx,modrm); 14033 if (epartIsReg(modrm)) { 14034 UInt rE = eregOfRexRM(pfx,modrm); 14035 assign( sV, getXMMReg(rE) ); 14036 DIP("%smovddup %s,%s\n", 14037 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 14038 delta += 1; 14039 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 14040 } else { 14041 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14042 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 14043 DIP("%smovddup %s,%s\n", 14044 isAvx ? "v" : "", dis_buf, nameXMMReg(rG)); 14045 delta += alen; 14046 } 14047 (isAvx ? putYMMRegLoAndZU : putXMMReg) 14048 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 14049 return delta; 14050 } 14051 14052 14053 static Long dis_MOVDDUP_256 ( VexAbiInfo* vbi, Prefix pfx, 14054 Long delta ) 14055 { 14056 IRTemp addr = IRTemp_INVALID; 14057 Int alen = 0; 14058 HChar dis_buf[50]; 14059 IRTemp d0 = newTemp(Ity_I64); 14060 IRTemp d1 = newTemp(Ity_I64); 14061 UChar modrm = getUChar(delta); 14062 UInt rG = gregOfRexRM(pfx,modrm); 14063 if (epartIsReg(modrm)) { 14064 UInt rE = eregOfRexRM(pfx,modrm); 14065 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 14066 delta += 1; 14067 assign ( d0, getYMMRegLane64(rE, 0) ); 14068 assign ( d1, getYMMRegLane64(rE, 2) ); 14069 } else { 14070 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14071 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 14072 assign( d1, loadLE(Ity_I64, binop(Iop_Add64, 14073 mkexpr(addr), mkU64(16))) ); 14074 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG)); 14075 delta += alen; 14076 } 14077 putYMMRegLane64( rG, 0, mkexpr(d0) ); 14078 putYMMRegLane64( rG, 1, mkexpr(d0) ); 14079 putYMMRegLane64( rG, 2, mkexpr(d1) ); 14080 putYMMRegLane64( rG, 3, mkexpr(d1) ); 14081 return delta; 14082 } 14083 14084 14085 static Long dis_MOVSxDUP_128 ( VexAbiInfo* vbi, Prefix pfx, 14086 Long delta, Bool isAvx, Bool isL ) 14087 { 14088 IRTemp addr = IRTemp_INVALID; 14089 Int alen = 0; 14090 HChar dis_buf[50]; 14091 IRTemp sV = newTemp(Ity_V128); 14092 UChar modrm = getUChar(delta); 14093 UInt rG = gregOfRexRM(pfx,modrm); 14094 IRTemp s3, s2, s1, s0; 14095 s3 = s2 = s1 = s0 = IRTemp_INVALID; 14096 if (epartIsReg(modrm)) { 14097 UInt rE = eregOfRexRM(pfx,modrm); 14098 assign( sV, getXMMReg(rE) ); 14099 DIP("%smovs%cdup %s,%s\n", 14100 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG)); 14101 delta += 1; 14102 } else { 14103 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14104 if (!isAvx) 14105 gen_SEGV_if_not_16_aligned( addr ); 14106 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14107 DIP("%smovs%cdup %s,%s\n", 14108 isAvx ? "v" : "", isL ? 
'l' : 'h', dis_buf, nameXMMReg(rG)); 14109 delta += alen; 14110 } 14111 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 14112 (isAvx ? putYMMRegLoAndZU : putXMMReg) 14113 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 ) 14114 : mkV128from32s( s3, s3, s1, s1 ) ); 14115 return delta; 14116 } 14117 14118 14119 static Long dis_MOVSxDUP_256 ( VexAbiInfo* vbi, Prefix pfx, 14120 Long delta, Bool isL ) 14121 { 14122 IRTemp addr = IRTemp_INVALID; 14123 Int alen = 0; 14124 HChar dis_buf[50]; 14125 IRTemp sV = newTemp(Ity_V256); 14126 UChar modrm = getUChar(delta); 14127 UInt rG = gregOfRexRM(pfx,modrm); 14128 IRTemp s7, s6, s5, s4, s3, s2, s1, s0; 14129 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 14130 if (epartIsReg(modrm)) { 14131 UInt rE = eregOfRexRM(pfx,modrm); 14132 assign( sV, getYMMReg(rE) ); 14133 DIP("vmovs%cdup %s,%s\n", 14134 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG)); 14135 delta += 1; 14136 } else { 14137 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14138 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 14139 DIP("vmovs%cdup %s,%s\n", 14140 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG)); 14141 delta += alen; 14142 } 14143 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 14144 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 ) 14145 : mkV128from32s( s7, s7, s5, s5 ) ); 14146 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 ) 14147 : mkV128from32s( s3, s3, s1, s1 ) ); 14148 return delta; 14149 } 14150 14151 14152 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 14153 { 14154 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 14155 IRTemp leftV = newTemp(Ity_V128); 14156 IRTemp rightV = newTemp(Ity_V128); 14157 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 14158 14159 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 14160 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 14161 14162 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) ); 14163 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) ); 14164 14165 IRTemp res = newTemp(Ity_V128); 14166 assign( res, binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 14167 mkexpr(leftV), mkexpr(rightV) ) ); 14168 return res; 14169 } 14170 14171 14172 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 14173 { 14174 IRTemp s1, s0, d1, d0; 14175 IRTemp leftV = newTemp(Ity_V128); 14176 IRTemp rightV = newTemp(Ity_V128); 14177 s1 = s0 = d1 = d0 = IRTemp_INVALID; 14178 14179 breakupV128to64s( sV, &s1, &s0 ); 14180 breakupV128to64s( dV, &d1, &d0 ); 14181 14182 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 14183 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 14184 14185 IRTemp res = newTemp(Ity_V128); 14186 assign( res, binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 14187 mkexpr(leftV), mkexpr(rightV) ) ); 14188 return res; 14189 } 14190 14191 14192 __attribute__((noinline)) 14193 static 14194 Long dis_ESC_0F__SSE3 ( Bool* decode_OK, 14195 VexAbiInfo* vbi, 14196 Prefix pfx, Int sz, Long deltaIN ) 14197 { 14198 IRTemp addr = IRTemp_INVALID; 14199 UChar modrm = 0; 14200 Int alen = 0; 14201 HChar dis_buf[50]; 14202 14203 *decode_OK = False; 14204 14205 Long delta = deltaIN; 14206 UChar opc = getUChar(delta); 14207 delta++; 14208 switch (opc) { 14209 14210 case 0x12: 14211 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 14212 duplicating some lanes (2:2:0:0). 
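For example (illustrative): 32-bit source lanes [s3 s2 s1 s0], written high lane first, become [s2 s2 s0 s0].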
*/ 14213 if (haveF3no66noF2(pfx) && sz == 4) { 14214 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14215 True/*isL*/ ); 14216 goto decode_success; 14217 } 14218 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 14219 duplicating some lanes (0:1:0:1). */ 14220 if (haveF2no66noF3(pfx) 14221 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14222 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ ); 14223 goto decode_success; 14224 } 14225 break; 14226 14227 case 0x16: 14228 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 14229 duplicating some lanes (3:3:1:1). */ 14230 if (haveF3no66noF2(pfx) && sz == 4) { 14231 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14232 False/*!isL*/ ); 14233 goto decode_success; 14234 } 14235 break; 14236 14237 case 0x7C: 14238 case 0x7D: 14239 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 14240 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 14241 if (haveF2no66noF3(pfx) && sz == 4) { 14242 IRTemp eV = newTemp(Ity_V128); 14243 IRTemp gV = newTemp(Ity_V128); 14244 Bool isAdd = opc == 0x7C; 14245 HChar* str = isAdd ? "add" : "sub"; 14246 modrm = getUChar(delta); 14247 UInt rG = gregOfRexRM(pfx,modrm); 14248 if (epartIsReg(modrm)) { 14249 UInt rE = eregOfRexRM(pfx,modrm); 14250 assign( eV, getXMMReg(rE) ); 14251 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14252 delta += 1; 14253 } else { 14254 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14255 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14256 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14257 delta += alen; 14258 } 14259 14260 assign( gV, getXMMReg(rG) ); 14261 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) ); 14262 goto decode_success; 14263 } 14264 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 14265 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 14266 if (have66noF2noF3(pfx) && sz == 2) { 14267 IRTemp eV = newTemp(Ity_V128); 14268 IRTemp gV = newTemp(Ity_V128); 14269 Bool isAdd = opc == 0x7C; 14270 HChar* str = isAdd ? "add" : "sub"; 14271 modrm = getUChar(delta); 14272 UInt rG = gregOfRexRM(pfx,modrm); 14273 if (epartIsReg(modrm)) { 14274 UInt rE = eregOfRexRM(pfx,modrm); 14275 assign( eV, getXMMReg(rE) ); 14276 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14277 delta += 1; 14278 } else { 14279 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14280 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14281 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14282 delta += alen; 14283 } 14284 14285 assign( gV, getXMMReg(rG) ); 14286 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) ); 14287 goto decode_success; 14288 } 14289 break; 14290 14291 case 0xD0: 14292 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm).
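Concretely, as a worked reading of math_ADDSUBPD_128: result lane 1 is d1 + s1 and result lane 0 is d0 - s0, where d is G and s is E.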
*/ 14293 if (have66noF2noF3(pfx) && sz == 2) { 14294 IRTemp eV = newTemp(Ity_V128); 14295 IRTemp gV = newTemp(Ity_V128); 14296 modrm = getUChar(delta); 14297 UInt rG = gregOfRexRM(pfx,modrm); 14298 if (epartIsReg(modrm)) { 14299 UInt rE = eregOfRexRM(pfx,modrm); 14300 assign( eV, getXMMReg(rE) ); 14301 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14302 delta += 1; 14303 } else { 14304 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14305 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14306 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG)); 14307 delta += alen; 14308 } 14309 14310 assign( gV, getXMMReg(rG) ); 14311 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) ); 14312 goto decode_success; 14313 } 14314 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 14315 if (haveF2no66noF3(pfx) && sz == 4) { 14316 IRTemp eV = newTemp(Ity_V128); 14317 IRTemp gV = newTemp(Ity_V128); 14318 modrm = getUChar(delta); 14319 UInt rG = gregOfRexRM(pfx,modrm); 14320 14321 modrm = getUChar(delta); 14322 if (epartIsReg(modrm)) { 14323 UInt rE = eregOfRexRM(pfx,modrm); 14324 assign( eV, getXMMReg(rE) ); 14325 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14326 delta += 1; 14327 } else { 14328 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14329 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14330 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG)); 14331 delta += alen; 14332 } 14333 14334 assign( gV, getXMMReg(rG) ); 14335 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) ); 14336 goto decode_success; 14337 } 14338 break; 14339 14340 case 0xF0: 14341 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 14342 if (haveF2no66noF3(pfx) && sz == 4) { 14343 modrm = getUChar(delta); 14344 if (epartIsReg(modrm)) { 14345 goto decode_failure; 14346 } else { 14347 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14348 putXMMReg( gregOfRexRM(pfx,modrm), 14349 loadLE(Ity_V128, mkexpr(addr)) ); 14350 DIP("lddqu %s,%s\n", dis_buf, 14351 nameXMMReg(gregOfRexRM(pfx,modrm))); 14352 delta += alen; 14353 } 14354 goto decode_success; 14355 } 14356 break; 14357 14358 default: 14359 goto decode_failure; 14360 14361 } 14362 14363 decode_failure: 14364 *decode_OK = False; 14365 return deltaIN; 14366 14367 decode_success: 14368 *decode_OK = True; 14369 return delta; 14370 } 14371 14372 14373 /*------------------------------------------------------------*/ 14374 /*--- ---*/ 14375 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/ 14376 /*--- ---*/ 14377 /*------------------------------------------------------------*/ 14378 14379 static 14380 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ ) 14381 { 14382 IRTemp sHi = newTemp(Ity_I64); 14383 IRTemp sLo = newTemp(Ity_I64); 14384 IRTemp dHi = newTemp(Ity_I64); 14385 IRTemp dLo = newTemp(Ity_I64); 14386 IRTemp rHi = newTemp(Ity_I64); 14387 IRTemp rLo = newTemp(Ity_I64); 14388 IRTemp sevens = newTemp(Ity_I64); 14389 IRTemp mask0x80hi = newTemp(Ity_I64); 14390 IRTemp mask0x80lo = newTemp(Ity_I64); 14391 IRTemp maskBit3hi = newTemp(Ity_I64); 14392 IRTemp maskBit3lo = newTemp(Ity_I64); 14393 IRTemp sAnd7hi = newTemp(Ity_I64); 14394 IRTemp sAnd7lo = newTemp(Ity_I64); 14395 IRTemp permdHi = newTemp(Ity_I64); 14396 IRTemp permdLo = newTemp(Ity_I64); 14397 IRTemp res = newTemp(Ity_V128); 14398 14399 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 14400 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 14401 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 14402 assign( sLo, unop(Iop_V128to64, 
mkexpr(sV)) ); 14403 14404 assign( sevens, mkU64(0x0707070707070707ULL) ); 14405 14406 /* mask0x80hi = Not(SarN8x8(sHi,7)) 14407 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 14408 sAnd7hi = And(sHi,sevens) 14409 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 14410 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 14411 rHi = And(permdHi,mask0x80hi) 14412 */ 14413 assign( 14414 mask0x80hi, 14415 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 14416 14417 assign( 14418 maskBit3hi, 14419 binop(Iop_SarN8x8, 14420 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 14421 mkU8(7))); 14422 14423 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 14424 14425 assign( 14426 permdHi, 14427 binop( 14428 Iop_Or64, 14429 binop(Iop_And64, 14430 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 14431 mkexpr(maskBit3hi)), 14432 binop(Iop_And64, 14433 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 14434 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 14435 14436 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 14437 14438 /* And the same for the lower half of the result. What fun. */ 14439 14440 assign( 14441 mask0x80lo, 14442 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 14443 14444 assign( 14445 maskBit3lo, 14446 binop(Iop_SarN8x8, 14447 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 14448 mkU8(7))); 14449 14450 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 14451 14452 assign( 14453 permdLo, 14454 binop( 14455 Iop_Or64, 14456 binop(Iop_And64, 14457 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 14458 mkexpr(maskBit3lo)), 14459 binop(Iop_And64, 14460 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 14461 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 14462 14463 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 14464 14465 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))); 14466 return res; 14467 } 14468 14469 14470 static Long dis_PHADD_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta, 14471 Bool isAvx, UChar opc ) 14472 { 14473 IRTemp addr = IRTemp_INVALID; 14474 Int alen = 0; 14475 HChar dis_buf[50]; 14476 HChar* str = "???"; 14477 IROp opV64 = Iop_INVALID; 14478 IROp opCatO = Iop_CatOddLanes16x4; 14479 IROp opCatE = Iop_CatEvenLanes16x4; 14480 IRTemp sV = newTemp(Ity_V128); 14481 IRTemp dV = newTemp(Ity_V128); 14482 IRTemp sHi = newTemp(Ity_I64); 14483 IRTemp sLo = newTemp(Ity_I64); 14484 IRTemp dHi = newTemp(Ity_I64); 14485 IRTemp dLo = newTemp(Ity_I64); 14486 UChar modrm = getUChar(delta); 14487 UInt rG = gregOfRexRM(pfx,modrm); 14488 UInt rV = isAvx ? 
getVexNvvvv(pfx) : rG; 14489 14490 switch (opc) { 14491 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 14492 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 14493 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 14494 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 14495 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 14496 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 14497 default: vassert(0); 14498 } 14499 if (opc == 0x02 || opc == 0x06) { 14500 opCatO = Iop_InterleaveHI32x2; 14501 opCatE = Iop_InterleaveLO32x2; 14502 } 14503 14504 assign( dV, getXMMReg(rV) ); 14505 14506 if (epartIsReg(modrm)) { 14507 UInt rE = eregOfRexRM(pfx,modrm); 14508 assign( sV, getXMMReg(rE) ); 14509 DIP("ph%s %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14510 delta += 1; 14511 } else { 14512 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14513 if (!isAvx) 14514 gen_SEGV_if_not_16_aligned( addr ); 14515 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14516 DIP("ph%s %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14517 delta += alen; 14518 } 14519 14520 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 14521 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 14522 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 14523 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 14524 14525 /* This isn't a particularly efficient way to compute the 14526 result, but at least it avoids a proliferation of IROps, 14527 hence avoids complicating all the backends. */ 14528 14529 (isAvx ? putYMMRegLoAndZU : putXMMReg) 14530 ( rG, 14531 binop(Iop_64HLtoV128, 14532 binop(opV64, 14533 binop(opCatE,mkexpr(sHi),mkexpr(sLo)), 14534 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ), 14535 binop(opV64, 14536 binop(opCatE,mkexpr(dHi),mkexpr(dLo)), 14537 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) ); 14538 return delta; 14539 } 14540 14541 14542 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV ) 14543 { 14544 IRTemp sVoddsSX = newTemp(Ity_V128); 14545 IRTemp sVevensSX = newTemp(Ity_V128); 14546 IRTemp dVoddsZX = newTemp(Ity_V128); 14547 IRTemp dVevensZX = newTemp(Ity_V128); 14548 /* compute dV unsigned x sV signed */ 14549 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 14550 assign( sVevensSX, binop(Iop_SarN16x8, 14551 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 14552 mkU8(8)) ); 14553 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 14554 assign( dVevensZX, binop(Iop_ShrN16x8, 14555 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 14556 mkU8(8)) ); 14557 14558 IRTemp res = newTemp(Ity_V128); 14559 assign( res, binop(Iop_QAdd16Sx8, 14560 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 14561 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 14562 ) 14563 ); 14564 return res; 14565 } 14566 14567 14568 __attribute__((noinline)) 14569 static 14570 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK, 14571 VexAbiInfo* vbi, 14572 Prefix pfx, Int sz, Long deltaIN ) 14573 { 14574 IRTemp addr = IRTemp_INVALID; 14575 UChar modrm = 0; 14576 Int alen = 0; 14577 HChar dis_buf[50]; 14578 14579 *decode_OK = False; 14580 14581 Long delta = deltaIN; 14582 UChar opc = getUChar(delta); 14583 delta++; 14584 switch (opc) { 14585 14586 case 0x00: 14587 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 14588 if (have66noF2noF3(pfx) 14589 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 14590 IRTemp sV = newTemp(Ity_V128); 14591 IRTemp dV = newTemp(Ity_V128); 14592 14593 modrm = getUChar(delta); 14594 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 14595 14596 if (epartIsReg(modrm)) { 14597 assign(
sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 14598 delta += 1; 14599 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14600 nameXMMReg(gregOfRexRM(pfx,modrm))); 14601 } else { 14602 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14603 gen_SEGV_if_not_16_aligned( addr ); 14604 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14605 delta += alen; 14606 DIP("pshufb %s,%s\n", dis_buf, 14607 nameXMMReg(gregOfRexRM(pfx,modrm))); 14608 } 14609 14610 IRTemp res = math_PSHUFB_XMM( dV, sV ); 14611 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res)); 14612 goto decode_success; 14613 } 14614 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 14615 if (haveNo66noF2noF3(pfx) && sz == 4) { 14616 IRTemp sV = newTemp(Ity_I64); 14617 IRTemp dV = newTemp(Ity_I64); 14618 14619 modrm = getUChar(delta); 14620 do_MMX_preamble(); 14621 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14622 14623 if (epartIsReg(modrm)) { 14624 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14625 delta += 1; 14626 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14627 nameMMXReg(gregLO3ofRM(modrm))); 14628 } else { 14629 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14630 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14631 delta += alen; 14632 DIP("pshufb %s,%s\n", dis_buf, 14633 nameMMXReg(gregLO3ofRM(modrm))); 14634 } 14635 14636 putMMXReg( 14637 gregLO3ofRM(modrm), 14638 binop( 14639 Iop_And64, 14640 /* permute the lanes */ 14641 binop( 14642 Iop_Perm8x8, 14643 mkexpr(dV), 14644 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 14645 ), 14646 /* mask off lanes which have (index & 0x80) == 0x80 */ 14647 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 14648 ) 14649 ); 14650 goto decode_success; 14651 } 14652 break; 14653 14654 case 0x01: 14655 case 0x02: 14656 case 0x03: 14657 case 0x05: 14658 case 0x06: 14659 case 0x07: 14660 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 14661 G to G (xmm). */ 14662 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 14663 G to G (xmm). */ 14664 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 14665 xmm) and G to G (xmm). */ 14666 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 14667 G to G (xmm). */ 14668 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 14669 G to G (xmm). */ 14670 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 14671 xmm) and G to G (xmm). */ 14672 if (have66noF2noF3(pfx) 14673 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 14674 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc ); 14675 goto decode_success; 14676 } 14677 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 14678 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 14679 to G (mmx). */ 14680 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 14681 to G (mmx). */ 14682 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 14683 mmx) and G to G (mmx). */ 14684 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 14685 to G (mmx). */ 14686 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 14687 to G (mmx). */ 14688 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 14689 mmx) and G to G (mmx). 
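As a sketch of the PHADDW case: with 16-bit lanes E = [e3 e2 e1 e0] and G = [g3 g2 g1 g0], high lane first, the result is [e3+e2, e1+e0, g3+g2, g1+g0]; the other five forms differ only in lane width, saturation, and add versus sub.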
*/ 14690 if (haveNo66noF2noF3(pfx) && sz == 4) { 14691 HChar* str = "???"; 14692 IROp opV64 = Iop_INVALID; 14693 IROp opCatO = Iop_CatOddLanes16x4; 14694 IROp opCatE = Iop_CatEvenLanes16x4; 14695 IRTemp sV = newTemp(Ity_I64); 14696 IRTemp dV = newTemp(Ity_I64); 14697 14698 modrm = getUChar(delta); 14699 14700 switch (opc) { 14701 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 14702 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 14703 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 14704 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 14705 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 14706 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 14707 default: vassert(0); 14708 } 14709 if (opc == 0x02 || opc == 0x06) { 14710 opCatO = Iop_InterleaveHI32x2; 14711 opCatE = Iop_InterleaveLO32x2; 14712 } 14713 14714 do_MMX_preamble(); 14715 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14716 14717 if (epartIsReg(modrm)) { 14718 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14719 delta += 1; 14720 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 14721 nameMMXReg(gregLO3ofRM(modrm))); 14722 } else { 14723 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14724 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14725 delta += alen; 14726 DIP("ph%s %s,%s\n", str, dis_buf, 14727 nameMMXReg(gregLO3ofRM(modrm))); 14728 } 14729 14730 putMMXReg( 14731 gregLO3ofRM(modrm), 14732 binop(opV64, 14733 binop(opCatE,mkexpr(sV),mkexpr(dV)), 14734 binop(opCatO,mkexpr(sV),mkexpr(dV)) 14735 ) 14736 ); 14737 goto decode_success; 14738 } 14739 break; 14740 14741 case 0x04: 14742 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 14743 Unsigned Bytes (XMM) */ 14744 if (have66noF2noF3(pfx) 14745 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 14746 IRTemp sV = newTemp(Ity_V128); 14747 IRTemp dV = newTemp(Ity_V128); 14748 modrm = getUChar(delta); 14749 UInt rG = gregOfRexRM(pfx,modrm); 14750 14751 assign( dV, getXMMReg(rG) ); 14752 14753 if (epartIsReg(modrm)) { 14754 UInt rE = eregOfRexRM(pfx,modrm); 14755 assign( sV, getXMMReg(rE) ); 14756 delta += 1; 14757 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14758 } else { 14759 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14760 gen_SEGV_if_not_16_aligned( addr ); 14761 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14762 delta += alen; 14763 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG)); 14764 } 14765 14766 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) ); 14767 goto decode_success; 14768 } 14769 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 14770 Unsigned Bytes (MMX) */ 14771 if (haveNo66noF2noF3(pfx) && sz == 4) { 14772 IRTemp sV = newTemp(Ity_I64); 14773 IRTemp dV = newTemp(Ity_I64); 14774 IRTemp sVoddsSX = newTemp(Ity_I64); 14775 IRTemp sVevensSX = newTemp(Ity_I64); 14776 IRTemp dVoddsZX = newTemp(Ity_I64); 14777 IRTemp dVevensZX = newTemp(Ity_I64); 14778 14779 modrm = getUChar(delta); 14780 do_MMX_preamble(); 14781 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14782 14783 if (epartIsReg(modrm)) { 14784 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14785 delta += 1; 14786 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14787 nameMMXReg(gregLO3ofRM(modrm))); 14788 } else { 14789 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14790 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14791 delta += alen; 14792 DIP("pmaddubsw %s,%s\n", dis_buf, 14793 nameMMXReg(gregLO3ofRM(modrm))); 14794 } 14795 14796 /* compute dV unsigned x sV signed */ 14797 assign( sVoddsSX, 
      break;

   case 0x04:
      /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
         Unsigned Bytes (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm     = getUChar(delta);
         UInt   rG = gregOfRexRM(pfx,modrm);

         assign( dV, getXMMReg(rG) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
         }

         putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
         goto decode_success;
      }
      /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
         Unsigned Bytes (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV        = newTemp(Ity_I64);
         IRTemp dV        = newTemp(Ity_I64);
         IRTemp sVoddsSX  = newTemp(Ity_I64);
         IRTemp sVevensSX = newTemp(Ity_I64);
         IRTemp dVoddsZX  = newTemp(Ity_I64);
         IRTemp dVevensZX = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                     nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pmaddubsw %s,%s\n", dis_buf,
                                     nameMMXReg(gregLO3ofRM(modrm)));
         }

         /* compute dV unsigned x sV signed */
         assign( sVoddsSX,
                 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
         assign( sVevensSX,
                 binop(Iop_SarN16x4,
                       binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                       mkU8(8)) );
         assign( dVoddsZX,
                 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
         assign( dVevensZX,
                 binop(Iop_ShrN16x4,
                       binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                       mkU8(8)) );

         putMMXReg(
            gregLO3ofRM(modrm),
            binop(Iop_QAdd16Sx4,
                  binop(Iop_Mul16x4, mkexpr(sVoddsSX),  mkexpr(dVoddsZX)),
                  binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
            )
         );
         goto decode_success;
      }
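      /* Scalar reference (exposition only; ref_pmaddubsw_lane is a
         hypothetical name, not part of this file): one 16-bit result
         lane of PMADDUBSW, matching the odd/even decomposition above.
         d bytes are treated as unsigned, s bytes as signed, and the
         sum of the two products saturates to signed 16-bit range.

            static Short ref_pmaddubsw_lane ( UChar dEven, UChar dOdd,
                                              Char  sEven, Char  sOdd )
            {
               Int t = (Int)dOdd * (Int)sOdd + (Int)dEven * (Int)sEven;
               if (t >  32767) t =  32767;
               if (t < -32768) t = -32768;
               return (Short)t;
            }
      */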
      break;

   case 0x08:
   case 0x09:
   case 0x0A:
      /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
      /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
      /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV      = newTemp(Ity_V128);
         IRTemp dV      = newTemp(Ity_V128);
         IRTemp sHi     = newTemp(Ity_I64);
         IRTemp sLo     = newTemp(Ity_I64);
         IRTemp dHi     = newTemp(Ity_I64);
         IRTemp dLo     = newTemp(Ity_I64);
         HChar* str     = "???";
         Int    laneszB = 0;

         switch (opc) {
            case 0x08: laneszB = 1; str = "b"; break;
            case 0x09: laneszB = 2; str = "w"; break;
            case 0x0A: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("psign%s %s,%s\n", str, dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
         assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
         assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
         assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

         putXMMReg(
            gregOfRexRM(pfx,modrm),
            binop(Iop_64HLtoV128,
                  dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
                  dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
            )
         );
         goto decode_success;
      }
      /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
      /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
      /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV      = newTemp(Ity_I64);
         IRTemp dV      = newTemp(Ity_I64);
         HChar* str     = "???";
         Int    laneszB = 0;

         switch (opc) {
            case 0x08: laneszB = 1; str = "b"; break;
            case 0x09: laneszB = 2; str = "w"; break;
            case 0x0A: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                        nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("psign%s %s,%s\n", str, dis_buf,
                                        nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
         );
         goto decode_success;
      }
      break;

   case 0x0B:
      /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
         Scale (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV  = newTemp(Ity_V128);
         IRTemp dV  = newTemp(Ity_V128);
         IRTemp sHi = newTemp(Ity_I64);
         IRTemp sLo = newTemp(Ity_I64);
         IRTemp dHi = newTemp(Ity_I64);
         IRTemp dLo = newTemp(Ity_I64);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmulhrsw %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
         assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
         assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
         assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

         putXMMReg(
            gregOfRexRM(pfx,modrm),
            binop(Iop_64HLtoV128,
                  dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
                  dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
            )
         );
         goto decode_success;
      }
      /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
         (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         IRTemp dV = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pmulhrsw %s,%s\n", dis_buf,
                                    nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
         );
         goto decode_success;
      }
      break;

   case 0x1C:
   case 0x1D:
   case 0x1E:
      /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
      /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
      /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV      = newTemp(Ity_V128);
         HChar* str     = "???";
         Int    laneszB = 0;

         switch (opc) {
            case 0x1C: laneszB = 1; str = "b"; break;
            case 0x1D: laneszB = 2; str = "w"; break;
            case 0x1E: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pabs%s %s,%s\n", str, dis_buf,
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         putXMMReg( gregOfRexRM(pfx,modrm),
                    mkexpr(math_PABS_XMM(sV, laneszB)) );
         goto decode_success;
      }
      /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
      /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
      /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV      = newTemp(Ity_I64);
         HChar* str     = "???";
         Int    laneszB = 0;

         switch (opc) {
            case 0x1C: laneszB = 1; str = "b"; break;
            case 0x1D: laneszB = 2; str = "w"; break;
            case 0x1E: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         do_MMX_preamble();

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                       nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pabs%s %s,%s\n", str, dis_buf,
                                       nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg( gregLO3ofRM(modrm),
                    mkexpr(math_PABS_MMX( sV, laneszB )) );
         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3               ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
                             VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   Long   d64   = 0;
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x0F:
      /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            d64 = (Long)getUChar(delta+1);
            delta += 1+1;
            DIP("palignr $%d,%s,%s\n", (Int)d64,
                                       nameXMMReg(eregOfRexRM(pfx,modrm)),
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            d64 = (Long)getUChar(delta+alen);
            delta += alen+1;
            DIP("palignr $%d,%s,%s\n", (Int)d64,
                                       dis_buf,
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
         putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
         goto decode_success;
      }
      /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV  = newTemp(Ity_I64);
         IRTemp dV  = newTemp(Ity_I64);
         IRTemp res = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            d64 = (Long)getUChar(delta+1);
            delta += 1+1;
            DIP("palignr $%d,%s,%s\n", (Int)d64,
                                       nameMMXReg(eregLO3ofRM(modrm)),
                                       nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            d64 = (Long)getUChar(delta+alen);
            delta += alen+1;
            DIP("palignr $%d,%s,%s\n", (Int)d64,
                                       dis_buf,
                                       nameMMXReg(gregLO3ofRM(modrm)));
         }

         if (d64 == 0) {
            assign( res, mkexpr(sV) );
         }
         else if (d64 >= 1 && d64 <= 7) {
            assign(res,
                   binop(Iop_Or64,
                         binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
                         binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
                   )));
         }
         else if (d64 == 8) {
            assign( res, mkexpr(dV) );
         }
         else if (d64 >= 9 && d64 <= 15) {
            assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
         }
         else if (d64 >= 16 && d64 <= 255) {
            assign( res, mkU64(0) );
         }
         else
            vassert(0);

         putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
         goto decode_success;
      }
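      /* Illustrative reference (exposition only; ref_palignr64 is a
         hypothetical name): a scalar model of the 64-bit PALIGNR cases
         handled above.  The 128-bit concatenation dV:sV is shifted
         right by 8*imm bits and the low 64 bits are kept.

            static ULong ref_palignr64 ( ULong dV, ULong sV, UInt imm )
            {
               if (imm == 0)              return sV;
               if (imm >= 1 && imm <= 7)
                  return (sV >> (8*imm)) | (dV << (8*(8-imm)));
               if (imm == 8)              return dV;
               if (imm >= 9 && imm <= 15) return dV >> (8*(imm-8));
               return 0;
            }

         (imm in 16..255 shifts everything out, hence zero.) */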
      break;

   default:
      break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F__SSE4                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
                        VexArchInfo* archinfo,
                        VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   IRType ty    = Ity_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0xB8:
      /* F3 0F B8  = POPCNT{W,L,Q}
         Count the number of 1 bits in a register
      */
      if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp result = gen_POPCOUNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(result));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A C P are cleared.  Z is set if SRC == 0.
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1,
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64,
                                widenUto64(mkexpr(src)),
                                mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_Z))));

         goto decode_success;
      }
      break;

   case 0xBD:
      /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
         which we can only decode if we're sure this is an AMD cpu
         that supports LZCNT, since otherwise it's BSR, which behaves
         differently.  Bizarrely, my Sandy Bridge also accepts these
         instructions but produces different results. */
      if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)
          && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp res = gen_LZCNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(res));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A P are cleared.  Z is set if RESULT == 0.
         // C is set if SRC is zero.
         IRTemp src64 = newTemp(Ity_I64);
         IRTemp res64 = newTemp(Ity_I64);
         assign(src64, widenUto64(mkexpr(src)));
         assign(res64, widenUto64(mkexpr(res)));

         IRTemp oszacp = newTemp(Ity_I64);
         assign(
            oszacp,
            binop(Iop_Or64,
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_Z)),
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_C))
            )
         );

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F38__SSE4                   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp mask = newTemp(Ity_V128);
   assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));

   IRTemp notmask = newTemp(Ity_V128);
   assign(notmask, unop(Iop_NotV128, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V128);
   assign(res,  binop(Iop_OrV128,
                      binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
   return res;
}
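/* Per-lane view (illustrative) of the mask construction above: for a
   lane width of gran bytes, shifting each lane arithmetically right by
   8*gran-1 replicates the lane's most significant bit into every bit
   position.  A control lane with its top bit set therefore becomes
   all-ones (selecting the E lane) and one with the top bit clear
   becomes all-zeroes (selecting the G lane):

      res = (vecE & mask) | (vecG & ~mask);

   e.g. for byte lanes (gran == 1), control byte 0x80 gives mask 0xFF,
   taking E's byte, while 0x7F gives mask 0x00, taking G's byte. */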
static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR128 )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp vec0Hi = IRTemp_INVALID;
   IRTemp vec0Lo = IRTemp_INVALID;
   breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );

   IRTemp mask = newTemp(Ity_V256);
   assign(mask, binop(Iop_V128HLtoV256,
                      binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
                      binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));

   IRTemp notmask = newTemp(Ity_V256);
   assign(notmask, unop(Iop_NotV256, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V256);
   assign(res,  binop(Iop_OrV256,
                      binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
   return res;
}

static Long dis_VBLENDV_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V128);
   IRTemp vecV   = newTemp(Ity_V128);
   IRTemp vecIS4 = newTemp(Ity_V128);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   delta++;
   assign(vecV,   getXMMReg(rV));
   assign(vecIS4, getXMMReg(rIS4));
   IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   return delta;
}

static Long dis_VBLENDV_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR128 )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V256);
   IRTemp vecV   = newTemp(Ity_V256);
   IRTemp vecIS4 = newTemp(Ity_V256);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }
   delta++;
   assign(vecV,   getYMMReg(rV));
   assign(vecIS4, getYMMReg(rIS4));
   IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
   putYMMReg( rG, mkexpr(res) );
   return delta;
}

static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */

   /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
      and bottom 64-bits together.  It relies on this trick:

      InterleaveLO64x2([a,b],[c,d]) == [b,d]    hence

      InterleaveLO64x2([a,b],[a,b]) == [b,b]    and similarly
      InterleaveHI64x2([a,b],[a,b]) == [a,a]

      and so the OR of the above 2 exprs produces
      [a OR b, a OR b], from which we simply take the lower half.
   */
   IRTemp and64  = newTemp(Ity_I64);
   IRTemp andn64 = newTemp(Ity_I64);

   assign(and64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andV), mkexpr(andV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andV), mkexpr(andV)))));

   assign(andn64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andnV), mkexpr(andnV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andnV), mkexpr(andnV)))));

   IRTemp z64 = newTemp(Ity_I64);
   IRTemp c64 = newTemp(Ity_I64);
   if (sign == 64) {
      /* When only interested in the most significant bit, just shift
         arithmetically right and negate. */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(and64), mkU8(63))));

      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
   } else {
      if (sign == 32) {
         /* When interested in bit 31 and bit 63, mask those bits and
            fallthrough into the PTEST handling. */
         IRTemp t0 = newTemp(Ity_I64);
         IRTemp t1 = newTemp(Ity_I64);
         IRTemp t2 = newTemp(Ity_I64);
         assign(t0, mkU64(0x8000000080000000ULL));
         assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
         assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
         and64  = t1;
         andn64 = t2;
      }
      /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
         slice out the Z and C bits conveniently.  We use the standard
         trick all-zeroes -> all-zeroes, anything-else -> all-ones
         done by "(x | -x) >>s (word-size - 1)".
      */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
                              mkexpr(and64)), mkU8(63))));

      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
                              mkexpr(andn64)), mkU8(63))));
   }

   /* And finally, slice out the Z and C flags and set the flags
      thunk to COPY for them.  OSAP are set to zero. */
   IRTemp newOSZACP = newTemp(Ity_I64);
   assign(newOSZACP,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
                binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));

   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
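/* Scalar demo (illustrative, exposition only; allOnesIfNonZero is a
   hypothetical helper, not part of this file) of the zero-detect trick
   used above.  For x != 0, at least one of x and -x has its top bit
   set, so the arithmetic right shift smears ones across the word:

      static ULong allOnesIfNonZero ( ULong x )
      {
         return (ULong)( (Long)(x | (0ULL - x)) >> 63 );
      }

   z64 is then just the complement, i.e. all-ones exactly when
   (vecE & vecG) == 0, from which the Z bit is masked out; likewise
   c64 and the C bit for (vecE & ~vecG). */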

/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_128 ( VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx, Int sign )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V128);
   IRTemp vecG  = newTemp(Ity_V128);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      delta += 1;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
      delta += alen;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameXMMReg(rG) );
   }

   assign(vecG, getXMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV128,
                       mkexpr(vecE),
                       binop(Iop_XorV128, mkexpr(vecG),
                                          mkV128(0xFFFF))));

   finish_xTESTy ( andV, andnV, sign );
   return delta;
}


/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_256 ( VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Int sign )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V256);
   IRTemp vecG  = newTemp(Ity_V256);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      delta += 1;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameYMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
      delta += alen;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameYMMReg(rG) );
   }

   assign(vecG, getYMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V256);
   IRTemp andnV = newTemp(Ity_V256);
   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV256,
                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));

   IRTemp andVhi  = IRTemp_INVALID;
   IRTemp andVlo  = IRTemp_INVALID;
   IRTemp andnVhi = IRTemp_INVALID;
   IRTemp andnVlo = IRTemp_INVALID;
   breakupV256toV128s( andV, &andVhi, &andVlo );
   breakupV256toV128s( andnV, &andnVhi, &andnVlo );

   IRTemp andV128  = newTemp(Ity_V128);
   IRTemp andnV128 = newTemp(Ity_V128);
   assign( andV128,  binop( Iop_OrV128, mkexpr(andVhi),  mkexpr(andVlo) ) );
   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );

   finish_xTESTy ( andV128, andnV128, sign );
   return delta;
}
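
/* Scalar reference (illustrative) for the PTEST-family flag semantics
   implemented by dis_xTESTy_128/_256 above, viewing each 128-bit
   operand as two ULongs eHi:eLo and gHi:gLo:

      Z = ( (eHi &  gHi) | (eLo &  gLo) ) == 0;
      C = ( (eHi & ~gHi) | (eLo & ~gLo) ) == 0;

   For VTESTPS/VTESTPD (sign == 32/64) only the per-lane sign bits
   participate, hence the masking in finish_xTESTy. */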
"ps" : "pd", 15665 dis_buf, nameYMMReg(rG) ); 15666 } 15667 15668 assign(vecG, getYMMReg(rG)); 15669 15670 /* Set Z=1 iff (vecE & vecG) == 0 15671 Set C=1 iff (vecE & not vecG) == 0 15672 */ 15673 15674 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 15675 IRTemp andV = newTemp(Ity_V256); 15676 IRTemp andnV = newTemp(Ity_V256); 15677 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG))); 15678 assign(andnV, binop(Iop_AndV256, 15679 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG)))); 15680 15681 IRTemp andVhi = IRTemp_INVALID; 15682 IRTemp andVlo = IRTemp_INVALID; 15683 IRTemp andnVhi = IRTemp_INVALID; 15684 IRTemp andnVlo = IRTemp_INVALID; 15685 breakupV256toV128s( andV, &andVhi, &andVlo ); 15686 breakupV256toV128s( andnV, &andnVhi, &andnVlo ); 15687 15688 IRTemp andV128 = newTemp(Ity_V128); 15689 IRTemp andnV128 = newTemp(Ity_V128); 15690 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) ); 15691 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) ); 15692 15693 finish_xTESTy ( andV128, andnV128, sign ); 15694 return delta; 15695 } 15696 15697 15698 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */ 15699 static Long dis_PMOVxXBW_128 ( VexAbiInfo* vbi, Prefix pfx, 15700 Long delta, Bool isAvx, Bool xIsZ ) 15701 { 15702 IRTemp addr = IRTemp_INVALID; 15703 Int alen = 0; 15704 HChar dis_buf[50]; 15705 IRTemp srcVec = newTemp(Ity_V128); 15706 UChar modrm = getUChar(delta); 15707 UChar* mbV = isAvx ? "v" : ""; 15708 UChar how = xIsZ ? 'z' : 's'; 15709 UInt rG = gregOfRexRM(pfx, modrm); 15710 if ( epartIsReg(modrm) ) { 15711 UInt rE = eregOfRexRM(pfx, modrm); 15712 assign( srcVec, getXMMReg(rE) ); 15713 delta += 1; 15714 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 15715 } else { 15716 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 15717 assign( srcVec, 15718 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15719 delta += alen; 15720 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 15721 } 15722 15723 IRExpr* res 15724 = xIsZ /* do math for either zero or sign extend */ 15725 ? binop( Iop_InterleaveLO8x16, 15726 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) 15727 : binop( Iop_SarN16x8, 15728 binop( Iop_ShlN16x8, 15729 binop( Iop_InterleaveLO8x16, 15730 IRExpr_Const( IRConst_V128(0) ), 15731 mkexpr(srcVec) ), 15732 mkU8(8) ), 15733 mkU8(8) ); 15734 15735 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 15736 15737 return delta; 15738 } 15739 15740 15741 static Long dis_PMOVxXWD_128 ( VexAbiInfo* vbi, Prefix pfx, 15742 Long delta, Bool isAvx, Bool xIsZ ) 15743 { 15744 IRTemp addr = IRTemp_INVALID; 15745 Int alen = 0; 15746 HChar dis_buf[50]; 15747 IRTemp srcVec = newTemp(Ity_V128); 15748 UChar modrm = getUChar(delta); 15749 UChar* mbV = isAvx ? "v" : ""; 15750 UChar how = xIsZ ? 

static Long dis_PMOVxXWD_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   HChar* mbV    = isAvx ? "v" : "";
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = binop( Iop_InterleaveLO16x8,
               IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
   if (!xIsZ)
      res = binop(Iop_SarN32x4,
                  binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( gregOfRexRM(pfx, modrm), res );

   return delta;
}


static Long dis_PMOVSXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I32);
   UChar  modrm    = getUChar(delta);
   HChar* mbV      = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane32( rE, 0 ) );
      delta += 1;
      DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
      delta += alen;
      DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_16Sto64,
                         unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
                   unop( Iop_16Sto64,
                         unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
   return delta;
}


static Long dis_PMOVZXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   HChar* mbV    = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp( Ity_V128 );
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO16x8,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO16x8,
                          mkexpr(zeroVec), mkexpr(srcVec) ) ) );
   return delta;
}


/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
static Long dis_PMOVxXDQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcI64 = newTemp(Ity_I64);
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   HChar* mbV    = isAvx ? "v" : "";
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   /* Compute both srcI64 -- the value to expand -- and srcVec -- same
      thing in a V128, with arbitrary junk in the top 64 bits.  Use
      one or both of them and let iropt clean up afterwards (as
      usual). */
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
      delta += 1;
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
      assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
      delta += alen;
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO32x4,
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_64HLtoV128,
                 unop( Iop_32Sto64,
                       unop( Iop_64HIto32, mkexpr(srcI64) ) ),
                 unop( Iop_32Sto64,
                       unop( Iop_64to32, mkexpr(srcI64) ) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
static Long dis_PMOVxXBD_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   HChar* mbV    = isAvx ? "v" : "";
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   IRExpr* res
      = binop(Iop_InterleaveLO8x16,
              mkexpr(zeroVec),
              binop(Iop_InterleaveLO8x16,
                    mkexpr(zeroVec), mkexpr(srcVec)));
   if (!xIsZ)
      res = binop(Iop_SarN32x4,
                  binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVSXBQ. */
static Long dis_PMOVSXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I16);
   UChar  modrm    = getUChar(delta);
   HChar* mbV      = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane16( rE, 0 ) );
      delta += 1;
      DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
      delta += alen;
      DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_8Sto64,
                         unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
                   unop( Iop_8Sto64,
                         unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
   return delta;
}


/* Handles 128 bit versions of PMOVZXBQ. */
static Long dis_PMOVZXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   HChar* mbV    = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128,
                    unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
      delta += alen;
      DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO8x16,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}
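
/* Worked view (illustrative): the three nested interleaves above widen
   each of the two low source bytes s1, s0 to 64 bits by doubling the
   zero padding at every step (low end of the vector shown, high byte
   to low byte within each line):

      after 1 interleave :  ... 00 s1 00 s0
      after 2 interleaves:  ... 00 00 00 s1 00 00 00 s0
      after 3 interleaves:  00 00 00 00 00 00 00 s1
                            00 00 00 00 00 00 00 s0

   i.e. the result's two 64-bit lanes are zext64(s1) : zext64(s0). */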

static Long dis_PHMINPOSUW_128 ( VexAbiInfo* vbi, Prefix pfx,
                                 Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   HChar* mbV   = isAvx ? "v" : "";
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp sHi   = newTemp(Ity_I64);
   IRTemp sLo   = newTemp(Ity_I64);
   IRTemp dLo   = newTemp(Ity_I64);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      delta += 1;
      DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned(addr);
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
   }
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );
   assign( dLo, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_calculate_sse_phminposuw",
                   &amd64g_calculate_sse_phminposuw,
                   mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
         ));
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
   return delta;
}


static Long dis_AESx ( VexAbiInfo* vbi, Prefix pfx,
                       Long delta, Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   regNoL = 0;
   UInt   regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;

   /* This is a nasty kludge.  We need to pass 2 x V128 to the
      helper.  Since we can't do that, use a dirty
      helper to compute the results directly from the XMM regs in
      the guest state.  That means for the memory case, we need to
      move the left operand into a pseudo-register (XMM16, let's
      call it). */
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      delta += 1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* alignment check needed ???? */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      delta += alen;
   }

   void*  fn = &amd64g_dirtyhelper_AES;
   HChar* nm = "amd64g_dirtyhelper_AES";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffD = ymmGuestRegOffset(rG);
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);
   IRExpr*  opc4     = mkU64(opc);
   IRExpr*  gstOffDe = mkU64(gstOffD);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_4( opc4, gstOffDe, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean
      helper mechanism here for the very lame reason that we can't
      pass 2 x V128s by value to a helper, nor get one back.  Hence
      this roundabout scheme. */
   d->needsBBP = True;
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
      the second for !isAvx or the third for isAvx.
      AESIMC (0xDB) reads the first register, and writes the second. */
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (opc == 0xDB)
      d->fxState[1].fx   = Ifx_Write;
   else if (!isAvx || rG == regNoR)
      d->fxState[1].fx   = Ifx_Modify;
   else {
      d->fxState[1].fx     = Ifx_Read;
      d->nFxState++;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = gstOffD;
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );
   {
      HChar* opsuf;
      switch (opc) {
         case 0xDC: opsuf = "enc";     break;
         case 0xDD: opsuf = "enclast"; break;
         case 0xDE: opsuf = "dec";     break;
         case 0xDF: opsuf = "declast"; break;
         case 0xDB: opsuf = "imc";     break;
         default: vassert(0);
      }
      DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
          (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
          nameXMMReg(regNoR),
          (isAvx && opc != 0xDB) ? "," : "",
          (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
   }
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );
   return delta;
}

static Long dis_AESKEYGENASSIST ( VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   regNoL = 0;
   UInt   regNoR = gregOfRexRM(pfx, modrm);
   UChar  imm    = 0;

   /* This is a nasty kludge.  See AESENC et al. instructions. */
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* alignment check ???? . */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
   HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  imme     = mkU64(imm & 0xFF);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_3( imme, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean
      helper mechanism here for the very lame reason that we can't
      pass 2 x V128s by value to a helper, nor get one back.  Hence
      this roundabout scheme. */
   d->needsBBP = True;
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   stmt( IRStmt_Dirty(d) );

   DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
       (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
       nameXMMReg(regNoR));
   if (isAvx)
      putYMMRegLane128( regNoR, 1, mkV128(0) );
   return delta;
}


__attribute__((noinline))
static
Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
                          VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x10:
   case 0x14:
   case 0x15:
      /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128  (byte gran)
         66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128  (float gran)
         66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128  (double gran)
         Blend at various granularities, with XMM0 (implicit operand)
         providing the controlling mask.
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);

         HChar* nm    = NULL;
         UInt   gran  = 0;
         IROp   opSAR = Iop_INVALID;
         switch (opc) {
            case 0x10:
               nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
               break;
            case 0x14:
               nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
               break;
            case 0x15:
               nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
               break;
         }
         vassert(nm);

         IRTemp vecE = newTemp(Ity_V128);
         IRTemp vecG = newTemp(Ity_V128);
         IRTemp vec0 = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
            delta += 1;
            DIP( "%s %s,%s\n", nm,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "%s %s,%s\n", nm,
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
         assign(vec0, getXMMReg(0));

         IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
         putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));

         goto decode_success;
      }
      break;

   case 0x17:
      /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
         Logical compare (set ZF and CF from AND/ANDN of the operands) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
         goto decode_success;
      }
      break;

   case 0x20:
      /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
         Packed Move with Sign Extend from Byte to Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x21:
      /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
         Packed Move with Sign Extend from Byte to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x22:
      /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
         Packed Move with Sign Extend from Byte to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x23:
      /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
         Packed Move with Sign Extend from Word to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXWD_128(vbi, pfx, delta,
                                  False/*!isAvx*/, False/*!xIsZ*/);
         goto decode_success;
      }
      break;

   case 0x24:
      /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
         Packed Move with Sign Extend from Word to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x25:
      /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
         Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x28:
      /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
         0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
         64-bit half */
      /* This is a really poor translation -- could be improved if
         performance critical.  It's a copy-paste of PMULUDQ, too. */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
         }

         putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
         goto decode_success;
      }
      break;
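
      /* Illustrative: with dV = [d3 d2 d1 d0] and sV = [s3 s2 s1 s0]
         (32-bit lanes), PMULDQ computes the two signed widening
         products  [ (Long)d2 * (Long)s2 , (Long)d0 * (Long)s0 ],
         i.e. only the even-numbered lanes participate. */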
DWord (XMM) */ 16462 if (have66noF2noF3(pfx) && sz == 2) { 16463 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 16464 False/*!isAvx*/, True/*xIsZ*/ ); 16465 goto decode_success; 16466 } 16467 break; 16468 16469 case 0x34: 16470 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32 16471 Packed Move with Zero Extend from Word to QWord (XMM) */ 16472 if (have66noF2noF3(pfx) && sz == 2) { 16473 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 16474 goto decode_success; 16475 } 16476 break; 16477 16478 case 0x35: 16479 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64 16480 Packed Move with Zero Extend from DWord to QWord (XMM) */ 16481 if (have66noF2noF3(pfx) && sz == 2) { 16482 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 16483 False/*!isAvx*/, True/*xIsZ*/ ); 16484 goto decode_success; 16485 } 16486 break; 16487 16488 case 0x37: 16489 /* 66 0F 38 37 = PCMPGTQ 16490 64x2 comparison (signed, presumably; the Intel docs don't say :-) 16491 */ 16492 if (have66noF2noF3(pfx) && sz == 2) { 16493 /* FIXME: this needs an alignment check */ 16494 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 16495 "pcmpgtq", Iop_CmpGT64Sx2, False ); 16496 goto decode_success; 16497 } 16498 break; 16499 16500 case 0x38: 16501 case 0x3C: 16502 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min 16503 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max 16504 */ 16505 if (have66noF2noF3(pfx) && sz == 2) { 16506 /* FIXME: this needs an alignment check */ 16507 Bool isMAX = opc == 0x3C; 16508 delta = dis_SSEint_E_to_G( 16509 vbi, pfx, delta, 16510 isMAX ? "pmaxsb" : "pminsb", 16511 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16, 16512 False 16513 ); 16514 goto decode_success; 16515 } 16516 break; 16517 16518 case 0x39: 16519 case 0x3D: 16520 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128 16521 Minimum of Packed Signed Double Word Integers (XMM) 16522 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128 16523 Maximum of Packed Signed Double Word Integers (XMM) 16524 */ 16525 if (have66noF2noF3(pfx) && sz == 2) { 16526 /* FIXME: this needs an alignment check */ 16527 Bool isMAX = opc == 0x3D; 16528 delta = dis_SSEint_E_to_G( 16529 vbi, pfx, delta, 16530 isMAX ? "pmaxsd" : "pminsd", 16531 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4, 16532 False 16533 ); 16534 goto decode_success; 16535 } 16536 break; 16537 16538 case 0x3A: 16539 case 0x3E: 16540 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128 16541 Minimum of Packed Unsigned Word Integers (XMM) 16542 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128 16543 Maximum of Packed Unsigned Word Integers (XMM) 16544 */ 16545 if (have66noF2noF3(pfx) && sz == 2) { 16546 /* FIXME: this needs an alignment check */ 16547 Bool isMAX = opc == 0x3E; 16548 delta = dis_SSEint_E_to_G( 16549 vbi, pfx, delta, 16550 isMAX ? "pmaxuw" : "pminuw", 16551 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8, 16552 False 16553 ); 16554 goto decode_success; 16555 } 16556 break; 16557 16558 case 0x3B: 16559 case 0x3F: 16560 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128 16561 Minimum of Packed Unsigned Doubleword Integers (XMM) 16562 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128 16563 Maximum of Packed Unsigned Doubleword Integers (XMM) 16564 */ 16565 if (have66noF2noF3(pfx) && sz == 2) { 16566 /* FIXME: this needs an alignment check */ 16567 Bool isMAX = opc == 0x3F; 16568 delta = dis_SSEint_E_to_G( 16569 vbi, pfx, delta, 16570 isMAX ? "pmaxud" : "pminud", 16571 isMAX ? 
Iop_Max32Ux4 : Iop_Min32Ux4, 16572 False 16573 ); 16574 goto decode_success; 16575 } 16576 break; 16577 16578 case 0x40: 16579 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128 16580 32x4 integer multiply from xmm2/m128 to xmm1 */ 16581 if (have66noF2noF3(pfx) && sz == 2) { 16582 16583 modrm = getUChar(delta); 16584 16585 IRTemp argL = newTemp(Ity_V128); 16586 IRTemp argR = newTemp(Ity_V128); 16587 16588 if ( epartIsReg(modrm) ) { 16589 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 16590 delta += 1; 16591 DIP( "pmulld %s,%s\n", 16592 nameXMMReg( eregOfRexRM(pfx, modrm) ), 16593 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 16594 } else { 16595 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16596 gen_SEGV_if_not_16_aligned( addr ); 16597 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 16598 delta += alen; 16599 DIP( "pmulld %s,%s\n", 16600 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 16601 } 16602 16603 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 16604 16605 putXMMReg( gregOfRexRM(pfx, modrm), 16606 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) ); 16607 16608 goto decode_success; 16609 } 16610 break; 16611 16612 case 0x41: 16613 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128 16614 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */ 16615 if (have66noF2noF3(pfx) && sz == 2) { 16616 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ ); 16617 goto decode_success; 16618 } 16619 break; 16620 16621 case 0xDC: 16622 case 0xDD: 16623 case 0xDE: 16624 case 0xDF: 16625 case 0xDB: 16626 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128 16627 DD /r = AESENCLAST xmm1, xmm2/m128 16628 DE /r = AESDEC xmm1, xmm2/m128 16629 DF /r = AESDECLAST xmm1, xmm2/m128 16630 16631 DB /r = AESIMC xmm1, xmm2/m128 */ 16632 if (have66noF2noF3(pfx) && sz == 2) { 16633 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc ); 16634 goto decode_success; 16635 } 16636 break; 16637 16638 case 0xF0: 16639 case 0xF1: 16640 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok) 16641 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32 16642 The decoding on this is a bit unusual. 16643 */ 16644 if (haveF2noF3(pfx) 16645 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) { 16646 modrm = getUChar(delta); 16647 16648 if (opc == 0xF0) 16649 sz = 1; 16650 else 16651 vassert(sz == 2 || sz == 4 || sz == 8); 16652 16653 IRType tyE = szToITy(sz); 16654 IRTemp valE = newTemp(tyE); 16655 16656 if (epartIsReg(modrm)) { 16657 assign(valE, getIRegE(sz, pfx, modrm)); 16658 delta += 1; 16659 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm), 16660 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm)); 16661 } else { 16662 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16663 assign(valE, loadLE(tyE, mkexpr(addr))); 16664 delta += alen; 16665 DIP("crc32b %s,%s\n", dis_buf, 16666 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm)); 16667 } 16668 16669 /* Somewhat funny getting/putting of the crc32 value, in order 16670 to ensure that it turns into 64-bit gets and puts. However, 16671 mask off the upper 32 bits so as to not get memcheck false 16672 +ves around the helper call. 
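            E.g. for "crc32l %ebx,%ecx" (a sketch, for clarity), the IR
            built below is roughly
               valG0 = And64(GET:I64(rcx), 0xFFFFFFFF)
               valG1 = amd64g_calc_crc32l(valG0, 32Uto64(GET:I32(rbx)))
            and the low 32 bits of valG1 are then written back to %ecx,
            zero-extending into %rcx as usual.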
*/ 16673 IRTemp valG0 = newTemp(Ity_I64); 16674 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm), 16675 mkU64(0xFFFFFFFF))); 16676 16677 HChar* nm = NULL; 16678 void* fn = NULL; 16679 switch (sz) { 16680 case 1: nm = "amd64g_calc_crc32b"; 16681 fn = &amd64g_calc_crc32b; break; 16682 case 2: nm = "amd64g_calc_crc32w"; 16683 fn = &amd64g_calc_crc32w; break; 16684 case 4: nm = "amd64g_calc_crc32l"; 16685 fn = &amd64g_calc_crc32l; break; 16686 case 8: nm = "amd64g_calc_crc32q"; 16687 fn = &amd64g_calc_crc32q; break; 16688 } 16689 vassert(nm && fn); 16690 IRTemp valG1 = newTemp(Ity_I64); 16691 assign(valG1, 16692 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn, 16693 mkIRExprVec_2(mkexpr(valG0), 16694 widenUto64(mkexpr(valE))))); 16695 16696 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1))); 16697 goto decode_success; 16698 } 16699 break; 16700 16701 default: 16702 break; 16703 16704 } 16705 16706 //decode_failure: 16707 *decode_OK = False; 16708 return deltaIN; 16709 16710 decode_success: 16711 *decode_OK = True; 16712 return delta; 16713 } 16714 16715 16716 /*------------------------------------------------------------*/ 16717 /*--- ---*/ 16718 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/ 16719 /*--- ---*/ 16720 /*------------------------------------------------------------*/ 16721 16722 static Long dis_PEXTRW ( VexAbiInfo* vbi, Prefix pfx, 16723 Long delta, Bool isAvx ) 16724 { 16725 IRTemp addr = IRTemp_INVALID; 16726 IRTemp t0 = IRTemp_INVALID; 16727 IRTemp t1 = IRTemp_INVALID; 16728 IRTemp t2 = IRTemp_INVALID; 16729 IRTemp t3 = IRTemp_INVALID; 16730 UChar modrm = getUChar(delta); 16731 Int alen = 0; 16732 HChar dis_buf[50]; 16733 UInt rG = gregOfRexRM(pfx,modrm); 16734 Int imm8_20; 16735 IRTemp xmm_vec = newTemp(Ity_V128); 16736 IRTemp d16 = newTemp(Ity_I16); 16737 HChar* mbV = isAvx ? 
"v" : ""; 16738 16739 vassert(0==getRexW(pfx)); /* ensured by caller */ 16740 assign( xmm_vec, getXMMReg(rG) ); 16741 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 16742 16743 if ( epartIsReg( modrm ) ) { 16744 imm8_20 = (Int)(getUChar(delta+1) & 7); 16745 } else { 16746 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16747 imm8_20 = (Int)(getUChar(delta+alen) & 7); 16748 } 16749 16750 switch (imm8_20) { 16751 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break; 16752 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break; 16753 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break; 16754 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break; 16755 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break; 16756 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break; 16757 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break; 16758 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break; 16759 default: vassert(0); 16760 } 16761 16762 if ( epartIsReg( modrm ) ) { 16763 UInt rE = eregOfRexRM(pfx,modrm); 16764 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) ); 16765 delta += 1+1; 16766 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, 16767 nameXMMReg( rG ), nameIReg32( rE ) ); 16768 } else { 16769 storeLE( mkexpr(addr), mkexpr(d16) ); 16770 delta += alen+1; 16771 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf ); 16772 } 16773 return delta; 16774 } 16775 16776 16777 static Long dis_PEXTRD ( VexAbiInfo* vbi, Prefix pfx, 16778 Long delta, Bool isAvx ) 16779 { 16780 IRTemp addr = IRTemp_INVALID; 16781 IRTemp t0 = IRTemp_INVALID; 16782 IRTemp t1 = IRTemp_INVALID; 16783 IRTemp t2 = IRTemp_INVALID; 16784 IRTemp t3 = IRTemp_INVALID; 16785 UChar modrm = 0; 16786 Int alen = 0; 16787 HChar dis_buf[50]; 16788 16789 Int imm8_10; 16790 IRTemp xmm_vec = newTemp(Ity_V128); 16791 IRTemp src_dword = newTemp(Ity_I32); 16792 HChar* mbV = isAvx ? "v" : ""; 16793 16794 vassert(0==getRexW(pfx)); /* ensured by caller */ 16795 modrm = getUChar(delta); 16796 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 16797 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 16798 16799 if ( epartIsReg( modrm ) ) { 16800 imm8_10 = (Int)(getUChar(delta+1) & 3); 16801 } else { 16802 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16803 imm8_10 = (Int)(getUChar(delta+alen) & 3); 16804 } 16805 16806 switch ( imm8_10 ) { 16807 case 0: assign( src_dword, mkexpr(t0) ); break; 16808 case 1: assign( src_dword, mkexpr(t1) ); break; 16809 case 2: assign( src_dword, mkexpr(t2) ); break; 16810 case 3: assign( src_dword, mkexpr(t3) ); break; 16811 default: vassert(0); 16812 } 16813 16814 if ( epartIsReg( modrm ) ) { 16815 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 16816 delta += 1+1; 16817 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10, 16818 nameXMMReg( gregOfRexRM(pfx, modrm) ), 16819 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 16820 } else { 16821 storeLE( mkexpr(addr), mkexpr(src_dword) ); 16822 delta += alen+1; 16823 DIP( "%spextrd $%d, %s,%s\n", mbV, 16824 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 16825 } 16826 return delta; 16827 } 16828 16829 16830 static Long dis_PEXTRQ ( VexAbiInfo* vbi, Prefix pfx, 16831 Long delta, Bool isAvx ) 16832 { 16833 IRTemp addr = IRTemp_INVALID; 16834 UChar modrm = 0; 16835 Int alen = 0; 16836 HChar dis_buf[50]; 16837 16838 Int imm8_0; 16839 IRTemp xmm_vec = newTemp(Ity_V128); 16840 IRTemp src_qword = newTemp(Ity_I64); 16841 HChar* mbV = isAvx ? 
"v" : ""; 16842 16843 vassert(1==getRexW(pfx)); /* ensured by caller */ 16844 modrm = getUChar(delta); 16845 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 16846 16847 if ( epartIsReg( modrm ) ) { 16848 imm8_0 = (Int)(getUChar(delta+1) & 1); 16849 } else { 16850 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16851 imm8_0 = (Int)(getUChar(delta+alen) & 1); 16852 } 16853 16854 switch ( imm8_0 ) { 16855 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); 16856 break; 16857 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); 16858 break; 16859 default: vassert(0); 16860 } 16861 16862 if ( epartIsReg( modrm ) ) { 16863 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) ); 16864 delta += 1+1; 16865 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0, 16866 nameXMMReg( gregOfRexRM(pfx, modrm) ), 16867 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 16868 } else { 16869 storeLE( mkexpr(addr), mkexpr(src_qword) ); 16870 delta += alen+1; 16871 DIP( "%spextrq $%d, %s,%s\n", mbV, 16872 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 16873 } 16874 return delta; 16875 } 16876 16877 16878 /* This can fail, in which case it returns the original (unchanged) 16879 delta. */ 16880 static Long dis_PCMPxSTRx ( VexAbiInfo* vbi, Prefix pfx, 16881 Long delta, Bool isAvx, UChar opc ) 16882 { 16883 Long delta0 = delta; 16884 UInt isISTRx = opc & 2; 16885 UInt isxSTRM = (opc & 1) ^ 1; 16886 UInt regNoL = 0; 16887 UInt regNoR = 0; 16888 UChar imm = 0; 16889 IRTemp addr = IRTemp_INVALID; 16890 Int alen = 0; 16891 HChar dis_buf[50]; 16892 16893 /* This is a nasty kludge. We need to pass 2 x V128 to the helper 16894 (which is clean). Since we can't do that, use a dirty helper to 16895 compute the results directly from the XMM regs in the guest 16896 state. That means for the memory case, we need to move the left 16897 operand into a pseudo-register (XMM16, let's call it). */ 16898 UChar modrm = getUChar(delta); 16899 if (epartIsReg(modrm)) { 16900 regNoL = eregOfRexRM(pfx, modrm); 16901 regNoR = gregOfRexRM(pfx, modrm); 16902 imm = getUChar(delta+1); 16903 delta += 1+1; 16904 } else { 16905 regNoL = 16; /* use XMM16 as an intermediary */ 16906 regNoR = gregOfRexRM(pfx, modrm); 16907 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16908 /* No alignment check; I guess that makes sense, given that 16909 these insns are for dealing with C style strings. */ 16910 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 16911 imm = getUChar(delta+alen); 16912 delta += alen+1; 16913 } 16914 16915 /* Now we know the XMM reg numbers for the operands, and the 16916 immediate byte. Is it one we can actually handle? Throw out any 16917 cases for which the helper function has not been verified. */ 16918 switch (imm) { 16919 case 0x00: 16920 case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12: 16921 case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A: 16922 case 0x46: 16923 break; 16924 case 0x01: // the 16-bit character versions of the above 16925 case 0x03: case 0x09: case 0x0B: case 0x0D: case 0x13: 16926 case 0x1B: case 0x39: case 0x3B: case 0x45: case 0x4B: 16927 break; 16928 default: 16929 return delta0; /*FAIL*/ 16930 } 16931 16932 /* Who ya gonna call? Presumably not Ghostbusters. */ 16933 void* fn = &amd64g_dirtyhelper_PCMPxSTRx; 16934 HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx"; 16935 16936 /* Round up the arguments. Note that this is a kludge -- the use 16937 of mkU64 rather than mkIRExpr_HWord implies the assumption that 16938 the host's word size is 64-bit. 
*/ 16939 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL); 16940 UInt gstOffR = ymmGuestRegOffset(regNoR); 16941 16942 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF)); 16943 IRExpr* gstOffLe = mkU64(gstOffL); 16944 IRExpr* gstOffRe = mkU64(gstOffR); 16945 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8); 16946 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8); 16947 IRExpr** args 16948 = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN ); 16949 16950 IRTemp resT = newTemp(Ity_I64); 16951 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args ); 16952 /* It's not really a dirty call, but we can't use the clean helper 16953 mechanism here for the very lame reason that we can't pass 2 x 16954 V128s by value to a helper, nor get one back. Hence this 16955 roundabout scheme. */ 16956 d->needsBBP = True; 16957 d->nFxState = 2; 16958 vex_bzero(&d->fxState, sizeof(d->fxState)); 16959 d->fxState[0].fx = Ifx_Read; 16960 d->fxState[0].offset = gstOffL; 16961 d->fxState[0].size = sizeof(U128); 16962 d->fxState[1].fx = Ifx_Read; 16963 d->fxState[1].offset = gstOffR; 16964 d->fxState[1].size = sizeof(U128); 16965 if (isxSTRM) { 16966 /* Declare that the helper writes XMM0. */ 16967 d->nFxState = 3; 16968 d->fxState[2].fx = Ifx_Write; 16969 d->fxState[2].offset = ymmGuestRegOffset(0); 16970 d->fxState[2].size = sizeof(U128); 16971 } 16972 16973 stmt( IRStmt_Dirty(d) ); 16974 16975 /* Now resT[15:0] holds the new OSZACP values, so the condition 16976 codes must be updated. And for a xSTRI case, resT[31:16] holds 16977 the new ECX value, so stash that too. */ 16978 if (!isxSTRM) { 16979 putIReg64(R_RCX, binop(Iop_And64, 16980 binop(Iop_Shr64, mkexpr(resT), mkU8(16)), 16981 mkU64(0xFFFF))); 16982 } 16983 16984 /* Zap the upper half of the dest reg as per AVX conventions. */ 16985 if (isxSTRM && isAvx) 16986 putYMMRegLane128(/*YMM*/0, 1, mkV128(0)); 16987 16988 stmt( IRStmt_Put( 16989 OFFB_CC_DEP1, 16990 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)) 16991 )); 16992 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16993 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16994 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16995 16996 if (regNoL == 16) { 16997 DIP("%spcmp%cstr%c $%x,%s,%s\n", 16998 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 16999 (UInt)imm, dis_buf, nameXMMReg(regNoR)); 17000 } else { 17001 DIP("%spcmp%cstr%c $%x,%s,%s\n", 17002 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 17003 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR)); 17004 } 17005 17006 return delta; 17007 } 17008 17009 17010 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 ) 17011 { 17012 vassert(imm8 >= 0 && imm8 <= 15); 17013 17014 // Create a V128 value which has the selected byte in the 17015 // specified lane, and zeroes everywhere else. 
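   // An illustrative walk-through: for imm8 == 9, halfshift below is
   // u8 << 8, placed in the upper 64-bit half, so tmp128 carries the
   // new byte in lane 9.  mask == ~(1 << 9) == 0xFDFF, and mkV128
   // expands each mask bit to a whole byte, so the AndV128 keeps every
   // byte of v128 except lane 9 before the OrV128 merges the new byte in.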
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}


static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   switch (imm8) {
      case 3:  mask = 0x0FFF;
               assign(withZs, mkV128from32s(u32, z32, z32, z32));
               break;
      case 2:  mask = 0xF0FF;
               assign(withZs, mkV128from32s(z32, u32, z32, z32));
               break;
      case 1:  mask = 0xFF0F;
               assign(withZs, mkV128from32s(z32, z32, u32, z32));
               break;
      case 0:  mask = 0xFFF0;
               assign(withZs, mkV128from32s(z32, z32, z32, u32));
               break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}


static Long dis_PEXTRB_128_GtoE ( VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);
   IRTemp shr_lane = newTemp(Ity_I32);
   HChar* mbV      = isAvx ?
"v" : ""; 17124 UChar modrm = getUChar(delta); 17125 IRTemp t3, t2, t1, t0; 17126 Int imm8; 17127 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 17128 t3 = t2 = t1 = t0 = IRTemp_INVALID; 17129 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17130 17131 if ( epartIsReg( modrm ) ) { 17132 imm8 = (Int)getUChar(delta+1); 17133 } else { 17134 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17135 imm8 = (Int)getUChar(delta+alen); 17136 } 17137 switch ( (imm8 >> 2) & 3 ) { 17138 case 0: assign( sel_lane, mkexpr(t0) ); break; 17139 case 1: assign( sel_lane, mkexpr(t1) ); break; 17140 case 2: assign( sel_lane, mkexpr(t2) ); break; 17141 case 3: assign( sel_lane, mkexpr(t3) ); break; 17142 default: vassert(0); 17143 } 17144 assign( shr_lane, 17145 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 17146 17147 if ( epartIsReg( modrm ) ) { 17148 putIReg64( eregOfRexRM(pfx,modrm), 17149 unop( Iop_32Uto64, 17150 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 17151 delta += 1+1; 17152 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8, 17153 nameXMMReg( gregOfRexRM(pfx, modrm) ), 17154 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 17155 } else { 17156 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 17157 delta += alen+1; 17158 DIP( "%spextrb $%d,%s,%s\n", mbV, 17159 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 17160 } 17161 17162 return delta; 17163 } 17164 17165 17166 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 17167 { 17168 vassert(imm8 < 256); 17169 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 17170 IRTemp and_vec = newTemp(Ity_V128); 17171 IRTemp sum_vec = newTemp(Ity_V128); 17172 assign( and_vec, binop( Iop_AndV128, 17173 binop( Iop_Mul64Fx2, 17174 mkexpr(dst_vec), mkexpr(src_vec) ), 17175 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 17176 17177 assign( sum_vec, binop( Iop_Add64F0x2, 17178 binop( Iop_InterleaveHI64x2, 17179 mkexpr(and_vec), mkexpr(and_vec) ), 17180 binop( Iop_InterleaveLO64x2, 17181 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 17182 IRTemp res = newTemp(Ity_V128); 17183 assign(res, binop( Iop_AndV128, 17184 binop( Iop_InterleaveLO64x2, 17185 mkexpr(sum_vec), mkexpr(sum_vec) ), 17186 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 17187 return res; 17188 } 17189 17190 17191 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 17192 { 17193 vassert(imm8 < 256); 17194 IRTemp tmp_prod_vec = newTemp(Ity_V128); 17195 IRTemp prod_vec = newTemp(Ity_V128); 17196 IRTemp sum_vec = newTemp(Ity_V128); 17197 IRTemp v3, v2, v1, v0; 17198 v3 = v2 = v1 = v0 = IRTemp_INVALID; 17199 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 17200 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 17201 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 17202 0xFFFF }; 17203 17204 assign( tmp_prod_vec, 17205 binop( Iop_AndV128, 17206 binop( Iop_Mul32Fx4, mkexpr(dst_vec), 17207 mkexpr(src_vec) ), 17208 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 17209 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 17210 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) ); 17211 17212 assign( sum_vec, binop( Iop_Add32Fx4, 17213 binop( Iop_InterleaveHI32x4, 17214 mkexpr(prod_vec), mkexpr(prod_vec) ), 17215 binop( Iop_InterleaveLO32x4, 17216 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 17217 17218 IRTemp res = newTemp(Ity_V128); 17219 assign( res, binop( Iop_AndV128, 17220 binop( Iop_Add32Fx4, 17221 binop( Iop_InterleaveHI32x4, 17222 mkexpr(sum_vec), mkexpr(sum_vec) ), 17223 binop( Iop_InterleaveLO32x4, 17224 mkexpr(sum_vec), 
mkexpr(sum_vec) ) ), 17225 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 17226 return res; 17227 } 17228 17229 17230 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 ) 17231 { 17232 /* Mask out bits of the operands we don't need. This isn't 17233 strictly necessary, but it does ensure Memcheck doesn't 17234 give us any false uninitialised value errors as a 17235 result. */ 17236 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 }; 17237 UShort dst_mask[2] = { 0x07FF, 0x7FF0 }; 17238 17239 IRTemp src_maskV = newTemp(Ity_V128); 17240 IRTemp dst_maskV = newTemp(Ity_V128); 17241 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] )); 17242 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] )); 17243 17244 IRTemp src_masked = newTemp(Ity_V128); 17245 IRTemp dst_masked = newTemp(Ity_V128); 17246 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV))); 17247 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV))); 17248 17249 /* Generate 4 64 bit values that we can hand to a clean helper */ 17250 IRTemp sHi = newTemp(Ity_I64); 17251 IRTemp sLo = newTemp(Ity_I64); 17252 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) ); 17253 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) ); 17254 17255 IRTemp dHi = newTemp(Ity_I64); 17256 IRTemp dLo = newTemp(Ity_I64); 17257 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) ); 17258 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) ); 17259 17260 /* Compute halves of the result separately */ 17261 IRTemp resHi = newTemp(Ity_I64); 17262 IRTemp resLo = newTemp(Ity_I64); 17263 17264 IRExpr** argsHi 17265 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 17266 mkU64( 0x80 | (imm8 & 7) )); 17267 IRExpr** argsLo 17268 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 17269 mkU64( 0x00 | (imm8 & 7) )); 17270 17271 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/, 17272 "amd64g_calc_mpsadbw", 17273 &amd64g_calc_mpsadbw, argsHi )); 17274 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/, 17275 "amd64g_calc_mpsadbw", 17276 &amd64g_calc_mpsadbw, argsLo )); 17277 17278 IRTemp res = newTemp(Ity_V128); 17279 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))); 17280 return res; 17281 } 17282 17283 static Long dis_EXTRACTPS ( VexAbiInfo* vbi, Prefix pfx, 17284 Long delta, Bool isAvx ) 17285 { 17286 IRTemp addr = IRTemp_INVALID; 17287 Int alen = 0; 17288 HChar dis_buf[50]; 17289 UChar modrm = getUChar(delta); 17290 Int imm8_10; 17291 IRTemp xmm_vec = newTemp(Ity_V128); 17292 IRTemp src_dword = newTemp(Ity_I32); 17293 UInt rG = gregOfRexRM(pfx,modrm); 17294 IRTemp t3, t2, t1, t0; 17295 t3 = t2 = t1 = t0 = IRTemp_INVALID; 17296 17297 assign( xmm_vec, getXMMReg( rG ) ); 17298 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17299 17300 if ( epartIsReg( modrm ) ) { 17301 imm8_10 = (Int)(getUChar(delta+1) & 3); 17302 } else { 17303 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17304 imm8_10 = (Int)(getUChar(delta+alen) & 3); 17305 } 17306 17307 switch ( imm8_10 ) { 17308 case 0: assign( src_dword, mkexpr(t0) ); break; 17309 case 1: assign( src_dword, mkexpr(t1) ); break; 17310 case 2: assign( src_dword, mkexpr(t2) ); break; 17311 case 3: assign( src_dword, mkexpr(t3) ); break; 17312 default: vassert(0); 17313 } 17314 17315 if ( epartIsReg( modrm ) ) { 17316 UInt rE = eregOfRexRM(pfx,modrm); 17317 putIReg32( rE, mkexpr(src_dword) ); 17318 delta += 1+1; 17319 DIP( "%sextractps $%d, %s,%s\n", isAvx ? 
"v" : "", imm8_10, 17320 nameXMMReg( rG ), nameIReg32( rE ) ); 17321 } else { 17322 storeLE( mkexpr(addr), mkexpr(src_dword) ); 17323 delta += alen+1; 17324 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 17325 nameXMMReg( rG ), dis_buf ); 17326 } 17327 17328 return delta; 17329 } 17330 17331 17332 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 ) 17333 { 17334 IRTemp t0 = newTemp(Ity_I64); 17335 IRTemp t1 = newTemp(Ity_I64); 17336 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, 17337 mkexpr(dV))); 17338 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64, 17339 mkexpr(sV))); 17340 17341 IRTemp t2 = newTemp(Ity_I64); 17342 IRTemp t3 = newTemp(Ity_I64); 17343 17344 IRExpr** args; 17345 17346 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0)); 17347 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 17348 &amd64g_calculate_pclmul, args)); 17349 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1)); 17350 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 17351 &amd64g_calculate_pclmul, args)); 17352 17353 IRTemp res = newTemp(Ity_V128); 17354 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2))); 17355 return res; 17356 } 17357 17358 17359 __attribute__((noinline)) 17360 static 17361 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK, 17362 VexAbiInfo* vbi, 17363 Prefix pfx, Int sz, Long deltaIN ) 17364 { 17365 IRTemp addr = IRTemp_INVALID; 17366 UChar modrm = 0; 17367 Int alen = 0; 17368 HChar dis_buf[50]; 17369 17370 *decode_OK = False; 17371 17372 Long delta = deltaIN; 17373 UChar opc = getUChar(delta); 17374 delta++; 17375 switch (opc) { 17376 17377 case 0x08: 17378 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */ 17379 if (have66noF2noF3(pfx) && sz == 2) { 17380 17381 IRTemp src0 = newTemp(Ity_F32); 17382 IRTemp src1 = newTemp(Ity_F32); 17383 IRTemp src2 = newTemp(Ity_F32); 17384 IRTemp src3 = newTemp(Ity_F32); 17385 IRTemp res0 = newTemp(Ity_F32); 17386 IRTemp res1 = newTemp(Ity_F32); 17387 IRTemp res2 = newTemp(Ity_F32); 17388 IRTemp res3 = newTemp(Ity_F32); 17389 IRTemp rm = newTemp(Ity_I32); 17390 Int imm = 0; 17391 17392 modrm = getUChar(delta); 17393 17394 if (epartIsReg(modrm)) { 17395 assign( src0, 17396 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 17397 assign( src1, 17398 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) ); 17399 assign( src2, 17400 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) ); 17401 assign( src3, 17402 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) ); 17403 imm = getUChar(delta+1); 17404 if (imm & ~15) goto decode_failure; 17405 delta += 1+1; 17406 DIP( "roundps $%d,%s,%s\n", 17407 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 17408 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17409 } else { 17410 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17411 gen_SEGV_if_not_16_aligned(addr); 17412 assign( src0, loadLE(Ity_F32, 17413 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 17414 assign( src1, loadLE(Ity_F32, 17415 binop(Iop_Add64, mkexpr(addr), mkU64(4) ))); 17416 assign( src2, loadLE(Ity_F32, 17417 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 17418 assign( src3, loadLE(Ity_F32, 17419 binop(Iop_Add64, mkexpr(addr), mkU64(12) ))); 17420 imm = getUChar(delta+alen); 17421 if (imm & ~15) goto decode_failure; 17422 delta += alen+1; 17423 DIP( "roundps $%d,%s,%s\n", 17424 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17425 } 17426 17427 /* (imm & 3) contains an Intel-encoded rounding mode. 
Because 17428 that encoding is the same as the encoding for IRRoundingMode, 17429 we can use that value directly in the IR as a rounding 17430 mode. */ 17431 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 17432 17433 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) ); 17434 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) ); 17435 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) ); 17436 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) ); 17437 17438 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 17439 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 17440 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) ); 17441 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) ); 17442 17443 goto decode_success; 17444 } 17445 break; 17446 17447 case 0x09: 17448 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */ 17449 if (have66noF2noF3(pfx) && sz == 2) { 17450 17451 IRTemp src0 = newTemp(Ity_F64); 17452 IRTemp src1 = newTemp(Ity_F64); 17453 IRTemp res0 = newTemp(Ity_F64); 17454 IRTemp res1 = newTemp(Ity_F64); 17455 IRTemp rm = newTemp(Ity_I32); 17456 Int imm = 0; 17457 17458 modrm = getUChar(delta); 17459 17460 if (epartIsReg(modrm)) { 17461 assign( src0, 17462 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) ); 17463 assign( src1, 17464 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) ); 17465 imm = getUChar(delta+1); 17466 if (imm & ~15) goto decode_failure; 17467 delta += 1+1; 17468 DIP( "roundpd $%d,%s,%s\n", 17469 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 17470 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17471 } else { 17472 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17473 gen_SEGV_if_not_16_aligned(addr); 17474 assign( src0, loadLE(Ity_F64, 17475 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 17476 assign( src1, loadLE(Ity_F64, 17477 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 17478 imm = getUChar(delta+alen); 17479 if (imm & ~15) goto decode_failure; 17480 delta += alen+1; 17481 DIP( "roundpd $%d,%s,%s\n", 17482 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17483 } 17484 17485 /* (imm & 3) contains an Intel-encoded rounding mode. Because 17486 that encoding is the same as the encoding for IRRoundingMode, 17487 we can use that value directly in the IR as a rounding 17488 mode. */ 17489 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 17490 17491 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) ); 17492 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) ); 17493 17494 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 17495 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 17496 17497 goto decode_success; 17498 } 17499 break; 17500 17501 case 0x0A: 17502 case 0x0B: 17503 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 17504 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 17505 */ 17506 if (have66noF2noF3(pfx) && sz == 2) { 17507 17508 Bool isD = opc == 0x0B; 17509 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 17510 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 17511 Int imm = 0; 17512 17513 modrm = getUChar(delta); 17514 17515 if (epartIsReg(modrm)) { 17516 assign( src, 17517 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 17518 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 17519 imm = getUChar(delta+1); 17520 if (imm & ~15) goto decode_failure; 17521 delta += 1+1; 17522 DIP( "rounds%c $%d,%s,%s\n", 17523 isD ? 
              'd' : 's',
              imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
            imm = getUChar(delta+alen);
            if (imm & ~15) goto decode_failure;
            delta += alen+1;
            DIP( "rounds%c $%d,%s,%s\n",
                 isD ? 'd' : 's',
                 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         /* (imm & 3) contains an Intel-encoded rounding mode.  Because
            that encoding is the same as the encoding for IRRoundingMode,
            we can use that value directly in the IR as a rounding
            mode. */
         assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           (imm & 4) ? get_sse_roundingmode()
                                     : mkU32(imm & 3),
                           mkexpr(src)) );

         if (isD)
            putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
         else
            putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );

         goto decode_success;
      }
      break;

   case 0x0C:
      /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
         Blend Packed Single Precision Floating-Point Values (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {

         Int imm8;
         IRTemp dst_vec = newTemp(Ity_V128);
         IRTemp src_vec = newTemp(Ity_V128);

         modrm = getUChar(delta);

         assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );

         if ( epartIsReg( modrm ) ) {
            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1+1;
            DIP( "blendps $%d, %s,%s\n", imm8,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            gen_SEGV_if_not_16_aligned( addr );
            assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            delta += alen+1;
            DIP( "blendps $%d, %s,%s\n",
                 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         putXMMReg( gregOfRexRM(pfx, modrm),
                    mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
         goto decode_success;
      }
      break;

   case 0x0D:
      /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
         Blend Packed Double Precision Floating-Point Values (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {

         Int imm8;
         IRTemp dst_vec = newTemp(Ity_V128);
         IRTemp src_vec = newTemp(Ity_V128);

         modrm = getUChar(delta);
         assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );

         if ( epartIsReg( modrm ) ) {
            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1+1;
            DIP( "blendpd $%d, %s,%s\n", imm8,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            gen_SEGV_if_not_16_aligned( addr );
            assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            delta += alen+1;
            DIP( "blendpd $%d, %s,%s\n",
                 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         putXMMReg( gregOfRexRM(pfx, modrm),
                    mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
         goto decode_success;
      }
      break;
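   /* An aside on the blend immediates, common to the BLENDPS/BLENDPD
      cases above and the PBLENDW case below: bit i of imm8 selects
      lane i of the source (bit set) or of the destination (bit clear).
      For example "blendps $0x5, %xmm2, %xmm1" leaves
      %xmm1 = { xmm2[0], xmm1[1], xmm2[2], xmm1[3] }.  BLENDPD only
      uses imm8[1:0], and PBLENDW steers 16-bit lanes. */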

   case 0x0E:
      /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
         Blend Packed Words (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {

         Int imm8;
         IRTemp dst_vec = newTemp(Ity_V128);
         IRTemp src_vec = newTemp(Ity_V128);

         modrm = getUChar(delta);

         assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );

         if ( epartIsReg( modrm ) ) {
            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1+1;
            DIP( "pblendw $%d, %s,%s\n", imm8,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            gen_SEGV_if_not_16_aligned( addr );
            assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            delta += alen+1;
            DIP( "pblendw $%d, %s,%s\n",
                 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         putXMMReg( gregOfRexRM(pfx, modrm),
                    mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
         goto decode_success;
      }
      break;

   case 0x14:
      /* 66 0F 3A 14 /r ib = PEXTRB r/m8, xmm, imm8
         Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
         (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x15:
      /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
         Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
         (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x16:
      /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
         Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
         Note that this insn has the same opcodes as PEXTRQ, but
         here the REX.W bit is _not_ present */
      if (have66noF2noF3(pfx)
          && sz == 2 /* REX.W is _not_ present */) {
         delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
         Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
         Note that this insn has the same opcodes as PEXTRD, but
         here the REX.W bit is present */
      if (have66noF2noF3(pfx)
          && sz == 8 /* REX.W is present */) {
         delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
         goto decode_success;
      }
      break;

   case 0x17:
      /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8  Extract
         float from xmm reg and store in gen.reg or mem.  This is
         identical to PEXTRD, except that REX.W appears to be ignored.
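         For example, "extractps $2, %xmm7, %eax" copies lane 2 of
         %xmm7 to %eax as a raw 32-bit bit-pattern.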
17710 */ 17711 if (have66noF2noF3(pfx) 17712 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 17713 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ ); 17714 goto decode_success; 17715 } 17716 break; 17717 17718 case 0x20: 17719 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 17720 Extract byte from r32/m8 and insert into xmm1 */ 17721 if (have66noF2noF3(pfx) && sz == 2) { 17722 Int imm8; 17723 IRTemp new8 = newTemp(Ity_I8); 17724 modrm = getUChar(delta); 17725 UInt rG = gregOfRexRM(pfx, modrm); 17726 if ( epartIsReg( modrm ) ) { 17727 UInt rE = eregOfRexRM(pfx,modrm); 17728 imm8 = (Int)(getUChar(delta+1) & 0xF); 17729 assign( new8, unop(Iop_32to8, getIReg32(rE)) ); 17730 delta += 1+1; 17731 DIP( "pinsrb $%d,%s,%s\n", imm8, 17732 nameIReg32(rE), nameXMMReg(rG) ); 17733 } else { 17734 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17735 imm8 = (Int)(getUChar(delta+alen) & 0xF); 17736 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) ); 17737 delta += alen+1; 17738 DIP( "pinsrb $%d,%s,%s\n", 17739 imm8, dis_buf, nameXMMReg(rG) ); 17740 } 17741 IRTemp src_vec = newTemp(Ity_V128); 17742 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) )); 17743 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 ); 17744 putXMMReg( rG, mkexpr(res) ); 17745 goto decode_success; 17746 } 17747 break; 17748 17749 case 0x21: 17750 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1 17751 Insert Packed Single Precision Floating-Point Value (XMM) */ 17752 if (have66noF2noF3(pfx) && sz == 2) { 17753 UInt imm8; 17754 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 17755 const IRTemp inval = IRTemp_INVALID; 17756 17757 modrm = getUChar(delta); 17758 UInt rG = gregOfRexRM(pfx, modrm); 17759 17760 if ( epartIsReg( modrm ) ) { 17761 UInt rE = eregOfRexRM(pfx, modrm); 17762 IRTemp vE = newTemp(Ity_V128); 17763 assign( vE, getXMMReg(rE) ); 17764 IRTemp dsE[4] = { inval, inval, inval, inval }; 17765 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 17766 imm8 = getUChar(delta+1); 17767 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 17768 delta += 1+1; 17769 DIP( "insertps $%u, %s,%s\n", 17770 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 17771 } else { 17772 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17773 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 17774 imm8 = getUChar(delta+alen); 17775 delta += alen+1; 17776 DIP( "insertps $%u, %s,%s\n", 17777 imm8, dis_buf, nameXMMReg(rG) ); 17778 } 17779 17780 IRTemp vG = newTemp(Ity_V128); 17781 assign( vG, getXMMReg(rG) ); 17782 17783 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) ); 17784 goto decode_success; 17785 } 17786 break; 17787 17788 case 0x22: 17789 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 17790 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 17791 if (have66noF2noF3(pfx) 17792 && sz == 2 /* REX.W is NOT present */) { 17793 Int imm8_10; 17794 IRTemp src_u32 = newTemp(Ity_I32); 17795 modrm = getUChar(delta); 17796 UInt rG = gregOfRexRM(pfx, modrm); 17797 17798 if ( epartIsReg( modrm ) ) { 17799 UInt rE = eregOfRexRM(pfx,modrm); 17800 imm8_10 = (Int)(getUChar(delta+1) & 3); 17801 assign( src_u32, getIReg32( rE ) ); 17802 delta += 1+1; 17803 DIP( "pinsrd $%d, %s,%s\n", 17804 imm8_10, nameIReg32(rE), nameXMMReg(rG) ); 17805 } else { 17806 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17807 imm8_10 = (Int)(getUChar(delta+alen) & 3); 17808 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 17809 delta += alen+1; 17810 DIP( "pinsrd $%d, %s,%s\n", 17811 imm8_10, dis_buf, 
nameXMMReg(rG) ); 17812 } 17813 17814 IRTemp src_vec = newTemp(Ity_V128); 17815 assign(src_vec, getXMMReg( rG )); 17816 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 17817 putXMMReg( rG, mkexpr(res_vec) ); 17818 goto decode_success; 17819 } 17820 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 17821 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 17822 if (have66noF2noF3(pfx) 17823 && sz == 8 /* REX.W is present */) { 17824 Int imm8_0; 17825 IRTemp src_u64 = newTemp(Ity_I64); 17826 modrm = getUChar(delta); 17827 UInt rG = gregOfRexRM(pfx, modrm); 17828 17829 if ( epartIsReg( modrm ) ) { 17830 UInt rE = eregOfRexRM(pfx,modrm); 17831 imm8_0 = (Int)(getUChar(delta+1) & 1); 17832 assign( src_u64, getIReg64( rE ) ); 17833 delta += 1+1; 17834 DIP( "pinsrq $%d, %s,%s\n", 17835 imm8_0, nameIReg64(rE), nameXMMReg(rG) ); 17836 } else { 17837 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17838 imm8_0 = (Int)(getUChar(delta+alen) & 1); 17839 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 17840 delta += alen+1; 17841 DIP( "pinsrq $%d, %s,%s\n", 17842 imm8_0, dis_buf, nameXMMReg(rG) ); 17843 } 17844 17845 IRTemp src_vec = newTemp(Ity_V128); 17846 assign(src_vec, getXMMReg( rG )); 17847 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 17848 putXMMReg( rG, mkexpr(res_vec) ); 17849 goto decode_success; 17850 } 17851 break; 17852 17853 case 0x40: 17854 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 17855 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 17856 if (have66noF2noF3(pfx) && sz == 2) { 17857 modrm = getUChar(delta); 17858 Int imm8; 17859 IRTemp src_vec = newTemp(Ity_V128); 17860 IRTemp dst_vec = newTemp(Ity_V128); 17861 UInt rG = gregOfRexRM(pfx, modrm); 17862 assign( dst_vec, getXMMReg( rG ) ); 17863 if ( epartIsReg( modrm ) ) { 17864 UInt rE = eregOfRexRM(pfx, modrm); 17865 imm8 = (Int)getUChar(delta+1); 17866 assign( src_vec, getXMMReg(rE) ); 17867 delta += 1+1; 17868 DIP( "dpps $%d, %s,%s\n", 17869 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 17870 } else { 17871 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 17872 1/* imm8 is 1 byte after the amode */ ); 17873 gen_SEGV_if_not_16_aligned( addr ); 17874 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 17875 imm8 = (Int)getUChar(delta+alen); 17876 delta += alen+1; 17877 DIP( "dpps $%d, %s,%s\n", 17878 imm8, dis_buf, nameXMMReg(rG) ); 17879 } 17880 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 ); 17881 putXMMReg( rG, mkexpr(res) ); 17882 goto decode_success; 17883 } 17884 break; 17885 17886 case 0x41: 17887 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 17888 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 17889 if (have66noF2noF3(pfx) && sz == 2) { 17890 modrm = getUChar(delta); 17891 Int imm8; 17892 IRTemp src_vec = newTemp(Ity_V128); 17893 IRTemp dst_vec = newTemp(Ity_V128); 17894 UInt rG = gregOfRexRM(pfx, modrm); 17895 assign( dst_vec, getXMMReg( rG ) ); 17896 if ( epartIsReg( modrm ) ) { 17897 UInt rE = eregOfRexRM(pfx, modrm); 17898 imm8 = (Int)getUChar(delta+1); 17899 assign( src_vec, getXMMReg(rE) ); 17900 delta += 1+1; 17901 DIP( "dppd $%d, %s,%s\n", 17902 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 17903 } else { 17904 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 17905 1/* imm8 is 1 byte after the amode */ ); 17906 gen_SEGV_if_not_16_aligned( addr ); 17907 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 17908 imm8 = (Int)getUChar(delta+alen); 17909 delta += alen+1; 17910 DIP( "dppd $%d, %s,%s\n", 17911 imm8, 
                 dis_buf, nameXMMReg(rG) );
         }
         IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
         putXMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      break;

   case 0x42:
      /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
         Multiple Packed Sums of Absolute Difference (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         Int imm8;
         IRTemp src_vec = newTemp(Ity_V128);
         IRTemp dst_vec = newTemp(Ity_V128);
         modrm          = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);

         assign( dst_vec, getXMMReg(rG) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);

            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getXMMReg(rE) );
            delta += 1+1;
            DIP( "mpsadbw $%d, %s,%s\n", imm8,
                 nameXMMReg(rE), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            gen_SEGV_if_not_16_aligned( addr );
            assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            delta += alen+1;
            DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
         }

         putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
         goto decode_success;
      }
      break;

   case 0x44:
      /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
       * Carry-less multiplication of selected XMM quadwords into XMM
       * registers (a.k.a. multiplication of polynomials over GF(2))
       */
      if (have66noF2noF3(pfx) && sz == 2) {

         Int imm8;
         IRTemp svec = newTemp(Ity_V128);
         IRTemp dvec = newTemp(Ity_V128);
         modrm       = getUChar(delta);
         UInt   rG   = gregOfRexRM(pfx, modrm);

         assign( dvec, getXMMReg(rG) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( svec, getXMMReg(rE) );
            delta += 1+1;
            DIP( "pclmulqdq $%d, %s,%s\n", imm8,
                 nameXMMReg(rE), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            gen_SEGV_if_not_16_aligned( addr );
            assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            delta += alen+1;
            DIP( "pclmulqdq $%d, %s,%s\n",
                 imm8, dis_buf, nameXMMReg(rG) );
         }

         putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
         goto decode_success;
      }
      break;

   case 0x60:
   case 0x61:
   case 0x62:
   case 0x63:
      /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
         66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
         66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
         66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
         (selected special cases that actually occur in glibc,
         not by any means a complete implementation.)
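         As a decoding aid: imm8[1:0] gives the element format
         (00=unsigned bytes, 01=unsigned words, 10=signed bytes,
         11=signed words), imm8[3:2] the aggregation (equal-any,
         ranges, equal-each, equal-ordered), imm8[5:4] the polarity,
         and imm8[6] lsb-vs-msb index (or bit- vs element-mask for the
         xSTRM forms).  So the accepted $0x1A, for instance, means:
         signed bytes, equal-each, negative polarity, lsb index.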
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         Long delta0 = delta;
         delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
         if (delta > delta0) goto decode_success;
         /* else fall through; dis_PCMPxSTRx failed to decode it */
      }
      break;

   case 0xDF:
      /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   default:
      break;

   }

  decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_NONE         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_NONE (
        /*MB_OUT*/DisResult* dres,
        /*MB_OUT*/Bool*      expect_CAS,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        VexArchInfo* archinfo,
        VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   d64   = 0;
   UChar  abyte = 0;
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   IRTemp t4    = IRTemp_INVALID;
   IRTemp t5    = IRTemp_INVALID;
   IRType ty    = Ity_INVALID;
   UChar  modrm = 0;
   Int    am_sz = 0;
   Int    d_sz  = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x00: /* ADD Gb,Eb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
      return delta;
   case 0x01: /* ADD Gv,Ev */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
      return delta;

   case 0x02: /* ADD Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
      return delta;
   case 0x03: /* ADD Ev,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
      return delta;

   case 0x04: /* ADD Ib, AL */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
      return delta;
   case 0x05: /* ADD Iv, eAX */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
      return delta;

   case 0x08: /* OR Gb,Eb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
      return delta;
   case 0x09: /* OR Gv,Ev */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
      return delta;

   case 0x0A: /* OR Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
      return delta;
   case 0x0B: /* OR Ev,Gv */
      if (haveF2orF3(pfx)) goto
decode_failure; 18115 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 18116 return delta; 18117 18118 case 0x0C: /* OR Ib, AL */ 18119 if (haveF2orF3(pfx)) goto decode_failure; 18120 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 18121 return delta; 18122 case 0x0D: /* OR Iv, eAX */ 18123 if (haveF2orF3(pfx)) goto decode_failure; 18124 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 18125 return delta; 18126 18127 case 0x10: /* ADC Gb,Eb */ 18128 if (haveF2orF3(pfx)) goto decode_failure; 18129 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 18130 return delta; 18131 case 0x11: /* ADC Gv,Ev */ 18132 if (haveF2orF3(pfx)) goto decode_failure; 18133 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 18134 return delta; 18135 18136 case 0x12: /* ADC Eb,Gb */ 18137 if (haveF2orF3(pfx)) goto decode_failure; 18138 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 18139 return delta; 18140 case 0x13: /* ADC Ev,Gv */ 18141 if (haveF2orF3(pfx)) goto decode_failure; 18142 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 18143 return delta; 18144 18145 case 0x14: /* ADC Ib, AL */ 18146 if (haveF2orF3(pfx)) goto decode_failure; 18147 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 18148 return delta; 18149 case 0x15: /* ADC Iv, eAX */ 18150 if (haveF2orF3(pfx)) goto decode_failure; 18151 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 18152 return delta; 18153 18154 case 0x18: /* SBB Gb,Eb */ 18155 if (haveF2orF3(pfx)) goto decode_failure; 18156 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 18157 return delta; 18158 case 0x19: /* SBB Gv,Ev */ 18159 if (haveF2orF3(pfx)) goto decode_failure; 18160 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 18161 return delta; 18162 18163 case 0x1A: /* SBB Eb,Gb */ 18164 if (haveF2orF3(pfx)) goto decode_failure; 18165 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 18166 return delta; 18167 case 0x1B: /* SBB Ev,Gv */ 18168 if (haveF2orF3(pfx)) goto decode_failure; 18169 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 18170 return delta; 18171 18172 case 0x1C: /* SBB Ib, AL */ 18173 if (haveF2orF3(pfx)) goto decode_failure; 18174 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 18175 return delta; 18176 case 0x1D: /* SBB Iv, eAX */ 18177 if (haveF2orF3(pfx)) goto decode_failure; 18178 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 18179 return delta; 18180 18181 case 0x20: /* AND Gb,Eb */ 18182 if (haveF2orF3(pfx)) goto decode_failure; 18183 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 18184 return delta; 18185 case 0x21: /* AND Gv,Ev */ 18186 if (haveF2orF3(pfx)) goto decode_failure; 18187 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 18188 return delta; 18189 18190 case 0x22: /* AND Eb,Gb */ 18191 if (haveF2orF3(pfx)) goto decode_failure; 18192 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 18193 return delta; 18194 case 0x23: /* AND Ev,Gv */ 18195 if (haveF2orF3(pfx)) goto decode_failure; 18196 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 18197 return delta; 18198 18199 case 0x24: /* AND Ib, AL */ 18200 if (haveF2orF3(pfx)) goto decode_failure; 18201 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 18202 return delta; 18203 
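   /* A worked example of the pattern used throughout this switch
      (illustrative): the bytes 25 78 56 34 12 decode as
      "andl $0x12345678, %eax".  dis_op_imm_A reads the 4-byte
      immediate at 'delta', ANDs it into the low 32 bits of RAX
      (which zero-extends into the full register, as always on
      amd64), and updates the condition-code thunk. */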
case 0x25: /* AND Iv, eAX */ 18204 if (haveF2orF3(pfx)) goto decode_failure; 18205 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 18206 return delta; 18207 18208 case 0x28: /* SUB Gb,Eb */ 18209 if (haveF2orF3(pfx)) goto decode_failure; 18210 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 18211 return delta; 18212 case 0x29: /* SUB Gv,Ev */ 18213 if (haveF2orF3(pfx)) goto decode_failure; 18214 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 18215 return delta; 18216 18217 case 0x2A: /* SUB Eb,Gb */ 18218 if (haveF2orF3(pfx)) goto decode_failure; 18219 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 18220 return delta; 18221 case 0x2B: /* SUB Ev,Gv */ 18222 if (haveF2orF3(pfx)) goto decode_failure; 18223 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 18224 return delta; 18225 18226 case 0x2C: /* SUB Ib, AL */ 18227 if (haveF2orF3(pfx)) goto decode_failure; 18228 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 18229 return delta; 18230 18231 case 0x2D: /* SUB Iv, eAX */ 18232 if (haveF2orF3(pfx)) goto decode_failure; 18233 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 18234 return delta; 18235 18236 case 0x30: /* XOR Gb,Eb */ 18237 if (haveF2orF3(pfx)) goto decode_failure; 18238 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 18239 return delta; 18240 case 0x31: /* XOR Gv,Ev */ 18241 if (haveF2orF3(pfx)) goto decode_failure; 18242 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 18243 return delta; 18244 18245 case 0x32: /* XOR Eb,Gb */ 18246 if (haveF2orF3(pfx)) goto decode_failure; 18247 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 18248 return delta; 18249 case 0x33: /* XOR Ev,Gv */ 18250 if (haveF2orF3(pfx)) goto decode_failure; 18251 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 18252 return delta; 18253 18254 case 0x34: /* XOR Ib, AL */ 18255 if (haveF2orF3(pfx)) goto decode_failure; 18256 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 18257 return delta; 18258 case 0x35: /* XOR Iv, eAX */ 18259 if (haveF2orF3(pfx)) goto decode_failure; 18260 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 18261 return delta; 18262 18263 case 0x38: /* CMP Gb,Eb */ 18264 if (haveF2orF3(pfx)) goto decode_failure; 18265 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 18266 return delta; 18267 case 0x39: /* CMP Gv,Ev */ 18268 if (haveF2orF3(pfx)) goto decode_failure; 18269 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 18270 return delta; 18271 18272 case 0x3A: /* CMP Eb,Gb */ 18273 if (haveF2orF3(pfx)) goto decode_failure; 18274 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 18275 return delta; 18276 case 0x3B: /* CMP Ev,Gv */ 18277 if (haveF2orF3(pfx)) goto decode_failure; 18278 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 18279 return delta; 18280 18281 case 0x3C: /* CMP Ib, AL */ 18282 if (haveF2orF3(pfx)) goto decode_failure; 18283 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 18284 return delta; 18285 case 0x3D: /* CMP Iv, eAX */ 18286 if (haveF2orF3(pfx)) goto decode_failure; 18287 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 18288 return delta; 18289 18290 case 0x50: /* PUSH eAX */ 18291 case 0x51: /* PUSH eCX */ 18292 case 0x52: /* PUSH eDX */ 
18293 case 0x53: /* PUSH eBX */ 18294 case 0x55: /* PUSH eBP */ 18295 case 0x56: /* PUSH eSI */ 18296 case 0x57: /* PUSH eDI */ 18297 case 0x54: /* PUSH eSP */ 18298 /* This is the Right Way, in that the value to be pushed is 18299 established before %rsp is changed, so that pushq %rsp 18300 correctly pushes the old value. */ 18301 if (haveF2orF3(pfx)) goto decode_failure; 18302 vassert(sz == 2 || sz == 4 || sz == 8); 18303 if (sz == 4) 18304 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ 18305 ty = sz==2 ? Ity_I16 : Ity_I64; 18306 t1 = newTemp(ty); 18307 t2 = newTemp(Ity_I64); 18308 assign(t1, getIRegRexB(sz, pfx, opc-0x50)); 18309 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); 18310 putIReg64(R_RSP, mkexpr(t2) ); 18311 storeLE(mkexpr(t2),mkexpr(t1)); 18312 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); 18313 return delta; 18314 18315 case 0x58: /* POP eAX */ 18316 case 0x59: /* POP eCX */ 18317 case 0x5A: /* POP eDX */ 18318 case 0x5B: /* POP eBX */ 18319 case 0x5D: /* POP eBP */ 18320 case 0x5E: /* POP eSI */ 18321 case 0x5F: /* POP eDI */ 18322 case 0x5C: /* POP eSP */ 18323 if (haveF2orF3(pfx)) goto decode_failure; 18324 vassert(sz == 2 || sz == 4 || sz == 8); 18325 if (sz == 4) 18326 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 18327 t1 = newTemp(szToITy(sz)); 18328 t2 = newTemp(Ity_I64); 18329 assign(t2, getIReg64(R_RSP)); 18330 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 18331 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 18332 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 18333 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 18334 return delta; 18335 18336 case 0x63: /* MOVSX */ 18337 if (haveF2orF3(pfx)) goto decode_failure; 18338 if (haveREX(pfx) && 1==getRexW(pfx)) { 18339 vassert(sz == 8); 18340 /* movsx r/m32 to r64 */ 18341 modrm = getUChar(delta); 18342 if (epartIsReg(modrm)) { 18343 delta++; 18344 putIRegG(8, pfx, modrm, 18345 unop(Iop_32Sto64, 18346 getIRegE(4, pfx, modrm))); 18347 DIP("movslq %s,%s\n", 18348 nameIRegE(4, pfx, modrm), 18349 nameIRegG(8, pfx, modrm)); 18350 return delta; 18351 } else { 18352 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 18353 delta += alen; 18354 putIRegG(8, pfx, modrm, 18355 unop(Iop_32Sto64, 18356 loadLE(Ity_I32, mkexpr(addr)))); 18357 DIP("movslq %s,%s\n", dis_buf, 18358 nameIRegG(8, pfx, modrm)); 18359 return delta; 18360 } 18361 } else { 18362 goto decode_failure; 18363 } 18364 18365 case 0x68: /* PUSH Iv */ 18366 if (haveF2orF3(pfx)) goto decode_failure; 18367 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 18368 if (sz == 4) sz = 8; 18369 d64 = getSDisp(imin(4,sz),delta); 18370 delta += imin(4,sz); 18371 goto do_push_I; 18372 18373 case 0x69: /* IMUL Iv, Ev, Gv */ 18374 if (haveF2orF3(pfx)) goto decode_failure; 18375 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 18376 return delta; 18377 18378 case 0x6A: /* PUSH Ib, sign-extended to sz */ 18379 if (haveF2orF3(pfx)) goto decode_failure; 18380 /* Note, sz==4 is not possible in 64-bit mode. Hence ... 
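(the default operand size for a push is 64 bits here and a 0x66
   prefix gives 16, so sz==4 can only arise from our own
   defaulting, and is promoted to 8 just below).  Worked example,
   byte for byte: "6A FF" pushes the eight bytes
   0xFFFFFFFFFFFFFFFF, the imm8 sign-extended to 64 bits, whereas
   "66 6A FF" pushes just the two bytes 0xFFFF.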
*/ 18381 if (sz == 4) sz = 8; 18382 d64 = getSDisp8(delta); delta += 1; 18383 goto do_push_I; 18384 do_push_I: 18385 ty = szToITy(sz); 18386 t1 = newTemp(Ity_I64); 18387 t2 = newTemp(ty); 18388 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 18389 putIReg64(R_RSP, mkexpr(t1) ); 18390 /* stop mkU16 asserting if d64 is a negative 16-bit number 18391 (bug #132813) */ 18392 if (ty == Ity_I16) 18393 d64 &= 0xFFFF; 18394 storeLE( mkexpr(t1), mkU(ty,d64) ); 18395 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 18396 return delta; 18397 18398 case 0x6B: /* IMUL Ib, Ev, Gv */ 18399 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 18400 return delta; 18401 18402 case 0x70: 18403 case 0x71: 18404 case 0x72: /* JBb/JNAEb (jump below) */ 18405 case 0x73: /* JNBb/JAEb (jump not below) */ 18406 case 0x74: /* JZb/JEb (jump zero) */ 18407 case 0x75: /* JNZb/JNEb (jump not zero) */ 18408 case 0x76: /* JBEb/JNAb (jump below or equal) */ 18409 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 18410 case 0x78: /* JSb (jump negative) */ 18411 case 0x79: /* JNSb (jump not negative) */ 18412 case 0x7A: /* JP (jump parity even) */ 18413 case 0x7B: /* JNP/JPO (jump parity odd) */ 18414 case 0x7C: /* JLb/JNGEb (jump less) */ 18415 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 18416 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 18417 case 0x7F: { /* JGb/JNLEb (jump greater) */ 18418 Long jmpDelta; 18419 HChar* comment = ""; 18420 if (haveF2orF3(pfx)) goto decode_failure; 18421 jmpDelta = getSDisp8(delta); 18422 vassert(-128 <= jmpDelta && jmpDelta < 128); 18423 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; 18424 delta++; 18425 if (resteerCisOk 18426 && vex_control.guest_chase_cond 18427 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 18428 && jmpDelta < 0 18429 && resteerOkFn( callback_opaque, d64) ) { 18430 /* Speculation: assume this backward branch is taken. So we 18431 need to emit a side-exit to the insn following this one, 18432 on the negation of the condition, and continue at the 18433 branch target address (d64). If we wind up back at the 18434 first instruction of the trace, just stop; it's better to 18435 let the IR loop unroller handle that case. */ 18436 stmt( IRStmt_Exit( 18437 mk_amd64g_calculate_condition( 18438 (AMD64Condcode)(1 ^ (opc - 0x70))), 18439 Ijk_Boring, 18440 IRConst_U64(guest_RIP_bbstart+delta), 18441 OFFB_RIP ) ); 18442 dres->whatNext = Dis_ResteerC; 18443 dres->continueAt = d64; 18444 comment = "(assumed taken)"; 18445 } 18446 else 18447 if (resteerCisOk 18448 && vex_control.guest_chase_cond 18449 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 18450 && jmpDelta >= 0 18451 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 18452 /* Speculation: assume this forward branch is not taken. So 18453 we need to emit a side-exit to d64 (the dest) and continue 18454 disassembling at the insn immediately following this 18455 one. */ 18456 stmt( IRStmt_Exit( 18457 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 18458 Ijk_Boring, 18459 IRConst_U64(d64), 18460 OFFB_RIP ) ); 18461 dres->whatNext = Dis_ResteerC; 18462 dres->continueAt = guest_RIP_bbstart+delta; 18463 comment = "(assumed not taken)"; 18464 } 18465 else { 18466 /* Conservative default translation - end the block at this 18467 point.
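jcc_01 is assumed to produce the obvious two-way block
   ending: a conditional side-exit for one destination plus an
   unconditional jump to the other, in outline

      if (cond) goto d64                   -- branch taken
      goto guest_RIP_bbstart+delta         -- fall through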
*/ 18468 jcc_01( dres, (AMD64Condcode)(opc - 0x70), 18469 guest_RIP_bbstart+delta, d64 ); 18470 vassert(dres->whatNext == Dis_StopHere); 18471 } 18472 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); 18473 return delta; 18474 } 18475 18476 case 0x80: /* Grp1 Ib,Eb */ 18477 if (haveF2orF3(pfx)) goto decode_failure; 18478 modrm = getUChar(delta); 18479 am_sz = lengthAMode(pfx,delta); 18480 sz = 1; 18481 d_sz = 1; 18482 d64 = getSDisp8(delta + am_sz); 18483 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 18484 return delta; 18485 18486 case 0x81: /* Grp1 Iv,Ev */ 18487 if (haveF2orF3(pfx)) goto decode_failure; 18488 modrm = getUChar(delta); 18489 am_sz = lengthAMode(pfx,delta); 18490 d_sz = imin(sz,4); 18491 d64 = getSDisp(d_sz, delta + am_sz); 18492 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 18493 return delta; 18494 18495 case 0x83: /* Grp1 Ib,Ev */ 18496 if (haveF2orF3(pfx)) goto decode_failure; 18497 modrm = getUChar(delta); 18498 am_sz = lengthAMode(pfx,delta); 18499 d_sz = 1; 18500 d64 = getSDisp8(delta + am_sz); 18501 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 18502 return delta; 18503 18504 case 0x84: /* TEST Eb,Gb */ 18505 if (haveF2orF3(pfx)) goto decode_failure; 18506 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 18507 return delta; 18508 18509 case 0x85: /* TEST Ev,Gv */ 18510 if (haveF2orF3(pfx)) goto decode_failure; 18511 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 18512 return delta; 18513 18514 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 18515 prefix. Therefore the memory form is expressed below with casLE, 18516 exactly as LOCK-prefixed read-modify-write insns are, rather 18517 than with a plain load/store pair; that keeps the exchange 18518 atomic whether or not an explicit LOCK prefix was also 18519 present. */ 18520 case 0x86: /* XCHG Gb,Eb */ 18521 sz = 1; 18522 /* Fall through ...
*/ 18523 case 0x87: /* XCHG Gv,Ev */ 18524 if (haveF2orF3(pfx)) goto decode_failure; 18525 modrm = getUChar(delta); 18526 ty = szToITy(sz); 18527 t1 = newTemp(ty); t2 = newTemp(ty); 18528 if (epartIsReg(modrm)) { 18529 assign(t1, getIRegE(sz, pfx, modrm)); 18530 assign(t2, getIRegG(sz, pfx, modrm)); 18531 putIRegG(sz, pfx, modrm, mkexpr(t1)); 18532 putIRegE(sz, pfx, modrm, mkexpr(t2)); 18533 delta++; 18534 DIP("xchg%c %s, %s\n", 18535 nameISize(sz), nameIRegG(sz, pfx, modrm), 18536 nameIRegE(sz, pfx, modrm)); 18537 } else { 18538 *expect_CAS = True; 18539 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 18540 assign( t1, loadLE(ty, mkexpr(addr)) ); 18541 assign( t2, getIRegG(sz, pfx, modrm) ); 18542 casLE( mkexpr(addr), 18543 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 18544 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 18545 delta += alen; 18546 DIP("xchg%c %s, %s\n", nameISize(sz), 18547 nameIRegG(sz, pfx, modrm), dis_buf); 18548 } 18549 return delta; 18550 18551 case 0x88: /* MOV Gb,Eb */ 18552 if (haveF2orF3(pfx)) goto decode_failure; 18553 delta = dis_mov_G_E(vbi, pfx, 1, delta); 18554 return delta; 18555 18556 case 0x89: /* MOV Gv,Ev */ 18557 if (haveF2orF3(pfx)) goto decode_failure; 18558 delta = dis_mov_G_E(vbi, pfx, sz, delta); 18559 return delta; 18560 18561 case 0x8A: /* MOV Eb,Gb */ 18562 if (haveF2orF3(pfx)) goto decode_failure; 18563 delta = dis_mov_E_G(vbi, pfx, 1, delta); 18564 return delta; 18565 18566 case 0x8B: /* MOV Ev,Gv */ 18567 if (haveF2orF3(pfx)) goto decode_failure; 18568 delta = dis_mov_E_G(vbi, pfx, sz, delta); 18569 return delta; 18570 18571 case 0x8D: /* LEA M,Gv */ 18572 if (haveF2orF3(pfx)) goto decode_failure; 18573 if (sz != 4 && sz != 8) 18574 goto decode_failure; 18575 modrm = getUChar(delta); 18576 if (epartIsReg(modrm)) 18577 goto decode_failure; 18578 /* NOTE! this is the one place where a segment override prefix 18579 has no effect on the address calculation. Therefore we clear 18580 any segment override bits in pfx. */ 18581 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 18582 delta += alen; 18583 /* This is a hack. But it isn't clear that really doing the 18584 calculation at 32 bits is really worth it. Hence for leal, 18585 do the full 64-bit calculation and then truncate it. */ 18586 putIRegG( sz, pfx, modrm, 18587 sz == 4 18588 ? unop(Iop_64to32, mkexpr(addr)) 18589 : mkexpr(addr) 18590 ); 18591 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 18592 nameIRegG(sz,pfx,modrm)); 18593 return delta; 18594 18595 case 0x8F: { /* POPQ m64 / POPW m16 */ 18596 Int len; 18597 UChar rm; 18598 /* There is no encoding for 32-bit pop in 64-bit mode. 18599 So sz==4 actually means sz==8. */ 18600 if (haveF2orF3(pfx)) goto decode_failure; 18601 vassert(sz == 2 || sz == 4 18602 || /* tolerate redundant REX.W, see #210481 */ sz == 8); 18603 if (sz == 4) sz = 8; 18604 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 18605 18606 rm = getUChar(delta); 18607 18608 /* make sure this instruction is correct POP */ 18609 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) 18610 goto decode_failure; 18611 /* and has correct size */ 18612 vassert(sz == 8); 18613 18614 t1 = newTemp(Ity_I64); 18615 t3 = newTemp(Ity_I64); 18616 assign( t1, getIReg64(R_RSP) ); 18617 assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); 18618 18619 /* Increase RSP; must be done before the STORE. 
Intel manual 18620 says: If the RSP register is used as a base register for 18621 addressing a destination operand in memory, the POP 18622 instruction computes the effective address of the operand 18623 after it increments the RSP register. */ 18624 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); 18625 18626 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 18627 storeLE( mkexpr(addr), mkexpr(t3) ); 18628 18629 DIP("popl %s\n", dis_buf); 18630 18631 delta += len; 18632 return delta; 18633 } 18634 18635 case 0x90: /* XCHG eAX,eAX */ 18636 /* detect and handle F3 90 (rep nop) specially */ 18637 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 18638 DIP("rep nop (P4 pause)\n"); 18639 /* "observe" the hint. The Vex client needs to be careful not 18640 to cause very long delays as a result, though. */ 18641 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta); 18642 vassert(dres->whatNext == Dis_StopHere); 18643 return delta; 18644 } 18645 /* detect and handle NOPs specially */ 18646 if (/* F2/F3 probably change meaning completely */ 18647 !haveF2orF3(pfx) 18648 /* If REX.B is 1, we're not exchanging rAX with itself */ 18649 && getRexB(pfx)==0 ) { 18650 DIP("nop\n"); 18651 return delta; 18652 } 18653 /* else fall through to normal case. */ 18654 case 0x91: /* XCHG rAX,rCX */ 18655 case 0x92: /* XCHG rAX,rDX */ 18656 case 0x93: /* XCHG rAX,rBX */ 18657 case 0x94: /* XCHG rAX,rSP */ 18658 case 0x95: /* XCHG rAX,rBP */ 18659 case 0x96: /* XCHG rAX,rSI */ 18660 case 0x97: /* XCHG rAX,rDI */ 18661 /* guard against mutancy */ 18662 if (haveF2orF3(pfx)) goto decode_failure; 18663 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 18664 return delta; 18665 18666 case 0x98: /* CBW */ 18667 if (haveF2orF3(pfx)) goto decode_failure; 18668 if (sz == 8) { 18669 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 18670 DIP(/*"cdqe\n"*/"cltq"); 18671 return delta; 18672 } 18673 if (sz == 4) { 18674 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 18675 DIP("cwtl\n"); 18676 return delta; 18677 } 18678 if (sz == 2) { 18679 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 18680 DIP("cbw\n"); 18681 return delta; 18682 } 18683 goto decode_failure; 18684 18685 case 0x99: /* CWD/CDQ/CQO */ 18686 if (haveF2orF3(pfx)) goto decode_failure; 18687 vassert(sz == 2 || sz == 4 || sz == 8); 18688 ty = szToITy(sz); 18689 putIRegRDX( sz, 18690 binop(mkSizedOp(ty,Iop_Sar8), 18691 getIRegRAX(sz), 18692 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 18693 DIP(sz == 2 ? "cwd\n" 18694 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 18695 : "cqo\n")); 18696 return delta; 18697 18698 case 0x9B: /* FWAIT (X87 insn) */ 18699 /* ignore? */ 18700 DIP("fwait\n"); 18701 return delta; 18702 18703 case 0x9C: /* PUSHF */ { 18704 /* Note. There is no encoding for a 32-bit pushf in 64-bit 18705 mode. So sz==4 actually means sz==8. */ 18706 /* 24 July 06: has also been seen with a redundant REX prefix, 18707 so must also allow sz==8. */ 18708 if (haveF2orF3(pfx)) goto decode_failure; 18709 vassert(sz == 2 || sz == 4 || sz == 8); 18710 if (sz == 4) sz = 8; 18711 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 18712 18713 t1 = newTemp(Ity_I64); 18714 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 18715 putIReg64(R_RSP, mkexpr(t1) ); 18716 18717 t2 = newTemp(Ity_I64); 18718 assign( t2, mk_amd64g_calculate_rflags_all() ); 18719 18720 /* Patch in the D flag. This can simply be a copy of bit 10 of 18721 baseBlock[OFFB_DFLAG]. 
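That works because the D flag is kept in the guest state as 1
   (forward) or -1, i.e. all ones, (backward) -- see the POPF and
   CLD/STD cases below -- so bit 10 of the stored value is 0 or 1
   respectively, which is exactly the value belonging at bit 10
   (DF) of rflags:

      DFLAG ==  1  ->  DFLAG & (1<<10) == 0      ->  DF = 0
      DFLAG == -1  ->  DFLAG & (1<<10) == 1<<10  ->  DF = 1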
*/ 18722 t3 = newTemp(Ity_I64); 18723 assign( t3, binop(Iop_Or64, 18724 mkexpr(t2), 18725 binop(Iop_And64, 18726 IRExpr_Get(OFFB_DFLAG,Ity_I64), 18727 mkU64(1<<10))) 18728 ); 18729 18730 /* And patch in the ID flag. */ 18731 t4 = newTemp(Ity_I64); 18732 assign( t4, binop(Iop_Or64, 18733 mkexpr(t3), 18734 binop(Iop_And64, 18735 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 18736 mkU8(21)), 18737 mkU64(1<<21))) 18738 ); 18739 18740 /* And patch in the AC flag too. */ 18741 t5 = newTemp(Ity_I64); 18742 assign( t5, binop(Iop_Or64, 18743 mkexpr(t4), 18744 binop(Iop_And64, 18745 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 18746 mkU8(18)), 18747 mkU64(1<<18))) 18748 ); 18749 18750 /* if sz==2, the stored value needs to be narrowed. */ 18751 if (sz == 2) 18752 storeLE( mkexpr(t1), unop(Iop_32to16, 18753 unop(Iop_64to32,mkexpr(t5))) ); 18754 else 18755 storeLE( mkexpr(t1), mkexpr(t5) ); 18756 18757 DIP("pushf%c\n", nameISize(sz)); 18758 return delta; 18759 } 18760 18761 case 0x9D: /* POPF */ 18762 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 18763 So sz==4 actually means sz==8. */ 18764 if (haveF2orF3(pfx)) goto decode_failure; 18765 vassert(sz == 2 || sz == 4); 18766 if (sz == 4) sz = 8; 18767 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 18768 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 18769 assign(t2, getIReg64(R_RSP)); 18770 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 18771 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 18772 /* t1 is the flag word. Mask out everything except OSZACP and 18773 set the flags thunk to AMD64G_CC_OP_COPY. */ 18774 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 18775 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 18776 stmt( IRStmt_Put( OFFB_CC_DEP1, 18777 binop(Iop_And64, 18778 mkexpr(t1), 18779 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 18780 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 18781 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 18782 ) 18783 ) 18784 ); 18785 18786 /* Also need to set the D flag, which is held in bit 10 of t1. 18787 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */ 18788 stmt( IRStmt_Put( 18789 OFFB_DFLAG, 18790 IRExpr_Mux0X( 18791 unop(Iop_32to8, 18792 unop(Iop_64to32, 18793 binop(Iop_And64, 18794 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 18795 mkU64(1)))), 18796 mkU64(1), 18797 mkU64(0xFFFFFFFFFFFFFFFFULL))) 18798 ); 18799 18800 /* And set the ID flag */ 18801 stmt( IRStmt_Put( 18802 OFFB_IDFLAG, 18803 IRExpr_Mux0X( 18804 unop(Iop_32to8, 18805 unop(Iop_64to32, 18806 binop(Iop_And64, 18807 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 18808 mkU64(1)))), 18809 mkU64(0), 18810 mkU64(1))) 18811 ); 18812 18813 /* And set the AC flag too */ 18814 stmt( IRStmt_Put( 18815 OFFB_ACFLAG, 18816 IRExpr_Mux0X( 18817 unop(Iop_32to8, 18818 unop(Iop_64to32, 18819 binop(Iop_And64, 18820 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 18821 mkU64(1)))), 18822 mkU64(0), 18823 mkU64(1))) 18824 ); 18825 18826 DIP("popf%c\n", nameISize(sz)); 18827 return delta; 18828 18829 case 0x9E: /* SAHF */ 18830 codegen_SAHF(); 18831 DIP("sahf\n"); 18832 return delta; 18833 18834 case 0x9F: /* LAHF */ 18835 codegen_LAHF(); 18836 DIP("lahf\n"); 18837 return delta; 18838 18839 case 0xA0: /* MOV Ob,AL */ 18840 if (have66orF2orF3(pfx)) goto decode_failure; 18841 sz = 1; 18842 /* Fall through ... 
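Note these A0..A3 forms carry no ModRM byte; an 8-byte
   absolute address ("moffset") follows the opcode instead.  For
   example the ten-byte insn 48 A1 EF CD AB 89 67 45 23 01 loads
   %rax from absolute address 0x0123456789ABCDEF, subject to any
   segment override.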
*/ 18843 case 0xA1: /* MOV Ov,eAX */ 18844 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 18845 goto decode_failure; 18846 d64 = getDisp64(delta); 18847 delta += 8; 18848 ty = szToITy(sz); 18849 addr = newTemp(Ity_I64); 18850 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 18851 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 18852 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 18853 segRegTxt(pfx), d64, 18854 nameIRegRAX(sz)); 18855 return delta; 18856 18857 case 0xA2: /* MOV AL,Ob */ 18858 if (have66orF2orF3(pfx)) goto decode_failure; 18859 sz = 1; 18860 /* Fall through ... */ 18861 case 0xA3: /* MOV eAX,Ov */ 18862 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 18863 goto decode_failure; 18864 d64 = getDisp64(delta); 18865 delta += 8; 18866 ty = szToITy(sz); 18867 addr = newTemp(Ity_I64); 18868 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 18869 storeLE( mkexpr(addr), getIRegRAX(sz) ); 18870 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 18871 segRegTxt(pfx), d64); 18872 return delta; 18873 18874 case 0xA4: 18875 case 0xA5: 18876 /* F3 A4: rep movsb */ 18877 if (haveF3(pfx) && !haveF2(pfx)) { 18878 if (opc == 0xA4) 18879 sz = 1; 18880 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz, 18881 guest_RIP_curr_instr, 18882 guest_RIP_bbstart+delta, "rep movs", pfx ); 18883 dres->whatNext = Dis_StopHere; 18884 return delta; 18885 } 18886 /* A4: movsb */ 18887 if (!haveF3(pfx) && !haveF2(pfx)) { 18888 if (opc == 0xA4) 18889 sz = 1; 18890 dis_string_op( dis_MOVS, sz, "movs", pfx ); 18891 return delta; 18892 } 18893 goto decode_failure; 18894 18895 case 0xA6: 18896 case 0xA7: 18897 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 18898 if (haveF3(pfx) && !haveF2(pfx)) { 18899 if (opc == 0xA6) 18900 sz = 1; 18901 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz, 18902 guest_RIP_curr_instr, 18903 guest_RIP_bbstart+delta, "repe cmps", pfx ); 18904 dres->whatNext = Dis_StopHere; 18905 return delta; 18906 } 18907 goto decode_failure; 18908 18909 case 0xAA: 18910 case 0xAB: 18911 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 18912 if (haveF3(pfx) && !haveF2(pfx)) { 18913 if (opc == 0xAA) 18914 sz = 1; 18915 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz, 18916 guest_RIP_curr_instr, 18917 guest_RIP_bbstart+delta, "rep stos", pfx ); 18918 vassert(dres->whatNext == Dis_StopHere); 18919 return delta; 18920 } 18921 /* AA/AB: stosb/stos{w,l,q} */ 18922 if (!haveF3(pfx) && !haveF2(pfx)) { 18923 if (opc == 0xAA) 18924 sz = 1; 18925 dis_string_op( dis_STOS, sz, "stos", pfx ); 18926 return delta; 18927 } 18928 goto decode_failure; 18929 18930 case 0xA8: /* TEST Ib, AL */ 18931 if (haveF2orF3(pfx)) goto decode_failure; 18932 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 18933 return delta; 18934 case 0xA9: /* TEST Iv, eAX */ 18935 if (haveF2orF3(pfx)) goto decode_failure; 18936 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 18937 return delta; 18938 18939 case 0xAC: /* LODS, no REP prefix */ 18940 case 0xAD: 18941 dis_string_op( dis_LODS, ( opc == 0xAC ? 
1 : sz ), "lods", pfx ); 18942 return delta; 18943 18944 case 0xAE: 18945 case 0xAF: 18946 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 18947 if (haveF2(pfx) && !haveF3(pfx)) { 18948 if (opc == 0xAE) 18949 sz = 1; 18950 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz, 18951 guest_RIP_curr_instr, 18952 guest_RIP_bbstart+delta, "repne scas", pfx ); 18953 vassert(dres->whatNext == Dis_StopHere); 18954 return delta; 18955 } 18956 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 18957 if (!haveF2(pfx) && haveF3(pfx)) { 18958 if (opc == 0xAE) 18959 sz = 1; 18960 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz, 18961 guest_RIP_curr_instr, 18962 guest_RIP_bbstart+delta, "repe scas", pfx ); 18963 vassert(dres->whatNext == Dis_StopHere); 18964 return delta; 18965 } 18966 /* AE/AF: scasb/scas{w,l,q} */ 18967 if (!haveF2(pfx) && !haveF3(pfx)) { 18968 if (opc == 0xAE) 18969 sz = 1; 18970 dis_string_op( dis_SCAS, sz, "scas", pfx ); 18971 return delta; 18972 } 18973 goto decode_failure; 18974 18975 /* XXXX be careful here with moves to AH/BH/CH/DH */ 18976 case 0xB0: /* MOV imm,AL */ 18977 case 0xB1: /* MOV imm,CL */ 18978 case 0xB2: /* MOV imm,DL */ 18979 case 0xB3: /* MOV imm,BL */ 18980 case 0xB4: /* MOV imm,AH */ 18981 case 0xB5: /* MOV imm,CH */ 18982 case 0xB6: /* MOV imm,DH */ 18983 case 0xB7: /* MOV imm,BH */ 18984 if (haveF2orF3(pfx)) goto decode_failure; 18985 d64 = getUChar(delta); 18986 delta += 1; 18987 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 18988 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 18989 return delta; 18990 18991 case 0xB8: /* MOV imm,eAX */ 18992 case 0xB9: /* MOV imm,eCX */ 18993 case 0xBA: /* MOV imm,eDX */ 18994 case 0xBB: /* MOV imm,eBX */ 18995 case 0xBC: /* MOV imm,eSP */ 18996 case 0xBD: /* MOV imm,eBP */ 18997 case 0xBE: /* MOV imm,eSI */ 18998 case 0xBF: /* MOV imm,eDI */ 18999 /* This is the one-and-only place where 64-bit literals are 19000 allowed in the instruction stream. 
*/ 19001 if (haveF2orF3(pfx)) goto decode_failure; 19002 if (sz == 8) { 19003 d64 = getDisp64(delta); 19004 delta += 8; 19005 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 19006 DIP("movabsq $%lld,%s\n", (Long)d64, 19007 nameIRegRexB(8,pfx,opc-0xB8)); 19008 } else { 19009 d64 = getSDisp(imin(4,sz),delta); 19010 delta += imin(4,sz); 19011 putIRegRexB(sz, pfx, opc-0xB8, 19012 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 19013 DIP("mov%c $%lld,%s\n", nameISize(sz), 19014 (Long)d64, 19015 nameIRegRexB(sz,pfx,opc-0xB8)); 19016 } 19017 return delta; 19018 19019 case 0xC0: { /* Grp2 Ib,Eb */ 19020 Bool decode_OK = True; 19021 if (haveF2orF3(pfx)) goto decode_failure; 19022 modrm = getUChar(delta); 19023 am_sz = lengthAMode(pfx,delta); 19024 d_sz = 1; 19025 d64 = getUChar(delta + am_sz); 19026 sz = 1; 19027 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19028 mkU8(d64 & 0xFF), NULL, &decode_OK ); 19029 if (!decode_OK) goto decode_failure; 19030 return delta; 19031 } 19032 19033 case 0xC1: { /* Grp2 Ib,Ev */ 19034 Bool decode_OK = True; 19035 if (haveF2orF3(pfx)) goto decode_failure; 19036 modrm = getUChar(delta); 19037 am_sz = lengthAMode(pfx,delta); 19038 d_sz = 1; 19039 d64 = getUChar(delta + am_sz); 19040 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19041 mkU8(d64 & 0xFF), NULL, &decode_OK ); 19042 if (!decode_OK) goto decode_failure; 19043 return delta; 19044 } 19045 19046 case 0xC2: /* RET imm16 */ 19047 if (have66orF2orF3(pfx)) goto decode_failure; 19048 d64 = getUDisp16(delta); 19049 delta += 2; 19050 dis_ret(dres, vbi, d64); 19051 DIP("ret $%lld\n", d64); 19052 return delta; 19053 19054 case 0xC3: /* RET */ 19055 if (have66orF2(pfx)) goto decode_failure; 19056 /* F3 is acceptable on AMD. */ 19057 dis_ret(dres, vbi, 0); 19058 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 19059 return delta; 19060 19061 case 0xC6: /* MOV Ib,Eb */ 19062 sz = 1; 19063 goto do_Mov_I_E; 19064 case 0xC7: /* MOV Iv,Ev */ 19065 goto do_Mov_I_E; 19066 do_Mov_I_E: 19067 if (haveF2orF3(pfx)) goto decode_failure; 19068 modrm = getUChar(delta); 19069 if (epartIsReg(modrm)) { 19070 delta++; /* mod/rm byte */ 19071 d64 = getSDisp(imin(4,sz),delta); 19072 delta += imin(4,sz); 19073 putIRegE(sz, pfx, modrm, 19074 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 19075 DIP("mov%c $%lld, %s\n", nameISize(sz), 19076 (Long)d64, 19077 nameIRegE(sz,pfx,modrm)); 19078 } else { 19079 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 19080 /*xtra*/imin(4,sz) ); 19081 delta += alen; 19082 d64 = getSDisp(imin(4,sz),delta); 19083 delta += imin(4,sz); 19084 storeLE(mkexpr(addr), 19085 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 19086 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 19087 } 19088 return delta; 19089 19090 case 0xC8: /* ENTER */ 19091 /* Same comments re operand size as for LEAVE below apply. 19092 Also, only handles the case "enter $imm16, $0"; other cases 19093 for the second operand (nesting depth) are not handled. 
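Concretely, C8 20 00 00 ("enter $0x20, $0") behaves like
   "push %rbp ; mov %rsp,%rbp ; sub $0x20,%rsp", and any encoding
   with a nonzero nesting-depth byte is rejected below.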
*/ 19094 if (sz != 4) 19095 goto decode_failure; 19096 d64 = getUDisp16(delta); 19097 delta += 2; 19098 vassert(d64 >= 0 && d64 <= 0xFFFF); 19099 if (getUChar(delta) != 0) 19100 goto decode_failure; 19101 delta++; 19102 /* Intel docs seem to suggest: 19103 push rbp 19104 temp = rsp 19105 rbp = temp 19106 rsp = rsp - imm16 19107 */ 19108 t1 = newTemp(Ity_I64); 19109 assign(t1, getIReg64(R_RBP)); 19110 t2 = newTemp(Ity_I64); 19111 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 19112 putIReg64(R_RSP, mkexpr(t2)); 19113 storeLE(mkexpr(t2), mkexpr(t1)); 19114 putIReg64(R_RBP, mkexpr(t2)); 19115 if (d64 > 0) { 19116 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64))); 19117 } 19118 DIP("enter $%u, $0\n", (UInt)d64); 19119 return delta; 19120 19121 case 0xC9: /* LEAVE */ 19122 /* In 64-bit mode this defaults to a 64-bit operand size. There 19123 is no way to encode a 32-bit variant. Hence sz==4 but we do 19124 it as if sz=8. */ 19125 if (sz != 4) 19126 goto decode_failure; 19127 t1 = newTemp(Ity_I64); 19128 t2 = newTemp(Ity_I64); 19129 assign(t1, getIReg64(R_RBP)); 19130 /* First PUT RSP looks redundant, but need it because RSP must 19131 always be up-to-date for Memcheck to work... */ 19132 putIReg64(R_RSP, mkexpr(t1)); 19133 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 19134 putIReg64(R_RBP, mkexpr(t2)); 19135 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 19136 DIP("leave\n"); 19137 return delta; 19138 19139 case 0xCC: /* INT 3 */ 19140 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta); 19141 vassert(dres->whatNext == Dis_StopHere); 19142 DIP("int $0x3\n"); 19143 return delta; 19144 19145 case 0xD0: { /* Grp2 1,Eb */ 19146 Bool decode_OK = True; 19147 if (haveF2orF3(pfx)) goto decode_failure; 19148 modrm = getUChar(delta); 19149 am_sz = lengthAMode(pfx,delta); 19150 d_sz = 0; 19151 d64 = 1; 19152 sz = 1; 19153 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19154 mkU8(d64), NULL, &decode_OK ); 19155 if (!decode_OK) goto decode_failure; 19156 return delta; 19157 } 19158 19159 case 0xD1: { /* Grp2 1,Ev */ 19160 Bool decode_OK = True; 19161 if (haveF2orF3(pfx)) goto decode_failure; 19162 modrm = getUChar(delta); 19163 am_sz = lengthAMode(pfx,delta); 19164 d_sz = 0; 19165 d64 = 1; 19166 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19167 mkU8(d64), NULL, &decode_OK ); 19168 if (!decode_OK) goto decode_failure; 19169 return delta; 19170 } 19171 19172 case 0xD2: { /* Grp2 CL,Eb */ 19173 Bool decode_OK = True; 19174 if (haveF2orF3(pfx)) goto decode_failure; 19175 modrm = getUChar(delta); 19176 am_sz = lengthAMode(pfx,delta); 19177 d_sz = 0; 19178 sz = 1; 19179 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19180 getIRegCL(), "%cl", &decode_OK ); 19181 if (!decode_OK) goto decode_failure; 19182 return delta; 19183 } 19184 19185 case 0xD3: { /* Grp2 CL,Ev */ 19186 Bool decode_OK = True; 19187 if (haveF2orF3(pfx)) goto decode_failure; 19188 modrm = getUChar(delta); 19189 am_sz = lengthAMode(pfx,delta); 19190 d_sz = 0; 19191 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 19192 getIRegCL(), "%cl", &decode_OK ); 19193 if (!decode_OK) goto decode_failure; 19194 return delta; 19195 } 19196 19197 case 0xD8: /* X87 instructions */ 19198 case 0xD9: 19199 case 0xDA: 19200 case 0xDB: 19201 case 0xDC: 19202 case 0xDD: 19203 case 0xDE: 19204 case 0xDF: { 19205 Bool redundantREXWok = False; 19206 19207 if (haveF2orF3(pfx)) 19208 goto decode_failure; 19209 19210 /* kludge to tolerate redundant rex.w prefixes (should do this 19211 
properly one day) */ 19212 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 19213 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 19214 redundantREXWok = True; 19215 19216 Bool size_OK = False; 19217 if ( sz == 4 ) 19218 size_OK = True; 19219 else if ( sz == 8 ) 19220 size_OK = redundantREXWok; 19221 else if ( sz == 2 ) { 19222 int mod_rm = getUChar(delta+0); 19223 int reg = gregLO3ofRM(mod_rm); 19224 /* The HotSpot JVM uses these */ 19225 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ || 19226 reg == 4 /* FNSAVE */ || 19227 reg == 6 /* FRSTOR */ ) ) 19228 size_OK = True; 19229 } 19230 /* AMD manual says 0x66 size override is ignored, except where 19231 it is meaningful */ 19232 if (!size_OK) 19233 goto decode_failure; 19234 19235 Bool decode_OK = False; 19236 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 19237 if (!decode_OK) 19238 goto decode_failure; 19239 19240 return delta; 19241 } 19242 19243 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 19244 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 19245 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 19246 { /* The docs say this uses rCX as a count depending on the 19247 address size override, not the operand one. */ 19248 IRExpr* zbit = NULL; 19249 IRExpr* count = NULL; 19250 IRExpr* cond = NULL; 19251 HChar* xtra = NULL; 19252 19253 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 19254 /* So at this point we've rejected any variants which appear to 19255 be governed by the usual operand-size modifiers. Hence only 19256 the address size prefix can have an effect. It changes the 19257 size from 64 (default) to 32. */ 19258 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 19259 delta++; 19260 if (haveASO(pfx)) { 19261 /* 64to32 of 64-bit get is merely a get-put improvement 19262 trick. */ 19263 putIReg32(R_RCX, binop(Iop_Sub32, 19264 unop(Iop_64to32, getIReg64(R_RCX)), 19265 mkU32(1))); 19266 } else { 19267 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 19268 } 19269 19270 /* This is correct, both for 32- and 64-bit versions. If we're 19271 doing a 32-bit dec and the result is zero then the default 19272 zero extension rule will cause the upper 32 bits to be zero 19273 too. Hence a 64-bit check against zero is OK. */ 19274 count = getIReg64(R_RCX); 19275 cond = binop(Iop_CmpNE64, count, mkU64(0)); 19276 switch (opc) { 19277 case 0xE2: 19278 xtra = ""; 19279 break; 19280 case 0xE1: 19281 xtra = "e"; 19282 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 19283 cond = mkAnd1(cond, zbit); 19284 break; 19285 case 0xE0: 19286 xtra = "ne"; 19287 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 19288 cond = mkAnd1(cond, zbit); 19289 break; 19290 default: 19291 vassert(0); 19292 } 19293 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) ); 19294 19295 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64); 19296 return delta; 19297 } 19298 19299 case 0xE3: 19300 /* JRCXZ or JECXZ, depending address size override. 
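That is, a 0x67 prefix selects the 32-bit test on %ecx
   (jecxz), otherwise the 64-bit test on %rcx (jrcxz); neither
   form reads or writes rflags, it only compares the count
   register with zero.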
*/ 19301 if (have66orF2orF3(pfx)) goto decode_failure; 19302 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 19303 delta++; 19304 if (haveASO(pfx)) { 19305 /* 32-bit */ 19306 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 19307 unop(Iop_32Uto64, getIReg32(R_RCX)), 19308 mkU64(0)), 19309 Ijk_Boring, 19310 IRConst_U64(d64), 19311 OFFB_RIP 19312 )); 19313 DIP("jecxz 0x%llx\n", d64); 19314 } else { 19315 /* 64-bit */ 19316 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 19317 getIReg64(R_RCX), 19318 mkU64(0)), 19319 Ijk_Boring, 19320 IRConst_U64(d64), 19321 OFFB_RIP 19322 )); 19323 DIP("jrcxz 0x%llx\n", d64); 19324 } 19325 return delta; 19326 19327 case 0xE4: /* IN imm8, AL */ 19328 sz = 1; 19329 t1 = newTemp(Ity_I64); 19330 abyte = getUChar(delta); delta++; 19331 assign(t1, mkU64( abyte & 0xFF )); 19332 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 19333 goto do_IN; 19334 case 0xE5: /* IN imm8, eAX */ 19335 if (!(sz == 2 || sz == 4)) goto decode_failure; 19336 t1 = newTemp(Ity_I64); 19337 abyte = getUChar(delta); delta++; 19338 assign(t1, mkU64( abyte & 0xFF )); 19339 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 19340 goto do_IN; 19341 case 0xEC: /* IN %DX, AL */ 19342 sz = 1; 19343 t1 = newTemp(Ity_I64); 19344 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 19345 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 19346 nameIRegRAX(sz)); 19347 goto do_IN; 19348 case 0xED: /* IN %DX, eAX */ 19349 if (!(sz == 2 || sz == 4)) goto decode_failure; 19350 t1 = newTemp(Ity_I64); 19351 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 19352 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 19353 nameIRegRAX(sz)); 19354 goto do_IN; 19355 do_IN: { 19356 /* At this point, sz indicates the width, and t1 is a 64-bit 19357 value giving port number. */ 19358 IRDirty* d; 19359 if (haveF2orF3(pfx)) goto decode_failure; 19360 vassert(sz == 1 || sz == 2 || sz == 4); 19361 ty = szToITy(sz); 19362 t2 = newTemp(Ity_I64); 19363 d = unsafeIRDirty_1_N( 19364 t2, 19365 0/*regparms*/, 19366 "amd64g_dirtyhelper_IN", 19367 &amd64g_dirtyhelper_IN, 19368 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 19369 ); 19370 /* do the call, dumping the result in t2. */ 19371 stmt( IRStmt_Dirty(d) ); 19372 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 19373 return delta; 19374 } 19375 19376 case 0xE6: /* OUT AL, imm8 */ 19377 sz = 1; 19378 t1 = newTemp(Ity_I64); 19379 abyte = getUChar(delta); delta++; 19380 assign( t1, mkU64( abyte & 0xFF ) ); 19381 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 19382 goto do_OUT; 19383 case 0xE7: /* OUT eAX, imm8 */ 19384 if (!(sz == 2 || sz == 4)) goto decode_failure; 19385 t1 = newTemp(Ity_I64); 19386 abyte = getUChar(delta); delta++; 19387 assign( t1, mkU64( abyte & 0xFF ) ); 19388 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 19389 goto do_OUT; 19390 case 0xEE: /* OUT AL, %DX */ 19391 sz = 1; 19392 t1 = newTemp(Ity_I64); 19393 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 19394 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 19395 nameIRegRDX(2)); 19396 goto do_OUT; 19397 case 0xEF: /* OUT eAX, %DX */ 19398 if (!(sz == 2 || sz == 4)) goto decode_failure; 19399 t1 = newTemp(Ity_I64); 19400 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 19401 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 19402 nameIRegRDX(2)); 19403 goto do_OUT; 19404 do_OUT: { 19405 /* At this point, sz indicates the width, and t1 is a 64-bit 19406 value giving port number. 
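Since a port read has effects the IR cannot describe as a
   pure expression, it is routed through a dirty helper; the
   emitted shape is, in outline,

      t2 = DIRTY amd64g_dirtyhelper_IN(t1, sz)   -- 64-bit result
      PUT(rAX.sz) = narrow(t2)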
*/ 19407 IRDirty* d; 19408 if (haveF2orF3(pfx)) goto decode_failure; 19409 vassert(sz == 1 || sz == 2 || sz == 4); 19410 ty = szToITy(sz); 19411 d = unsafeIRDirty_0_N( 19412 0/*regparms*/, 19413 "amd64g_dirtyhelper_OUT", 19414 &amd64g_dirtyhelper_OUT, 19415 mkIRExprVec_3( mkexpr(t1), 19416 widenUto64( getIRegRAX(sz) ), 19417 mkU64(sz) ) 19418 ); 19419 stmt( IRStmt_Dirty(d) ); 19420 return delta; 19421 } 19422 19423 case 0xE8: /* CALL J4 */ 19424 if (haveF2orF3(pfx)) goto decode_failure; 19425 d64 = getSDisp32(delta); delta += 4; 19426 d64 += (guest_RIP_bbstart+delta); 19427 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 19428 t1 = newTemp(Ity_I64); 19429 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 19430 putIReg64(R_RSP, mkexpr(t1)); 19431 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 19432 t2 = newTemp(Ity_I64); 19433 assign(t2, mkU64((Addr64)d64)); 19434 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 19435 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 19436 /* follow into the call target. */ 19437 dres->whatNext = Dis_ResteerU; 19438 dres->continueAt = d64; 19439 } else { 19440 jmp_lit(dres, Ijk_Call, d64); 19441 vassert(dres->whatNext == Dis_StopHere); 19442 } 19443 DIP("call 0x%llx\n",d64); 19444 return delta; 19445 19446 case 0xE9: /* Jv (jump, 16/32 offset) */ 19447 if (haveF2orF3(pfx)) goto decode_failure; 19448 if (sz != 4) 19449 goto decode_failure; /* JRS added 2004 July 11 */ 19450 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); 19451 delta += sz; 19452 if (resteerOkFn(callback_opaque,d64)) { 19453 dres->whatNext = Dis_ResteerU; 19454 dres->continueAt = d64; 19455 } else { 19456 jmp_lit(dres, Ijk_Boring, d64); 19457 vassert(dres->whatNext == Dis_StopHere); 19458 } 19459 DIP("jmp 0x%llx\n", d64); 19460 return delta; 19461 19462 case 0xEB: /* Jb (jump, byte offset) */ 19463 if (haveF2orF3(pfx)) goto decode_failure; 19464 if (sz != 4) 19465 goto decode_failure; /* JRS added 2004 July 11 */ 19466 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 19467 delta++; 19468 if (resteerOkFn(callback_opaque,d64)) { 19469 dres->whatNext = Dis_ResteerU; 19470 dres->continueAt = d64; 19471 } else { 19472 jmp_lit(dres, Ijk_Boring, d64); 19473 vassert(dres->whatNext == Dis_StopHere); 19474 } 19475 DIP("jmp-8 0x%llx\n", d64); 19476 return delta; 19477 19478 case 0xF5: /* CMC */ 19479 case 0xF8: /* CLC */ 19480 case 0xF9: /* STC */ 19481 t1 = newTemp(Ity_I64); 19482 t2 = newTemp(Ity_I64); 19483 assign( t1, mk_amd64g_calculate_rflags_all() ); 19484 switch (opc) { 19485 case 0xF5: 19486 assign( t2, binop(Iop_Xor64, mkexpr(t1), 19487 mkU64(AMD64G_CC_MASK_C))); 19488 DIP("cmc\n"); 19489 break; 19490 case 0xF8: 19491 assign( t2, binop(Iop_And64, mkexpr(t1), 19492 mkU64(~AMD64G_CC_MASK_C))); 19493 DIP("clc\n"); 19494 break; 19495 case 0xF9: 19496 assign( t2, binop(Iop_Or64, mkexpr(t1), 19497 mkU64(AMD64G_CC_MASK_C))); 19498 DIP("stc\n"); 19499 break; 19500 default: 19501 vpanic("disInstr(x64)(cmc/clc/stc)"); 19502 } 19503 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 19504 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 19505 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) )); 19506 /* Set NDEP even though it isn't used. This makes redundant-PUT 19507 elimination of previous stores to this field work better. 
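(With CC_OP set to AMD64G_CC_OP_COPY, the flag-evaluation
   helpers are assumed to treat CC_DEP1 as the literal rflags
   value, so e.g. the carry bit is conceptually recovered as
   (CC_DEP1 >> AMD64G_CC_SHIFT_C) & 1.)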
*/ 19508 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 19509 return delta; 19510 19511 case 0xF6: { /* Grp3 Eb */ 19512 Bool decode_OK = True; 19513 if (haveF2orF3(pfx)) goto decode_failure; 19514 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 19515 if (!decode_OK) goto decode_failure; 19516 return delta; 19517 } 19518 19519 case 0xF7: { /* Grp3 Ev */ 19520 Bool decode_OK = True; 19521 if (haveF2orF3(pfx)) goto decode_failure; 19522 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 19523 if (!decode_OK) goto decode_failure; 19524 return delta; 19525 } 19526 19527 case 0xFC: /* CLD */ 19528 if (haveF2orF3(pfx)) goto decode_failure; 19529 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); 19530 DIP("cld\n"); 19531 return delta; 19532 19533 case 0xFD: /* STD */ 19534 if (haveF2orF3(pfx)) goto decode_failure; 19535 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); 19536 DIP("std\n"); 19537 return delta; 19538 19539 case 0xFE: { /* Grp4 Eb */ 19540 Bool decode_OK = True; 19541 if (haveF2orF3(pfx)) goto decode_failure; 19542 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 19543 if (!decode_OK) goto decode_failure; 19544 return delta; 19545 } 19546 19547 case 0xFF: { /* Grp5 Ev */ 19548 Bool decode_OK = True; 19549 if (haveF2orF3(pfx)) goto decode_failure; 19550 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK ); 19551 if (!decode_OK) goto decode_failure; 19552 return delta; 19553 } 19554 19555 default: 19556 break; 19557 19558 } 19559 19560 decode_failure: 19561 return deltaIN; /* fail */ 19562 } 19563 19564 19565 /*------------------------------------------------------------*/ 19566 /*--- ---*/ 19567 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/ 19568 /*--- ---*/ 19569 /*------------------------------------------------------------*/ 19570 19571 static IRTemp math_BSWAP ( IRTemp t1, IRType ty ) 19572 { 19573 IRTemp t2 = newTemp(ty); 19574 if (ty == Ity_I64) { 19575 IRTemp m8 = newTemp(Ity_I64); 19576 IRTemp s8 = newTemp(Ity_I64); 19577 IRTemp m16 = newTemp(Ity_I64); 19578 IRTemp s16 = newTemp(Ity_I64); 19579 IRTemp m32 = newTemp(Ity_I64); 19580 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); 19581 assign( s8, 19582 binop(Iop_Or64, 19583 binop(Iop_Shr64, 19584 binop(Iop_And64,mkexpr(t1),mkexpr(m8)), 19585 mkU8(8)), 19586 binop(Iop_And64, 19587 binop(Iop_Shl64,mkexpr(t1),mkU8(8)), 19588 mkexpr(m8)) 19589 ) 19590 ); 19591 19592 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); 19593 assign( s16, 19594 binop(Iop_Or64, 19595 binop(Iop_Shr64, 19596 binop(Iop_And64,mkexpr(s8),mkexpr(m16)), 19597 mkU8(16)), 19598 binop(Iop_And64, 19599 binop(Iop_Shl64,mkexpr(s8),mkU8(16)), 19600 mkexpr(m16)) 19601 ) 19602 ); 19603 19604 assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); 19605 assign( t2, 19606 binop(Iop_Or64, 19607 binop(Iop_Shr64, 19608 binop(Iop_And64,mkexpr(s16),mkexpr(m32)), 19609 mkU8(32)), 19610 binop(Iop_And64, 19611 binop(Iop_Shl64,mkexpr(s16),mkU8(32)), 19612 mkexpr(m32)) 19613 ) 19614 ); 19615 return t2; 19616 } 19617 if (ty == Ity_I32) { 19618 assign( t2, 19619 binop( 19620 Iop_Or32, 19621 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 19622 binop( 19623 Iop_Or32, 19624 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 19625 mkU32(0x00FF0000)), 19626 binop(Iop_Or32, 19627 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 19628 mkU32(0x0000FF00)), 19629 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 19630 mkU32(0x000000FF) ) 19631 ))) 19632 ); 19633 return t2; 19634 } 19635 if (ty == Ity_I16) { 19636 assign(t2, 19637 binop(Iop_Or16, 19638 binop(Iop_Shl16, mkexpr(t1), mkU8(8)), 
19639 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) )); 19640 return t2; 19641 } 19642 vassert(0); 19643 /*NOTREACHED*/ 19644 return IRTemp_INVALID; 19645 } 19646 19647 19648 __attribute__((noinline)) 19649 static 19650 Long dis_ESC_0F ( 19651 /*MB_OUT*/DisResult* dres, 19652 /*MB_OUT*/Bool* expect_CAS, 19653 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 19654 Bool resteerCisOk, 19655 void* callback_opaque, 19656 VexArchInfo* archinfo, 19657 VexAbiInfo* vbi, 19658 Prefix pfx, Int sz, Long deltaIN 19659 ) 19660 { 19661 Long d64 = 0; 19662 IRTemp addr = IRTemp_INVALID; 19663 IRTemp t1 = IRTemp_INVALID; 19664 IRTemp t2 = IRTemp_INVALID; 19665 UChar modrm = 0; 19666 Int am_sz = 0; 19667 Int alen = 0; 19668 HChar dis_buf[50]; 19669 19670 /* In the first switch, look for ordinary integer insns. */ 19671 Long delta = deltaIN; 19672 UChar opc = getUChar(delta); 19673 delta++; 19674 switch (opc) { /* first switch */ 19675 19676 case 0x01: 19677 { 19678 modrm = getUChar(delta); 19679 /* 0F 01 /0 -- SGDT */ 19680 /* 0F 01 /1 -- SIDT */ 19681 if (!epartIsReg(modrm) 19682 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) { 19683 /* This is really revolting, but ... since each processor 19684 (core) only has one IDT and one GDT, just let the guest 19685 see it (pass-through semantics). I can't see any way to 19686 construct a faked-up value, so don't bother to try. */ 19687 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19688 delta += alen; 19689 switch (gregLO3ofRM(modrm)) { 19690 case 0: DIP("sgdt %s\n", dis_buf); break; 19691 case 1: DIP("sidt %s\n", dis_buf); break; 19692 default: vassert(0); /*NOTREACHED*/ 19693 } 19694 IRDirty* d = unsafeIRDirty_0_N ( 19695 0/*regparms*/, 19696 "amd64g_dirtyhelper_SxDT", 19697 &amd64g_dirtyhelper_SxDT, 19698 mkIRExprVec_2( mkexpr(addr), 19699 mkU64(gregLO3ofRM(modrm)) ) 19700 ); 19701 /* declare we're writing memory */ 19702 d->mFx = Ifx_Write; 19703 d->mAddr = mkexpr(addr); 19704 d->mSize = 6; 19705 stmt( IRStmt_Dirty(d) ); 19706 return delta; 19707 } 19708 /* 0F 01 D0 = XGETBV */ 19709 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 19710 delta += 1; 19711 DIP("xgetbv\n"); 19712 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I 19713 am not sure if that translates in to SEGV or to something 19714 else, in user space. */ 19715 t1 = newTemp(Ity_I32); 19716 assign( t1, getIReg32(R_RCX) ); 19717 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)), 19718 Ijk_SigSEGV, 19719 IRConst_U64(guest_RIP_curr_instr), 19720 OFFB_RIP 19721 )); 19722 putIRegRAX(4, mkU32(7)); 19723 putIRegRDX(4, mkU32(0)); 19724 return delta; 19725 } 19726 /* else decode failed */ 19727 break; 19728 } 19729 19730 case 0x05: /* SYSCALL */ 19731 guest_RIP_next_mustcheck = True; 19732 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 19733 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) ); 19734 /* It's important that all guest state is up-to-date 19735 at this point. So we declare an end-of-block here, which 19736 forces any cached guest state to be flushed. 
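(Architecturally, SYSCALL also saves the address of the
   following instruction in %rcx, which is why RCX was written
   just above before the block is ended.)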
*/ 19737 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed); 19738 vassert(dres->whatNext == Dis_StopHere); 19739 DIP("syscall\n"); 19740 return delta; 19741 19742 case 0x0B: /* UD2 */ 19743 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 19744 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr); 19745 vassert(dres->whatNext == Dis_StopHere); 19746 DIP("ud2\n"); 19747 return delta; 19748 19749 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ 19750 /* 0F 0D /1 -- prefetchw mem8 */ 19751 if (have66orF2orF3(pfx)) goto decode_failure; 19752 modrm = getUChar(delta); 19753 if (epartIsReg(modrm)) goto decode_failure; 19754 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 19755 goto decode_failure; 19756 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19757 delta += alen; 19758 switch (gregLO3ofRM(modrm)) { 19759 case 0: DIP("prefetch %s\n", dis_buf); break; 19760 case 1: DIP("prefetchw %s\n", dis_buf); break; 19761 default: vassert(0); /*NOTREACHED*/ 19762 } 19763 return delta; 19764 19765 case 0x1F: 19766 if (haveF2orF3(pfx)) goto decode_failure; 19767 modrm = getUChar(delta); 19768 if (epartIsReg(modrm)) goto decode_failure; 19769 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19770 delta += alen; 19771 DIP("nop%c %s\n", nameISize(sz), dis_buf); 19772 return delta; 19773 19774 case 0x31: { /* RDTSC */ 19775 IRTemp val = newTemp(Ity_I64); 19776 IRExpr** args = mkIRExprVec_0(); 19777 IRDirty* d = unsafeIRDirty_1_N ( 19778 val, 19779 0/*regparms*/, 19780 "amd64g_dirtyhelper_RDTSC", 19781 &amd64g_dirtyhelper_RDTSC, 19782 args 19783 ); 19784 if (have66orF2orF3(pfx)) goto decode_failure; 19785 /* execute the dirty call, dumping the result in val. */ 19786 stmt( IRStmt_Dirty(d) ); 19787 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); 19788 putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); 19789 DIP("rdtsc\n"); 19790 return delta; 19791 } 19792 19793 case 0x40: 19794 case 0x41: 19795 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 19796 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 19797 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 19798 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 19799 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 19800 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 19801 case 0x48: /* CMOVSb (cmov negative) */ 19802 case 0x49: /* CMOVNSb (cmov not negative) */ 19803 case 0x4A: /* CMOVP (cmov parity even) */ 19804 case 0x4B: /* CMOVNP (cmov parity odd) */ 19805 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 19806 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 19807 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 19808 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 19809 if (haveF2orF3(pfx)) goto decode_failure; 19810 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); 19811 return delta; 19812 19813 case 0x80: 19814 case 0x81: 19815 case 0x82: /* JBb/JNAEb (jump below) */ 19816 case 0x83: /* JNBb/JAEb (jump not below) */ 19817 case 0x84: /* JZb/JEb (jump zero) */ 19818 case 0x85: /* JNZb/JNEb (jump not zero) */ 19819 case 0x86: /* JBEb/JNAb (jump below or equal) */ 19820 case 0x87: /* JNBEb/JAb (jump not below or equal) */ 19821 case 0x88: /* JSb (jump negative) */ 19822 case 0x89: /* JNSb (jump not negative) */ 19823 case 0x8A: /* JP (jump parity even) */ 19824 case 0x8B: /* JNP/JPO (jump parity odd) */ 19825 case 0x8C: /* JLb/JNGEb (jump less) */ 19826 case 0x8D: /* JGEb/JNLb (jump greater or equal) */ 19827 case 0x8E: /* JLEb/JNGb (jump less or equal) */ 19828 case 0x8F: { /* JGb/JNLEb (jump
greater) */ 19829 Long jmpDelta; 19830 HChar* comment = ""; 19831 if (haveF2orF3(pfx)) goto decode_failure; 19832 jmpDelta = getSDisp32(delta); 19833 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta; 19834 delta += 4; 19835 if (resteerCisOk 19836 && vex_control.guest_chase_cond 19837 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19838 && jmpDelta < 0 19839 && resteerOkFn( callback_opaque, d64) ) { 19840 /* Speculation: assume this backward branch is taken. So 19841 we need to emit a side-exit to the insn following this 19842 one, on the negation of the condition, and continue at 19843 the branch target address (d64). If we wind up back at 19844 the first instruction of the trace, just stop; it's 19845 better to let the IR loop unroller handle that case. */ 19846 stmt( IRStmt_Exit( 19847 mk_amd64g_calculate_condition( 19848 (AMD64Condcode)(1 ^ (opc - 0x80))), 19849 Ijk_Boring, 19850 IRConst_U64(guest_RIP_bbstart+delta), 19851 OFFB_RIP 19852 )); 19853 dres->whatNext = Dis_ResteerC; 19854 dres->continueAt = d64; 19855 comment = "(assumed taken)"; 19856 } 19857 else 19858 if (resteerCisOk 19859 && vex_control.guest_chase_cond 19860 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19861 && jmpDelta >= 0 19862 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 19863 /* Speculation: assume this forward branch is not taken. 19864 So we need to emit a side-exit to d64 (the dest) and 19865 continue disassembling at the insn immediately 19866 following this one. */ 19867 stmt( IRStmt_Exit( 19868 mk_amd64g_calculate_condition((AMD64Condcode) 19869 (opc - 0x80)), 19870 Ijk_Boring, 19871 IRConst_U64(d64), 19872 OFFB_RIP 19873 )); 19874 dres->whatNext = Dis_ResteerC; 19875 dres->continueAt = guest_RIP_bbstart+delta; 19876 comment = "(assumed not taken)"; 19877 } 19878 else { 19879 /* Conservative default translation - end the block at 19880 this point. 
*/ 19881 jcc_01( dres, (AMD64Condcode)(opc - 0x80), 19882 guest_RIP_bbstart+delta, d64 ); 19883 vassert(dres->whatNext == Dis_StopHere); 19884 } 19885 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment); 19886 return delta; 19887 } 19888 19889 case 0x90: 19890 case 0x91: 19891 case 0x92: /* set-Bb/set-NAEb (set if below) */ 19892 case 0x93: /* set-NBb/set-AEb (set if not below) */ 19893 case 0x94: /* set-Zb/set-Eb (set if zero) */ 19894 case 0x95: /* set-NZb/set-NEb (set if not zero) */ 19895 case 0x96: /* set-BEb/set-NAb (set if below or equal) */ 19896 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */ 19897 case 0x98: /* set-Sb (set if negative) */ 19898 case 0x99: /* set-NSb (set if not negative) */ 19899 case 0x9A: /* set-P (set if parity even) */ 19900 case 0x9B: /* set-NP (set if parity odd) */ 19901 case 0x9C: /* set-Lb/set-NGEb (set if less) */ 19902 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */ 19903 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */ 19904 case 0x9F: /* set-Gb/set-NLEb (set if greater) */ 19905 if (haveF2orF3(pfx)) goto decode_failure; 19906 t1 = newTemp(Ity_I8); 19907 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) ); 19908 modrm = getUChar(delta); 19909 if (epartIsReg(modrm)) { 19910 delta++; 19911 putIRegE(1, pfx, modrm, mkexpr(t1)); 19912 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), 19913 nameIRegE(1,pfx,modrm)); 19914 } else { 19915 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19916 delta += alen; 19917 storeLE( mkexpr(addr), mkexpr(t1) ); 19918 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf); 19919 } 19920 return delta; 19921 19922 case 0xA2: { /* CPUID */ 19923 /* Uses dirty helper: 19924 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) 19925 declared to mod rax, wr rbx, rcx, rdx 19926 */ 19927 IRDirty* d = NULL; 19928 HChar* fName = NULL; 19929 void* fAddr = NULL; 19930 if (haveF2orF3(pfx)) goto decode_failure; 19931 if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3 19932 |VEX_HWCAPS_AMD64_CX16 19933 |VEX_HWCAPS_AMD64_AVX)) { 19934 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16"; 19935 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16; 19936 /* This is a Core-i5-2300-like machine */ 19937 } 19938 else if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3 19939 |VEX_HWCAPS_AMD64_CX16)) { 19940 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; 19941 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; 19942 /* This is a Core-i5-670-like machine */ 19943 } 19944 else { 19945 /* Give a CPUID for at least a baseline machine, SSE2 19946 only, and no CX16 */ 19947 fName = "amd64g_dirtyhelper_CPUID_baseline"; 19948 fAddr = &amd64g_dirtyhelper_CPUID_baseline; 19949 } 19950 19951 vassert(fName); vassert(fAddr); 19952 d = unsafeIRDirty_0_N ( 0/*regparms*/, 19953 fName, fAddr, mkIRExprVec_0() ); 19954 /* declare guest state effects */ 19955 d->needsBBP = True; 19956 d->nFxState = 4; 19957 vex_bzero(&d->fxState, sizeof(d->fxState)); 19958 d->fxState[0].fx = Ifx_Modify; 19959 d->fxState[0].offset = OFFB_RAX; 19960 d->fxState[0].size = 8; 19961 d->fxState[1].fx = Ifx_Write; 19962 d->fxState[1].offset = OFFB_RBX; 19963 d->fxState[1].size = 8; 19964 d->fxState[2].fx = Ifx_Modify; 19965 d->fxState[2].offset = OFFB_RCX; 19966 d->fxState[2].size = 8; 19967 d->fxState[3].fx = Ifx_Write; 19968 d->fxState[3].offset = OFFB_RDX; 19969 d->fxState[3].size = 8; 19970 /* execute the dirty call, side-effecting guest state */ 19971 stmt( IRStmt_Dirty(d) ); 19972 /* CPUID is a serialising insn.
So, just in case someone is 19973 using it as a memory fence ... */ 19974 stmt( IRStmt_MBE(Imbe_Fence) ); 19975 DIP("cpuid\n"); 19976 return delta; 19977 } 19978 19979 case 0xA3: /* BT Gv,Ev */ 19980 if (haveF2orF3(pfx)) goto decode_failure; 19981 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 19982 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone ); 19983 return delta; 19984 19985 case 0xA4: /* SHLDv imm8,Gv,Ev */ 19986 modrm = getUChar(delta); 19987 d64 = delta + lengthAMode(pfx, delta); 19988 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 19989 delta = dis_SHLRD_Gv_Ev ( 19990 vbi, pfx, delta, modrm, sz, 19991 mkU8(getUChar(d64)), True, /* literal */ 19992 dis_buf, True /* left */ ); 19993 return delta; 19994 19995 case 0xA5: /* SHLDv %cl,Gv,Ev */ 19996 modrm = getUChar(delta); 19997 delta = dis_SHLRD_Gv_Ev ( 19998 vbi, pfx, delta, modrm, sz, 19999 getIRegCL(), False, /* not literal */ 20000 "%cl", True /* left */ ); 20001 return delta; 20002 20003 case 0xAB: /* BTS Gv,Ev */ 20004 if (haveF2orF3(pfx)) goto decode_failure; 20005 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 20006 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet ); 20007 return delta; 20008 20009 case 0xAC: /* SHRDv imm8,Gv,Ev */ 20010 modrm = getUChar(delta); 20011 d64 = delta + lengthAMode(pfx, delta); 20012 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 20013 delta = dis_SHLRD_Gv_Ev ( 20014 vbi, pfx, delta, modrm, sz, 20015 mkU8(getUChar(d64)), True, /* literal */ 20016 dis_buf, False /* right */ ); 20017 return delta; 20018 20019 case 0xAD: /* SHRDv %cl,Gv,Ev */ 20020 modrm = getUChar(delta); 20021 delta = dis_SHLRD_Gv_Ev ( 20022 vbi, pfx, delta, modrm, sz, 20023 getIRegCL(), False, /* not literal */ 20024 "%cl", False /* right */); 20025 return delta; 20026 20027 case 0xAF: /* IMUL Ev, Gv */ 20028 if (haveF2orF3(pfx)) goto decode_failure; 20029 delta = dis_mul_E_G ( vbi, pfx, sz, delta ); 20030 return delta; 20031 20032 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */ 20033 Bool ok = True; 20034 if (haveF2orF3(pfx)) goto decode_failure; 20035 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; 20036 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); 20037 if (!ok) goto decode_failure; 20038 return delta; 20039 } 20040 20041 case 0xB0: { /* CMPXCHG Gb,Eb */ 20042 Bool ok = True; 20043 if (haveF2orF3(pfx)) goto decode_failure; 20044 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); 20045 if (!ok) goto decode_failure; 20046 return delta; 20047 } 20048 20049 case 0xB3: /* BTR Gv,Ev */ 20050 if (haveF2orF3(pfx)) goto decode_failure; 20051 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 20052 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset ); 20053 return delta; 20054 20055 case 0xB6: /* MOVZXb Eb,Gv */ 20056 if (haveF2orF3(pfx)) goto decode_failure; 20057 if (sz != 2 && sz != 4 && sz != 8) 20058 goto decode_failure; 20059 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False ); 20060 return delta; 20061 20062 case 0xB7: /* MOVZXw Ew,Gv */ 20063 if (haveF2orF3(pfx)) goto decode_failure; 20064 if (sz != 4 && sz != 8) 20065 goto decode_failure; 20066 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False ); 20067 return delta; 20068 20069 case 0xBA: { /* Grp8 Ib,Ev */ 20070 Bool decode_OK = False; 20071 if (haveF2orF3(pfx)) goto decode_failure; 20072 modrm = getUChar(delta); 20073 am_sz = lengthAMode(pfx,delta); 20074 d64 = getSDisp8(delta + am_sz); 20075 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 20076 &decode_OK ); 20077 if (!decode_OK) 20078 goto 
decode_failure; 20079 return delta; 20080 } 20081 20082 case 0xBB: /* BTC Gv,Ev */ 20083 if (haveF2orF3(pfx)) goto decode_failure; 20084 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 20085 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp ); 20086 return delta; 20087 20088 case 0xBC: /* BSF Gv,Ev */ 20089 if (haveF2(pfx)) goto decode_failure; 20090 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 20091 return delta; 20092 20093 case 0xBD: /* BSR Gv,Ev */ 20094 if (!haveF2orF3(pfx) 20095 || (haveF3noF2(pfx) 20096 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) { 20097 /* no-F2 no-F3 0F BD = BSR 20098 or F3 0F BD = REP; BSR on older CPUs. */ 20099 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 20100 return delta; 20101 } 20102 /* Fall through, since F3 0F BD is LZCNT, and needs to 20103 be handled by dis_ESC_0F__SSE4. */ 20104 break; 20105 20106 case 0xBE: /* MOVSXb Eb,Gv */ 20107 if (haveF2orF3(pfx)) goto decode_failure; 20108 if (sz != 2 && sz != 4 && sz != 8) 20109 goto decode_failure; 20110 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True ); 20111 return delta; 20112 20113 case 0xBF: /* MOVSXw Ew,Gv */ 20114 if (haveF2orF3(pfx)) goto decode_failure; 20115 if (sz != 4 && sz != 8) 20116 goto decode_failure; 20117 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True ); 20118 return delta; 20119 20120 case 0xC1: { /* XADD Gv,Ev */ 20121 Bool decode_OK = False; 20122 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); 20123 if (!decode_OK) 20124 goto decode_failure; 20125 return delta; 20126 } 20127 20128 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ 20129 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; 20130 IRTemp expdHi = newTemp(elemTy); 20131 IRTemp expdLo = newTemp(elemTy); 20132 IRTemp dataHi = newTemp(elemTy); 20133 IRTemp dataLo = newTemp(elemTy); 20134 IRTemp oldHi = newTemp(elemTy); 20135 IRTemp oldLo = newTemp(elemTy); 20136 IRTemp flags_old = newTemp(Ity_I64); 20137 IRTemp flags_new = newTemp(Ity_I64); 20138 IRTemp success = newTemp(Ity_I1); 20139 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; 20140 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; 20141 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; 20142 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0); 20143 IRTemp expdHi64 = newTemp(Ity_I64); 20144 IRTemp expdLo64 = newTemp(Ity_I64); 20145 20146 /* Translate this using a DCAS, even if there is no LOCK 20147 prefix. Life is too short to bother with generating two 20148 different translations for the with/without-LOCK-prefix 20149 cases. */ 20150 *expect_CAS = True; 20151 20152 /* Decode, and generate address. */ 20153 if (have66orF2orF3(pfx)) goto decode_failure; 20154 if (sz != 4 && sz != 8) goto decode_failure; 20155 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 20156 goto decode_failure; 20157 modrm = getUChar(delta); 20158 if (epartIsReg(modrm)) goto decode_failure; 20159 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 20160 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20161 delta += alen; 20162 20163 /* cmpxchg16b requires an alignment check. */ 20164 if (sz == 8) 20165 gen_SEGV_if_not_16_aligned( addr ); 20166 20167 /* Get the expected and new values. */ 20168 assign( expdHi64, getIReg64(R_RDX) ); 20169 assign( expdLo64, getIReg64(R_RAX) ); 20170 20171 /* These are the correctly-sized expected and new values. 20172 However, we also get expdHi64/expdLo64 above as 64-bits 20173 regardless, because we will need them later in the 32-bit 20174 case (paradoxically). */ 20175 assign( expdHi, sz==4 ? 
unop(Iop_64to32, mkexpr(expdHi64)) 20176 : mkexpr(expdHi64) ); 20177 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 20178 : mkexpr(expdLo64) ); 20179 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 20180 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) ); 20181 20182 /* Do the DCAS */ 20183 stmt( IRStmt_CAS( 20184 mkIRCAS( oldHi, oldLo, 20185 Iend_LE, mkexpr(addr), 20186 mkexpr(expdHi), mkexpr(expdLo), 20187 mkexpr(dataHi), mkexpr(dataLo) 20188 ))); 20189 20190 /* success when oldHi:oldLo == expdHi:expdLo */ 20191 assign( success, 20192 binop(opCasCmpEQ, 20193 binop(opOR, 20194 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 20195 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 20196 ), 20197 zero 20198 )); 20199 20200 /* If the DCAS is successful, that is to say oldHi:oldLo == 20201 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 20202 which is where they came from originally. Both the actual 20203 contents of these two regs, and any shadow values, are 20204 unchanged. If the DCAS fails then we're putting into 20205 RDX:RAX the value seen in memory. */ 20206 /* Now of course there's a complication in the 32-bit case 20207 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 20208 unchanged; but if we use the same scheme as in the 64-bit 20209 case, we get hit by the standard rule that a write to the 20210 bottom 32 bits of an integer register zeros the upper 32 20211 bits. And so the upper halves of RDX and RAX mysteriously 20212 become zero. So we have to stuff back in the original 20213 64-bit values which we previously stashed in 20214 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 20215 /* It's just _so_ much fun ... */ 20216 putIRegRDX( 8, 20217 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 20218 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 20219 : mkexpr(oldHi), 20220 mkexpr(expdHi64) 20221 )); 20222 putIRegRAX( 8, 20223 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 20224 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo)) 20225 : mkexpr(oldLo), 20226 mkexpr(expdLo64) 20227 )); 20228 20229 /* Copy the success bit into the Z flag and leave the others 20230 unchanged */ 20231 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); 20232 assign( 20233 flags_new, 20234 binop(Iop_Or64, 20235 binop(Iop_And64, mkexpr(flags_old), 20236 mkU64(~AMD64G_CC_MASK_Z)), 20237 binop(Iop_Shl64, 20238 binop(Iop_And64, 20239 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), 20240 mkU8(AMD64G_CC_SHIFT_Z)) )); 20241 20242 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20243 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 20244 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20245 /* Set NDEP even though it isn't used. This makes 20246 redundant-PUT elimination of previous stores to this field 20247 work better. */ 20248 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 20249 20250 /* Sheesh. Aren't you glad it was me and not you that had to 20251 write and validate all this grunge? */ 20252 20253 DIP("cmpxchg%db %s\n", sz == 4 ? 8 : 16, dis_buf); 20254 return delta; 20255 } 20256 20257 case 0xC8: /* BSWAP %eax */ 20258 case 0xC9: 20259 case 0xCA: 20260 case 0xCB: 20261 case 0xCC: 20262 case 0xCD: 20263 case 0xCE: 20264 case 0xCF: /* BSWAP %edi */ 20265 if (haveF2orF3(pfx)) goto decode_failure; 20266 /* According to the AMD64 docs, this insn can have size 4 or 20267 8.
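A 66h prefix would give sz == 2, and BSWAP of a 16-bit register is documented as undefined, so that case drops through to decode_failure below. REX.B participates in the register selection, so opcodes C8h..CFh can also name %r8..%r15.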
*/ 20268 if (sz == 4) { 20269 t1 = newTemp(Ity_I32); 20270 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 20271 t2 = math_BSWAP( t1, Ity_I32 ); 20272 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 20273 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 20274 return delta; 20275 } 20276 if (sz == 8) { 20277 t1 = newTemp(Ity_I64); 20278 t2 = newTemp(Ity_I64); 20279 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 20280 t2 = math_BSWAP( t1, Ity_I64 ); 20281 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 20282 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 20283 return delta; 20284 } 20285 goto decode_failure; 20286 20287 default: 20288 break; 20289 20290 } /* first switch */ 20291 20292 20293 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */ 20294 /* In the second switch, pick off MMX insns. */ 20295 20296 if (!have66orF2orF3(pfx)) { 20297 /* So there's no SIMD prefix. */ 20298 20299 vassert(sz == 4 || sz == 8); 20300 20301 switch (opc) { /* second switch */ 20302 20303 case 0x71: 20304 case 0x72: 20305 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 20306 20307 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 20308 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 20309 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 20310 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 20311 20312 case 0xFC: 20313 case 0xFD: 20314 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 20315 20316 case 0xEC: 20317 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 20318 20319 case 0xDC: 20320 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 20321 20322 case 0xF8: 20323 case 0xF9: 20324 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 20325 20326 case 0xE8: 20327 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 20328 20329 case 0xD8: 20330 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 20331 20332 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 20333 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 20334 20335 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 20336 20337 case 0x74: 20338 case 0x75: 20339 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 20340 20341 case 0x64: 20342 case 0x65: 20343 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 20344 20345 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 20346 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 20347 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 20348 20349 case 0x68: 20350 case 0x69: 20351 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 20352 20353 case 0x60: 20354 case 0x61: 20355 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 20356 20357 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 20358 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 20359 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 20360 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 20361 20362 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 20363 case 0xF2: 20364 case 0xF3: 20365 20366 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 20367 case 0xD2: 20368 case 0xD3: 20369 20370 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 20371 case 0xE2: { 20372 Bool decode_OK = False; 20373 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN ); 20374 if (decode_OK) 20375 return delta; 20376 goto decode_failure; 20377 } 20378 20379 default: 20380 break; 20381 } /* second switch */ 20382 20383 } 20384 20385 /* A couple of MMX corner cases */ 20386 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) 
{ 20387 if (sz != 4) 20388 goto decode_failure; 20389 do_EMMS_preamble(); 20390 DIP("{f}emms\n"); 20391 return delta; 20392 } 20393 20394 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */ 20395 /* Perhaps it's an SSE or SSE2 instruction. We can try this 20396 without checking the guest hwcaps because SSE2 is a baseline 20397 facility in 64 bit mode. */ 20398 { 20399 Bool decode_OK = False; 20400 delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres ); 20401 if (decode_OK) 20402 return delta; 20403 } 20404 20405 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */ 20406 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps 20407 first. */ 20408 { 20409 Bool decode_OK = False; 20410 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 20411 if (decode_OK) 20412 return delta; 20413 } 20414 20415 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 20416 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps 20417 first. */ 20418 { 20419 Bool decode_OK = False; 20420 delta = dis_ESC_0F__SSE4 ( &decode_OK, 20421 archinfo, vbi, pfx, sz, deltaIN ); 20422 if (decode_OK) 20423 return delta; 20424 } 20425 20426 decode_failure: 20427 return deltaIN; /* fail */ 20428 } 20429 20430 20431 /*------------------------------------------------------------*/ 20432 /*--- ---*/ 20433 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/ 20434 /*--- ---*/ 20435 /*------------------------------------------------------------*/ 20436 20437 __attribute__((noinline)) 20438 static 20439 Long dis_ESC_0F38 ( 20440 /*MB_OUT*/DisResult* dres, 20441 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 20442 Bool resteerCisOk, 20443 void* callback_opaque, 20444 VexArchInfo* archinfo, 20445 VexAbiInfo* vbi, 20446 Prefix pfx, Int sz, Long deltaIN 20447 ) 20448 { 20449 Long delta = deltaIN; 20450 UChar opc = getUChar(delta); 20451 delta++; 20452 switch (opc) { 20453 20454 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */ 20455 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */ 20456 if (!haveF2orF3(pfx) && !haveVEX(pfx) 20457 && (sz == 2 || sz == 4 || sz == 8)) { 20458 IRTemp addr = IRTemp_INVALID; 20459 UChar modrm = 0; 20460 Int alen = 0; 20461 HChar dis_buf[50]; 20462 modrm = getUChar(delta); 20463 if (epartIsReg(modrm)) break; 20464 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20465 delta += alen; 20466 IRType ty = szToITy(sz); 20467 IRTemp src = newTemp(ty); 20468 if (opc == 0xF0) { /* LOAD */ 20469 assign(src, loadLE(ty, mkexpr(addr))); 20470 IRTemp dst = math_BSWAP(src, ty); 20471 putIRegG(sz, pfx, modrm, mkexpr(dst)); 20472 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm)); 20473 } else { /* STORE */ 20474 assign(src, getIRegG(sz, pfx, modrm)); 20475 IRTemp dst = math_BSWAP(src, ty); 20476 storeLE(mkexpr(addr), mkexpr(dst)); 20477 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf); 20478 } 20479 return delta; 20480 } 20481 /* else fall through; maybe one of the decoders below knows what 20482 it is. */ 20483 break; 20484 } 20485 20486 default: 20487 break; 20488 20489 } 20490 20491 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */ 20492 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps 20493 rather than proceeding indiscriminately. */ 20494 { 20495 Bool decode_OK = False; 20496 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 20497 if (decode_OK) 20498 return delta; 20499 } 20500 20501 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 20502 /* Perhaps it's an SSE4 instruction. 
FIXME: consult guest hwcaps 20503 rather than proceeding indiscriminately. */ 20504 { 20505 Bool decode_OK = False; 20506 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN ); 20507 if (decode_OK) 20508 return delta; 20509 } 20510 20511 /*decode_failure:*/ 20512 return deltaIN; /* fail */ 20513 } 20514 20515 20516 /*------------------------------------------------------------*/ 20517 /*--- ---*/ 20518 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/ 20519 /*--- ---*/ 20520 /*------------------------------------------------------------*/ 20521 20522 __attribute__((noinline)) 20523 static 20524 Long dis_ESC_0F3A ( 20525 /*MB_OUT*/DisResult* dres, 20526 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 20527 Bool resteerCisOk, 20528 void* callback_opaque, 20529 VexArchInfo* archinfo, 20530 VexAbiInfo* vbi, 20531 Prefix pfx, Int sz, Long deltaIN 20532 ) 20533 { 20534 Long delta = deltaIN; 20535 UChar opc = getUChar(delta); 20536 delta++; 20537 switch (opc) { 20538 20539 default: 20540 break; 20541 20542 } 20543 20544 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */ 20545 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps 20546 rather than proceeding indiscriminately. */ 20547 { 20548 Bool decode_OK = False; 20549 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 20550 if (decode_OK) 20551 return delta; 20552 } 20553 20554 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 20555 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps 20556 rather than proceeding indiscriminately. */ 20557 { 20558 Bool decode_OK = False; 20559 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN ); 20560 if (decode_OK) 20561 return delta; 20562 } 20563 20564 return deltaIN; /* fail */ 20565 } 20566 20567 20568 /*------------------------------------------------------------*/ 20569 /*--- ---*/ 20570 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/ 20571 /*--- ---*/ 20572 /*------------------------------------------------------------*/ 20573 20574 /* FIXME: common up with the _256_ version below? */ 20575 static 20576 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG ( 20577 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 20578 Prefix pfx, Long delta, HChar* name, 20579 /* The actual operation. Use either 'op' or 'opfn', 20580 but not both. */ 20581 IROp op, IRTemp(*opFn)(IRTemp,IRTemp), 20582 Bool invertLeftArg, 20583 Bool swapArgs 20584 ) 20585 { 20586 UChar modrm = getUChar(delta); 20587 UInt rD = gregOfRexRM(pfx, modrm); 20588 UInt rSL = getVexNvvvv(pfx); 20589 IRTemp tSL = newTemp(Ity_V128); 20590 IRTemp tSR = newTemp(Ity_V128); 20591 IRTemp addr = IRTemp_INVALID; 20592 HChar dis_buf[50]; 20593 Int alen = 0; 20594 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/); 20595 20596 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL)) 20597 : getXMMReg(rSL)); 20598 20599 if (epartIsReg(modrm)) { 20600 UInt rSR = eregOfRexRM(pfx, modrm); 20601 delta += 1; 20602 assign(tSR, getXMMReg(rSR)); 20603 DIP("%s %s,%s,%s\n", 20604 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD)); 20605 } else { 20606 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 20607 delta += alen; 20608 assign(tSR, loadLE(Ity_V128, mkexpr(addr))); 20609 DIP("%s %s,%s,%s\n", 20610 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD)); 20611 } 20612 20613 IRTemp res = IRTemp_INVALID; 20614 if (op != Iop_INVALID) { 20615 vassert(opFn == NULL); 20616 res = newTemp(Ity_V128); 20617 assign(res, swapArgs ? 
binop(op, mkexpr(tSR), mkexpr(tSL)) 20618 : binop(op, mkexpr(tSL), mkexpr(tSR))); 20619 } else { 20620 vassert(opFn != NULL); 20621 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); 20622 } 20623 20624 putYMMRegLoAndZU(rD, mkexpr(res)); 20625 20626 *uses_vvvv = True; 20627 return delta; 20628 } 20629 20630 20631 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp 20632 for the operation, no inversion of the left arg, and no swapping of 20633 args. */ 20634 static 20635 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple ( 20636 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 20637 Prefix pfx, Long delta, HChar* name, 20638 IROp op 20639 ) 20640 { 20641 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 20642 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False); 20643 } 20644 20645 20646 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR 20647 generator to compute the result, no inversion of the left 20648 arg, and no swapping of args. */ 20649 static 20650 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex ( 20651 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 20652 Prefix pfx, Long delta, HChar* name, 20653 IRTemp(*opFn)(IRTemp,IRTemp) 20654 ) 20655 { 20656 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 20657 uses_vvvv, vbi, pfx, delta, name, 20658 Iop_INVALID, opFn, False, False ); 20659 } 20660 20661 20662 /* Vector by scalar shift of V by the amount specified at the bottom 20663 of E. */ 20664 static Long dis_AVX128_shiftV_byE ( VexAbiInfo* vbi, 20665 Prefix pfx, Long delta, 20666 HChar* opname, IROp op ) 20667 { 20668 HChar dis_buf[50]; 20669 Int alen, size; 20670 IRTemp addr; 20671 Bool shl, shr, sar; 20672 UChar modrm = getUChar(delta); 20673 UInt rG = gregOfRexRM(pfx,modrm); 20674 UInt rV = getVexNvvvv(pfx); 20675 IRTemp g0 = newTemp(Ity_V128); 20676 IRTemp g1 = newTemp(Ity_V128); 20677 IRTemp amt = newTemp(Ity_I64); 20678 IRTemp amt8 = newTemp(Ity_I8); 20679 if (epartIsReg(modrm)) { 20680 UInt rE = eregOfRexRM(pfx,modrm); 20681 assign( amt, getXMMRegLane64(rE, 0) ); 20682 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 20683 nameXMMReg(rV), nameXMMReg(rG) ); 20684 delta++; 20685 } else { 20686 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20687 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 20688 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 20689 delta += alen; 20690 } 20691 assign( g0, getXMMReg(rV) ); 20692 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 20693 20694 shl = shr = sar = False; 20695 size = 0; 20696 switch (op) { 20697 case Iop_ShlN16x8: shl = True; size = 16; break; 20698 case Iop_ShlN32x4: shl = True; size = 32; break; 20699 case Iop_ShlN64x2: shl = True; size = 64; break; 20700 case Iop_SarN16x8: sar = True; size = 16; break; 20701 case Iop_SarN32x4: sar = True; size = 32; break; 20702 case Iop_ShrN16x8: shr = True; size = 16; break; 20703 case Iop_ShrN32x4: shr = True; size = 32; break; 20704 case Iop_ShrN64x2: shr = True; size = 64; break; 20705 default: vassert(0); 20706 } 20707 20708 if (shl || shr) { 20709 assign( 20710 g1, 20711 IRExpr_Mux0X( 20712 unop(Iop_1Uto8, 20713 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))), 20714 mkV128(0x0000), /* amt >= lane size: result is all zeroes */ 20715 binop(op, mkexpr(g0), mkexpr(amt8)) 20716 ) 20717 ); 20718 } else 20719 if (sar) { 20720 assign( 20721 g1, 20722 IRExpr_Mux0X( 20723 unop(Iop_1Uto8, 20724 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size))), 20725 binop(op, mkexpr(g0), mkU8(size-1)), /* amt >= lane size: shift by size-1, spreading the sign bit */ 20726 binop(op, mkexpr(g0), mkexpr(amt8)) 20727 ) 20728 ); 20729 } else { 20730 vassert(0); 20731 } 20732 20733 putYMMRegLoAndZU( rG,
mkexpr(g1) ); 20734 return delta; 20735 } 20736 20737 20738 /* Vector by scalar shift of E into V, by an immediate byte. Modified 20739 version of dis_SSE_shiftE_imm. */ 20740 static 20741 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx, 20742 Long delta, HChar* opname, IROp op ) 20743 { 20744 Bool shl, shr, sar; 20745 UChar rm = getUChar(delta); 20746 IRTemp e0 = newTemp(Ity_V128); 20747 IRTemp e1 = newTemp(Ity_V128); 20748 UInt rD = getVexNvvvv(pfx); 20749 UChar amt, size; 20750 vassert(epartIsReg(rm)); 20751 vassert(gregLO3ofRM(rm) == 2 20752 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 20753 amt = getUChar(delta+1); 20754 delta += 2; 20755 DIP("%s $%d,%s,%s\n", opname, 20756 (Int)amt, 20757 nameXMMReg(eregOfRexRM(pfx,rm)), 20758 nameXMMReg(rD)); 20759 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 20760 20761 shl = shr = sar = False; 20762 size = 0; 20763 switch (op) { 20764 case Iop_ShlN16x8: shl = True; size = 16; break; 20765 case Iop_ShlN32x4: shl = True; size = 32; break; 20766 case Iop_ShlN64x2: shl = True; size = 64; break; 20767 case Iop_SarN16x8: sar = True; size = 16; break; 20768 case Iop_SarN32x4: sar = True; size = 32; break; 20769 case Iop_ShrN16x8: shr = True; size = 16; break; 20770 case Iop_ShrN32x4: shr = True; size = 32; break; 20771 case Iop_ShrN64x2: shr = True; size = 64; break; 20772 default: vassert(0); 20773 } 20774 20775 if (shl || shr) { 20776 assign( e1, amt >= size 20777 ? mkV128(0x0000) 20778 : binop(op, mkexpr(e0), mkU8(amt)) 20779 ); 20780 } else 20781 if (sar) { 20782 assign( e1, amt >= size 20783 ? binop(op, mkexpr(e0), mkU8(size-1)) 20784 : binop(op, mkexpr(e0), mkU8(amt)) 20785 ); 20786 } else { 20787 vassert(0); 20788 } 20789 20790 putYMMRegLoAndZU( rD, mkexpr(e1) ); 20791 return delta; 20792 } 20793 20794 20795 /* Lower 64-bit lane only AVX128 binary operation: 20796 G[63:0] = V[63:0] `op` E[63:0] 20797 G[127:64] = V[127:64] 20798 G[255:128] = 0. 20799 The specified op must be of the 64F0x2 kind, so that it 20800 copies the upper half of the left operand to the result. 20801 */ 20802 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv, 20803 VexAbiInfo* vbi, 20804 Prefix pfx, Long delta, 20805 HChar* opname, IROp op ) 20806 { 20807 HChar dis_buf[50]; 20808 Int alen; 20809 IRTemp addr; 20810 UChar rm = getUChar(delta); 20811 UInt rG = gregOfRexRM(pfx,rm); 20812 UInt rV = getVexNvvvv(pfx); 20813 IRExpr* vpart = getXMMReg(rV); 20814 if (epartIsReg(rm)) { 20815 UInt rE = eregOfRexRM(pfx,rm); 20816 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) ); 20817 DIP("%s %s,%s,%s\n", opname, 20818 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 20819 delta = delta+1; 20820 } else { 20821 /* We can only do a 64-bit memory read, so the upper half of the 20822 E operand needs to be made simply of zeroes. */ 20823 IRTemp epart = newTemp(Ity_V128); 20824 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20825 assign( epart, unop( Iop_64UtoV128, 20826 loadLE(Ity_I64, mkexpr(addr))) ); 20827 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) ); 20828 DIP("%s %s,%s,%s\n", opname, 20829 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 20830 delta = delta+alen; 20831 } 20832 putYMMRegLane128( rG, 1, mkV128(0) ); 20833 *uses_vvvv = True; 20834 return delta; 20835 } 20836 20837 20838 /* Lower 64-bit lane only AVX128 unary operation: 20839 G[63:0] = op(E[63:0]) 20840 G[127:64] = V[127:64] 20841 G[255:128] = 0 20842 The specified op must be of the 64F0x2 kind, so that it 20843 copies the upper half of the operand to the result. 
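(A standalone sketch of this lane discipline follows this comment.) */

/* Illustration only, not used by the decoder: a plain-C model of what
   a unary op "of the 64F0x2 kind" (Iop_Sqrt64F0x2, for example) does
   with its two 64-bit lanes.  The helper name and the use of C doubles
   are purely expository. */
static void model_64F0x2_unary ( double (*scalar_op)(double),
                                 const double arg[2],
                                 /*OUT*/ double res[2] )
{
   res[0] = scalar_op(arg[0]);  /* only lane 0 is computed */
   res[1] = arg[1];             /* lane 1 is copied through unchanged */
}

/* (end of illustration; dis_AVX128_E_V_to_G_lo64_unary follows)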
20844 */ 20845 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv, 20846 VexAbiInfo* vbi, 20847 Prefix pfx, Long delta, 20848 HChar* opname, IROp op ) 20849 { 20850 HChar dis_buf[50]; 20851 Int alen; 20852 IRTemp addr; 20853 UChar rm = getUChar(delta); 20854 UInt rG = gregOfRexRM(pfx,rm); 20855 UInt rV = getVexNvvvv(pfx); 20856 IRTemp e64 = newTemp(Ity_I64); 20857 20858 /* Fetch E[63:0] */ 20859 if (epartIsReg(rm)) { 20860 UInt rE = eregOfRexRM(pfx,rm); 20861 assign(e64, getXMMRegLane64(rE, 0)); 20862 DIP("%s %s,%s,%s\n", opname, 20863 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 20864 delta += 1; 20865 } else { 20866 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20867 assign(e64, loadLE(Ity_I64, mkexpr(addr))); 20868 DIP("%s %s,%s,%s\n", opname, 20869 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 20870 delta += alen; 20871 } 20872 20873 /* Create a value 'arg' as V[127:64]++E[63:0] */ 20874 IRTemp arg = newTemp(Ity_V128); 20875 assign(arg, 20876 binop(Iop_SetV128lo64, 20877 getXMMReg(rV), mkexpr(e64))); 20878 /* and apply op to it */ 20879 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 20880 *uses_vvvv = True; 20881 return delta; 20882 } 20883 20884 20885 /* Lower 32-bit lane only AVX128 unary operation: 20886 G[31:0] = op(E[31:0]) 20887 G[127:32] = V[127:32] 20888 G[255:128] = 0 20889 The specified op must be of the 32F0x4 kind, so that it 20890 copies the upper 3/4 of the operand to the result. 20891 */ 20892 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv, 20893 VexAbiInfo* vbi, 20894 Prefix pfx, Long delta, 20895 HChar* opname, IROp op ) 20896 { 20897 HChar dis_buf[50]; 20898 Int alen; 20899 IRTemp addr; 20900 UChar rm = getUChar(delta); 20901 UInt rG = gregOfRexRM(pfx,rm); 20902 UInt rV = getVexNvvvv(pfx); 20903 IRTemp e32 = newTemp(Ity_I32); 20904 20905 /* Fetch E[31:0] */ 20906 if (epartIsReg(rm)) { 20907 UInt rE = eregOfRexRM(pfx,rm); 20908 assign(e32, getXMMRegLane32(rE, 0)); 20909 DIP("%s %s,%s,%s\n", opname, 20910 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 20911 delta += 1; 20912 } else { 20913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20914 assign(e32, loadLE(Ity_I32, mkexpr(addr))); 20915 DIP("%s %s,%s,%s\n", opname, 20916 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 20917 delta += alen; 20918 } 20919 20920 /* Create a value 'arg' as V[127:32]++E[31:0] */ 20921 IRTemp arg = newTemp(Ity_V128); 20922 assign(arg, 20923 binop(Iop_SetV128lo32, 20924 getXMMReg(rV), mkexpr(e32))); 20925 /* and apply op to it */ 20926 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 20927 *uses_vvvv = True; 20928 return delta; 20929 } 20930 20931 20932 /* Lower 32-bit lane only AVX128 binary operation: 20933 G[31:0] = V[31:0] `op` E[31:0] 20934 G[127:32] = V[127:32] 20935 G[255:128] = 0. 20936 The specified op must be of the 32F0x4 kind, so that it 20937 copies the upper 3/4 of the left operand to the result. 
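(Iop_Add32F0x4 and friends are of this kind.)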
20938 */ 20939 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv, 20940 VexAbiInfo* vbi, 20941 Prefix pfx, Long delta, 20942 HChar* opname, IROp op ) 20943 { 20944 HChar dis_buf[50]; 20945 Int alen; 20946 IRTemp addr; 20947 UChar rm = getUChar(delta); 20948 UInt rG = gregOfRexRM(pfx,rm); 20949 UInt rV = getVexNvvvv(pfx); 20950 IRExpr* vpart = getXMMReg(rV); 20951 if (epartIsReg(rm)) { 20952 UInt rE = eregOfRexRM(pfx,rm); 20953 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) ); 20954 DIP("%s %s,%s,%s\n", opname, 20955 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 20956 delta = delta+1; 20957 } else { 20958 /* We can only do a 32-bit memory read, so the upper 3/4 of the 20959 E operand needs to be made simply of zeroes. */ 20960 IRTemp epart = newTemp(Ity_V128); 20961 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20962 assign( epart, unop( Iop_32UtoV128, 20963 loadLE(Ity_I32, mkexpr(addr))) ); 20964 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) ); 20965 DIP("%s %s,%s,%s\n", opname, 20966 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 20967 delta = delta+alen; 20968 } 20969 putYMMRegLane128( rG, 1, mkV128(0) ); 20970 *uses_vvvv = True; 20971 return delta; 20972 } 20973 20974 20975 /* All-lanes AVX128 binary operation: 20976 G[127:0] = V[127:0] `op` E[127:0] 20977 G[255:128] = 0. 20978 */ 20979 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv, 20980 VexAbiInfo* vbi, 20981 Prefix pfx, Long delta, 20982 HChar* opname, IROp op ) 20983 { 20984 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 20985 uses_vvvv, vbi, pfx, delta, opname, op, 20986 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ 20987 ); 20988 } 20989 20990 20991 /* Handles AVX128 32F/64F comparisons. A derivative of 20992 dis_SSEcmp_E_to_G. It can fail, in which case it returns the 20993 original delta to indicate failure. */ 20994 static 20995 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv, 20996 VexAbiInfo* vbi, 20997 Prefix pfx, Long delta, 20998 HChar* opname, Bool all_lanes, Int sz ) 20999 { 21000 vassert(sz == 4 || sz == 8); 21001 Long deltaIN = delta; 21002 HChar dis_buf[50]; 21003 Int alen; 21004 UInt imm8; 21005 IRTemp addr; 21006 Bool preSwap = False; 21007 IROp op = Iop_INVALID; 21008 Bool postNot = False; 21009 IRTemp plain = newTemp(Ity_V128); 21010 UChar rm = getUChar(delta); 21011 UInt rG = gregOfRexRM(pfx, rm); 21012 UInt rV = getVexNvvvv(pfx); 21013 IRTemp argL = newTemp(Ity_V128); 21014 IRTemp argR = newTemp(Ity_V128); 21015 21016 assign(argL, getXMMReg(rV)); 21017 if (epartIsReg(rm)) { 21018 imm8 = getUChar(delta+1); 21019 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 21020 if (!ok) return deltaIN; /* FAIL */ 21021 UInt rE = eregOfRexRM(pfx,rm); 21022 assign(argR, getXMMReg(rE)); 21023 delta += 1+1; 21024 DIP("%s $%d,%s,%s,%s\n", 21025 opname, (Int)imm8, 21026 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 21027 } else { 21028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 21029 imm8 = getUChar(delta+alen); 21030 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 21031 if (!ok) return deltaIN; /* FAIL */ 21032 assign(argR, 21033 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 21034 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 21035 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))); 21036 delta += alen+1; 21037 DIP("%s $%d,%s,%s,%s\n", 21038 opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 21039 } 21040 21041 assign(plain, preSwap ? 
binop(op, mkexpr(argR), mkexpr(argL)) 21042 : binop(op, mkexpr(argL), mkexpr(argR))); 21043 21044 if (all_lanes) { 21045 /* This is simple: just invert the result, if necessary, and 21046 have done. */ 21047 if (postNot) { 21048 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) ); 21049 } else { 21050 putYMMRegLoAndZU( rG, mkexpr(plain) ); 21051 } 21052 } 21053 else 21054 if (!preSwap) { 21055 /* More complex. It's a one-lane-only, hence need to possibly 21056 invert only that one lane. But at least the other lanes are 21057 correctly "in" the result, having been copied from the left 21058 operand (argL). */ 21059 if (postNot) { 21060 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF); 21061 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain), 21062 mask) ); 21063 } else { 21064 putYMMRegLoAndZU( rG, mkexpr(plain) ); 21065 } 21066 } 21067 else { 21068 /* This is the most complex case. One-lane-only, but the args 21069 were swapped. So we have to possibly invert the bottom lane, 21070 and (definitely) we have to copy the upper lane(s) from argL 21071 since, due to the swapping, what's currently there is from 21072 argR, which is not correct. */ 21073 IRTemp res = newTemp(Ity_V128); 21074 IRTemp mask = newTemp(Ity_V128); 21075 IRTemp notMask = newTemp(Ity_V128); 21076 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF)); 21077 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00)); 21078 if (postNot) { 21079 assign(res, 21080 binop(Iop_OrV128, 21081 binop(Iop_AndV128, 21082 unop(Iop_NotV128, mkexpr(plain)), 21083 mkexpr(mask)), 21084 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask)))); 21085 } else { 21086 assign(res, 21087 binop(Iop_OrV128, 21088 binop(Iop_AndV128, 21089 mkexpr(plain), 21090 mkexpr(mask)), 21091 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask)))); 21092 } 21093 putYMMRegLoAndZU( rG, mkexpr(res) ); 21094 } 21095 21096 *uses_vvvv = True; 21097 return delta; 21098 } 21099 21100 21101 /* Handles AVX256 32F/64F comparisons. A derivative of 21102 dis_SSEcmp_E_to_G. It can fail, in which case it returns the 21103 original delta to indicate failure. 
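*/

/* Aside, illustration only: the 16-bit immediate handed to mkV128
   (the 0x000F / 0x00FF lane masks above, for instance) encodes a
   128-bit constant one byte per bit -- bit i set makes byte lane i
   0xFF, clear makes it 0x00.  Hence 0x000F covers the low 32-bit lane
   and 0x00FF the low 64-bit lane.  A plain-C model of the expansion
   (the helper is hypothetical, for exposition only): */
static void model_mkV128 ( UShort imm16, /*OUT*/ UChar bytes[16] )
{
   Int i;
   for (i = 0; i < 16; i++)
      bytes[i] = (imm16 & (1 << i)) ? 0xFF : 0x00;
}

/* (end of aside; dis_AVX256_cmp_V_E_to_G follows)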
*/ 21104 static 21105 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv, 21106 VexAbiInfo* vbi, 21107 Prefix pfx, Long delta, 21108 HChar* opname, Int sz ) 21109 { 21110 vassert(sz == 4 || sz == 8); 21111 Long deltaIN = delta; 21112 HChar dis_buf[50]; 21113 Int alen; 21114 UInt imm8; 21115 IRTemp addr; 21116 Bool preSwap = False; 21117 IROp op = Iop_INVALID; 21118 Bool postNot = False; 21119 IRTemp plain = newTemp(Ity_V256); 21120 UChar rm = getUChar(delta); 21121 UInt rG = gregOfRexRM(pfx, rm); 21122 UInt rV = getVexNvvvv(pfx); 21123 IRTemp argL = newTemp(Ity_V256); 21124 IRTemp argR = newTemp(Ity_V256); 21125 IRTemp argLhi = IRTemp_INVALID; 21126 IRTemp argLlo = IRTemp_INVALID; 21127 IRTemp argRhi = IRTemp_INVALID; 21128 IRTemp argRlo = IRTemp_INVALID; 21129 21130 assign(argL, getYMMReg(rV)); 21131 if (epartIsReg(rm)) { 21132 imm8 = getUChar(delta+1); 21133 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 21134 True/*all_lanes*/, sz); 21135 if (!ok) return deltaIN; /* FAIL */ 21136 UInt rE = eregOfRexRM(pfx,rm); 21137 assign(argR, getYMMReg(rE)); 21138 delta += 1+1; 21139 DIP("%s $%d,%s,%s,%s\n", 21140 opname, (Int)imm8, 21141 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 21142 } else { 21143 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 21144 imm8 = getUChar(delta+alen); 21145 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 21146 True/*all_lanes*/, sz); 21147 if (!ok) return deltaIN; /* FAIL */ 21148 assign(argR, loadLE(Ity_V256, mkexpr(addr)) ); 21149 delta += alen+1; 21150 DIP("%s $%d,%s,%s,%s\n", 21151 opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 21152 } 21153 21154 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo ); 21155 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo ); 21156 assign(plain, binop( Iop_V128HLtoV256, 21157 binop(op, mkexpr(argLhi), mkexpr(argRhi)), 21158 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) ); 21159 21160 /* This is simple: just invert the result, if necessary, and 21161 have done. */ 21162 if (postNot) { 21163 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) ); 21164 } else { 21165 putYMMReg( rG, mkexpr(plain) ); 21166 } 21167 21168 *uses_vvvv = True; 21169 return delta; 21170 } 21171 21172 21173 /* Handles AVX128 unary E-to-G all-lanes operations. */ 21174 static 21175 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv, 21176 VexAbiInfo* vbi, 21177 Prefix pfx, Long delta, 21178 HChar* opname, 21179 IRTemp (*opFn)(IRTemp) ) 21180 { 21181 HChar dis_buf[50]; 21182 Int alen; 21183 IRTemp addr; 21184 IRTemp res = newTemp(Ity_V128); 21185 IRTemp arg = newTemp(Ity_V128); 21186 UChar rm = getUChar(delta); 21187 UInt rG = gregOfRexRM(pfx, rm); 21188 if (epartIsReg(rm)) { 21189 UInt rE = eregOfRexRM(pfx,rm); 21190 assign(arg, getXMMReg(rE)); 21191 delta += 1; 21192 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 21193 } else { 21194 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21195 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 21196 delta += alen; 21197 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 21198 } 21199 res = opFn(arg); 21200 putYMMRegLoAndZU( rG, mkexpr(res) ); 21201 *uses_vvvv = False; 21202 return delta; 21203 } 21204 21205 21206 /* Handles AVX128 unary E-to-G all-lanes operations. 
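Unlike dis_AVX128_E_to_G_unary just above, which builds its result through an IR-generating callback, this variant applies a single IROp directly.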
*/ 21207 static 21208 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 21209 VexAbiInfo* vbi, 21210 Prefix pfx, Long delta, 21211 HChar* opname, IROp op ) 21212 { 21213 HChar dis_buf[50]; 21214 Int alen; 21215 IRTemp addr; 21216 IRTemp arg = newTemp(Ity_V128); 21217 UChar rm = getUChar(delta); 21218 UInt rG = gregOfRexRM(pfx, rm); 21219 if (epartIsReg(rm)) { 21220 UInt rE = eregOfRexRM(pfx,rm); 21221 assign(arg, getXMMReg(rE)); 21222 delta += 1; 21223 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 21224 } else { 21225 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21226 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 21227 delta += alen; 21228 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 21229 } 21230 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 21231 *uses_vvvv = False; 21232 return delta; 21233 } 21234 21235 21236 /* FIXME: common up with the _128_ version above? */ 21237 static 21238 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG ( 21239 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 21240 Prefix pfx, Long delta, HChar* name, 21241 /* The actual operation. Use either 'op' or 'opfn', 21242 but not both. */ 21243 IROp op, IRTemp(*opFn)(IRTemp,IRTemp), 21244 Bool invertLeftArg, 21245 Bool swapArgs 21246 ) 21247 { 21248 UChar modrm = getUChar(delta); 21249 UInt rD = gregOfRexRM(pfx, modrm); 21250 UInt rSL = getVexNvvvv(pfx); 21251 IRTemp tSL = newTemp(Ity_V256); 21252 IRTemp tSR = newTemp(Ity_V256); 21253 IRTemp addr = IRTemp_INVALID; 21254 HChar dis_buf[50]; 21255 Int alen = 0; 21256 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/); 21257 21258 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL)) 21259 : getYMMReg(rSL)); 21260 21261 if (epartIsReg(modrm)) { 21262 UInt rSR = eregOfRexRM(pfx, modrm); 21263 delta += 1; 21264 assign(tSR, getYMMReg(rSR)); 21265 DIP("%s %s,%s,%s\n", 21266 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD)); 21267 } else { 21268 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 21269 delta += alen; 21270 assign(tSR, loadLE(Ity_V256, mkexpr(addr))); 21271 DIP("%s %s,%s,%s\n", 21272 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD)); 21273 } 21274 21275 IRTemp res = IRTemp_INVALID; 21276 if (op != Iop_INVALID) { 21277 vassert(opFn == NULL); 21278 res = newTemp(Ity_V256); 21279 assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL)) 21280 : binop(op, mkexpr(tSL), mkexpr(tSR))); 21281 } else { 21282 vassert(opFn != NULL); 21283 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); 21284 } 21285 21286 putYMMReg(rD, mkexpr(res)); 21287 21288 *uses_vvvv = True; 21289 return delta; 21290 } 21291 21292 21293 /* All-lanes AVX256 binary operation: 21294 G[255:0] = V[255:0] `op` E[255:0] 21295 */ 21296 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv, 21297 VexAbiInfo* vbi, 21298 Prefix pfx, Long delta, 21299 HChar* opname, IROp op ) 21300 { 21301 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 21302 uses_vvvv, vbi, pfx, delta, opname, op, 21303 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ 21304 ); 21305 } 21306 21307 21308 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR 21309 generator to compute the result, no inversion of the left 21310 arg, and no swapping of args. 
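The 256-bit counterpart of dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex above.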
*/ 21311 static 21312 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex ( 21313 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 21314 Prefix pfx, Long delta, HChar* name, 21315 IRTemp(*opFn)(IRTemp,IRTemp) 21316 ) 21317 { 21318 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 21319 uses_vvvv, vbi, pfx, delta, name, 21320 Iop_INVALID, opFn, False, False ); 21321 } 21322 21323 21324 /* Handles AVX256 unary E-to-G all-lanes operations. */ 21325 static 21326 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 21327 VexAbiInfo* vbi, 21328 Prefix pfx, Long delta, 21329 HChar* opname, IROp op ) 21330 { 21331 HChar dis_buf[50]; 21332 Int alen; 21333 IRTemp addr; 21334 IRTemp arg = newTemp(Ity_V256); 21335 UChar rm = getUChar(delta); 21336 UInt rG = gregOfRexRM(pfx, rm); 21337 if (epartIsReg(rm)) { 21338 UInt rE = eregOfRexRM(pfx,rm); 21339 assign(arg, getYMMReg(rE)); 21340 delta += 1; 21341 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG)); 21342 } else { 21343 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21344 assign(arg, loadLE(Ity_V256, mkexpr(addr))); 21345 delta += alen; 21346 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG)); 21347 } 21348 putYMMReg( rG, unop(op, mkexpr(arg)) ); 21349 *uses_vvvv = False; 21350 return delta; 21351 } 21352 21353 21354 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we 21355 had a variant of Iop_64x4toV256 that took F64s as args instead. */ 21356 static Long dis_CVTDQ2PD_256 ( VexAbiInfo* vbi, Prefix pfx, 21357 Long delta ) 21358 { 21359 IRTemp addr = IRTemp_INVALID; 21360 Int alen = 0; 21361 HChar dis_buf[50]; 21362 UChar modrm = getUChar(delta); 21363 IRTemp sV = newTemp(Ity_V128); 21364 UInt rG = gregOfRexRM(pfx,modrm); 21365 if (epartIsReg(modrm)) { 21366 UInt rE = eregOfRexRM(pfx,modrm); 21367 assign( sV, getXMMReg(rE) ); 21368 delta += 1; 21369 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 21370 } else { 21371 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21372 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 21373 delta += alen; 21374 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) ); 21375 } 21376 IRTemp s3, s2, s1, s0; 21377 s3 = s2 = s1 = s0 = IRTemp_INVALID; 21378 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 21379 IRExpr* res 21380 = IRExpr_Qop( 21381 Iop_64x4toV256, 21382 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))), 21383 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))), 21384 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))), 21385 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0))) 21386 ); 21387 putYMMReg(rG, res); 21388 return delta; 21389 } 21390 21391 21392 static Long dis_CVTPD2PS_256 ( VexAbiInfo* vbi, Prefix pfx, 21393 Long delta ) 21394 { 21395 IRTemp addr = IRTemp_INVALID; 21396 Int alen = 0; 21397 HChar dis_buf[50]; 21398 UChar modrm = getUChar(delta); 21399 UInt rG = gregOfRexRM(pfx,modrm); 21400 IRTemp argV = newTemp(Ity_V256); 21401 IRTemp rmode = newTemp(Ity_I32); 21402 if (epartIsReg(modrm)) { 21403 UInt rE = eregOfRexRM(pfx,modrm); 21404 assign( argV, getYMMReg(rE) ); 21405 delta += 1; 21406 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG)); 21407 } else { 21408 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21409 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 21410 delta += alen; 21411 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) ); 21412 } 21413 21414 assign( rmode, get_sse_roundingmode() ); 21415 IRTemp t3, t2, t1, t0; 21416 t3 = t2 = t1 = t0 = IRTemp_INVALID; 21417 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 
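/* t3..t0 are the four 64-bit lanes of argV, t3 the most significant;
   each is reinterpreted as an F64 and narrowed to F32 under the
   rounding mode fetched above. */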
21418 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \ 21419 unop(Iop_ReinterpI64asF64, mkexpr(_t)) ) 21420 putXMMRegLane32F( rG, 3, CVT(t3) ); 21421 putXMMRegLane32F( rG, 2, CVT(t2) ); 21422 putXMMRegLane32F( rG, 1, CVT(t1) ); 21423 putXMMRegLane32F( rG, 0, CVT(t0) ); 21424 # undef CVT 21425 putYMMRegLane128( rG, 1, mkV128(0) ); 21426 return delta; 21427 } 21428 21429 21430 __attribute__((noinline)) 21431 static 21432 Long dis_ESC_0F__VEX ( 21433 /*MB_OUT*/DisResult* dres, 21434 /*OUT*/ Bool* uses_vvvv, 21435 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 21436 Bool resteerCisOk, 21437 void* callback_opaque, 21438 VexArchInfo* archinfo, 21439 VexAbiInfo* vbi, 21440 Prefix pfx, Int sz, Long deltaIN 21441 ) 21442 { 21443 IRTemp addr = IRTemp_INVALID; 21444 Int alen = 0; 21445 HChar dis_buf[50]; 21446 Long delta = deltaIN; 21447 UChar opc = getUChar(delta); 21448 delta++; 21449 *uses_vvvv = False; 21450 21451 switch (opc) { 21452 21453 case 0x10: 21454 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 21455 /* Move 64 bits from E (mem only) to G (lo half xmm). 21456 Bits 255-64 of the dest are zeroed out. */ 21457 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 21458 UChar modrm = getUChar(delta); 21459 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21460 UInt rG = gregOfRexRM(pfx,modrm); 21461 IRTemp z128 = newTemp(Ity_V128); 21462 assign(z128, mkV128(0)); 21463 putXMMReg( rG, mkexpr(z128) ); 21464 /* FIXME: ALIGNMENT CHECK? */ 21465 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 21466 putYMMRegLane128( rG, 1, mkexpr(z128) ); 21467 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG)); 21468 delta += alen; 21469 goto decode_success; 21470 } 21471 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 21472 /* Reg form. */ 21473 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 21474 UChar modrm = getUChar(delta); 21475 UInt rG = gregOfRexRM(pfx, modrm); 21476 UInt rE = eregOfRexRM(pfx, modrm); 21477 UInt rV = getVexNvvvv(pfx); 21478 delta++; 21479 DIP("vmovsd %s,%s,%s\n", 21480 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 21481 IRTemp res = newTemp(Ity_V128); 21482 assign(res, binop(Iop_64HLtoV128, 21483 getXMMRegLane64(rV, 1), 21484 getXMMRegLane64(rE, 0))); 21485 putYMMRegLoAndZU(rG, mkexpr(res)); 21486 *uses_vvvv = True; 21487 goto decode_success; 21488 } 21489 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 21490 /* Move 32 bits from E (mem only) to G (lo half xmm). 21491 Bits 255-32 of the dest are zeroed out. */ 21492 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 21493 UChar modrm = getUChar(delta); 21494 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21495 UInt rG = gregOfRexRM(pfx,modrm); 21496 IRTemp z128 = newTemp(Ity_V128); 21497 assign(z128, mkV128(0)); 21498 putXMMReg( rG, mkexpr(z128) ); 21499 /* FIXME: ALIGNMENT CHECK? */ 21500 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) ); 21501 putYMMRegLane128( rG, 1, mkexpr(z128) ); 21502 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG)); 21503 delta += alen; 21504 goto decode_success; 21505 } 21506 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 21507 /* Reg form. 
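The destination receives V[127:32] ++ E[31:0], with bits 255:128 zeroed.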
*/ 21508 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 21509 UChar modrm = getUChar(delta); 21510 UInt rG = gregOfRexRM(pfx, modrm); 21511 UInt rE = eregOfRexRM(pfx, modrm); 21512 UInt rV = getVexNvvvv(pfx); 21513 delta++; 21514 DIP("vmovss %s,%s,%s\n", 21515 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 21516 IRTemp res = newTemp(Ity_V128); 21517 assign( res, binop( Iop_64HLtoV128, 21518 getXMMRegLane64(rV, 1), 21519 binop(Iop_32HLto64, 21520 getXMMRegLane32(rV, 1), 21521 getXMMRegLane32(rE, 0)) ) ); 21522 putYMMRegLoAndZU(rG, mkexpr(res)); 21523 *uses_vvvv = True; 21524 goto decode_success; 21525 } 21526 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */ 21527 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21528 UChar modrm = getUChar(delta); 21529 UInt rG = gregOfRexRM(pfx, modrm); 21530 if (epartIsReg(modrm)) { 21531 UInt rE = eregOfRexRM(pfx,modrm); 21532 putYMMRegLoAndZU( rG, getXMMReg( rE )); 21533 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 21534 delta += 1; 21535 } else { 21536 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21537 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 21538 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG)); 21539 delta += alen; 21540 } 21541 goto decode_success; 21542 } 21543 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */ 21544 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21545 UChar modrm = getUChar(delta); 21546 UInt rG = gregOfRexRM(pfx, modrm); 21547 if (epartIsReg(modrm)) { 21548 UInt rE = eregOfRexRM(pfx,modrm); 21549 putYMMReg( rG, getYMMReg( rE )); 21550 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 21551 delta += 1; 21552 } else { 21553 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21554 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 21555 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG)); 21556 delta += alen; 21557 } 21558 goto decode_success; 21559 } 21560 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */ 21561 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21562 UChar modrm = getUChar(delta); 21563 UInt rG = gregOfRexRM(pfx, modrm); 21564 if (epartIsReg(modrm)) { 21565 UInt rE = eregOfRexRM(pfx,modrm); 21566 putYMMRegLoAndZU( rG, getXMMReg( rE )); 21567 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 21568 delta += 1; 21569 } else { 21570 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21571 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 21572 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG)); 21573 delta += alen; 21574 } 21575 goto decode_success; 21576 } 21577 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */ 21578 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21579 UChar modrm = getUChar(delta); 21580 UInt rG = gregOfRexRM(pfx, modrm); 21581 if (epartIsReg(modrm)) { 21582 UInt rE = eregOfRexRM(pfx,modrm); 21583 putYMMReg( rG, getYMMReg( rE )); 21584 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 21585 delta += 1; 21586 } else { 21587 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21588 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 21589 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG)); 21590 delta += alen; 21591 } 21592 goto decode_success; 21593 } 21594 break; 21595 21596 case 0x11: 21597 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */ 21598 /* Move 64 bits from G (low half xmm) to mem only. 
*/ 21599 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 21600 UChar modrm = getUChar(delta); 21601 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21602 UInt rG = gregOfRexRM(pfx,modrm); 21603 /* FIXME: ALIGNMENT CHECK? */ 21604 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0)); 21605 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf); 21606 delta += alen; 21607 goto decode_success; 21608 } 21609 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */ 21610 /* Reg form. For this encoding the E (r/m) register is the destination: E = V[127:64] ++ G[63:0]. */ 21611 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 21612 UChar modrm = getUChar(delta); 21613 UInt rG = gregOfRexRM(pfx, modrm); 21614 UInt rE = eregOfRexRM(pfx, modrm); 21615 UInt rV = getVexNvvvv(pfx); 21616 delta++; 21617 DIP("vmovsd %s,%s,%s\n", 21618 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE)); 21619 IRTemp res = newTemp(Ity_V128); 21620 assign(res, binop(Iop_64HLtoV128, 21621 getXMMRegLane64(rV, 1), 21622 getXMMRegLane64(rG, 0))); 21623 putYMMRegLoAndZU(rE, mkexpr(res)); 21624 *uses_vvvv = True; 21625 goto decode_success; 21626 } 21627 /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */ 21628 /* Move 32 bits from G (low 1/4 xmm) to mem only. */ 21629 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 21630 UChar modrm = getUChar(delta); 21631 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21632 UInt rG = gregOfRexRM(pfx,modrm); 21633 /* FIXME: ALIGNMENT CHECK? */ 21634 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0)); 21635 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf); 21636 delta += alen; 21637 goto decode_success; 21638 } 21639 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */ 21640 /* Reg form. As for VMOVSD above, E (r/m) is the destination: E = V[127:32] ++ G[31:0]. */ 21641 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 21642 UChar modrm = getUChar(delta); 21643 UInt rG = gregOfRexRM(pfx, modrm); 21644 UInt rE = eregOfRexRM(pfx, modrm); 21645 UInt rV = getVexNvvvv(pfx); 21646 delta++; 21647 DIP("vmovss %s,%s,%s\n", 21648 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE)); 21649 IRTemp res = newTemp(Ity_V128); 21650 assign( res, binop( Iop_64HLtoV128, 21651 getXMMRegLane64(rV, 1), 21652 binop(Iop_32HLto64, 21653 getXMMRegLane32(rV, 1), 21654 getXMMRegLane32(rG, 0)) ) ); 21655 putYMMRegLoAndZU(rE, mkexpr(res)); 21656 *uses_vvvv = True; 21657 goto decode_success; 21658 } 21659 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */ 21660 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21661 UChar modrm = getUChar(delta); 21662 UInt rG = gregOfRexRM(pfx,modrm); 21663 if (epartIsReg(modrm)) { 21664 UInt rE = eregOfRexRM(pfx,modrm); 21665 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 21666 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 21667 delta += 1; 21668 } else { 21669 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21670 storeLE( mkexpr(addr), getXMMReg(rG) ); 21671 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf); 21672 delta += alen; 21673 } 21674 goto decode_success; 21675 } 21676 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */ 21677 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21678 UChar modrm = getUChar(delta); 21679 UInt rG = gregOfRexRM(pfx,modrm); 21680 if (epartIsReg(modrm)) { 21681 UInt rE = eregOfRexRM(pfx,modrm); 21682 putYMMReg( rE, getYMMReg(rG) ); 21683 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 21684 delta += 1; 21685 } else { 21686 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21687 storeLE( mkexpr(addr), getYMMReg(rG) ); 21688 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf); 21689 delta += alen; 21690 } 21691 goto decode_success; 21692 } 21693 /* VMOVUPS
xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */ 21694 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21695 UChar modrm = getUChar(delta); 21696 UInt rG = gregOfRexRM(pfx,modrm); 21697 if (epartIsReg(modrm)) { 21698 UInt rE = eregOfRexRM(pfx,modrm); 21699 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 21700 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 21701 delta += 1; 21702 } else { 21703 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21704 storeLE( mkexpr(addr), getXMMReg(rG) ); 21705 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf); 21706 delta += alen; 21707 } 21708 goto decode_success; 21709 } 21710 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */ 21711 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21712 UChar modrm = getUChar(delta); 21713 UInt rG = gregOfRexRM(pfx,modrm); 21714 if (epartIsReg(modrm)) { 21715 UInt rE = eregOfRexRM(pfx,modrm); 21716 putYMMReg( rE, getYMMReg(rG) ); 21717 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 21718 delta += 1; 21719 } else { 21720 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21721 storeLE( mkexpr(addr), getYMMReg(rG) ); 21722 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf); 21723 delta += alen; 21724 } 21725 goto decode_success; 21726 } 21727 break; 21728 21729 case 0x12: 21730 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */ 21731 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21732 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ ); 21733 goto decode_success; 21734 } 21735 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */ 21736 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21737 delta = dis_MOVDDUP_256( vbi, pfx, delta ); 21738 goto decode_success; 21739 } 21740 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */ 21741 /* Insn only exists in reg form */ 21742 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 21743 && epartIsReg(getUChar(delta))) { 21744 UChar modrm = getUChar(delta); 21745 UInt rG = gregOfRexRM(pfx, modrm); 21746 UInt rE = eregOfRexRM(pfx, modrm); 21747 UInt rV = getVexNvvvv(pfx); 21748 delta++; 21749 DIP("vmovhlps %s,%s,%s\n", 21750 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 21751 IRTemp res = newTemp(Ity_V128); 21752 assign(res, binop(Iop_64HLtoV128, 21753 getXMMRegLane64(rV, 1), 21754 getXMMRegLane64(rE, 1))); 21755 putYMMRegLoAndZU(rG, mkexpr(res)); 21756 *uses_vvvv = True; 21757 goto decode_success; 21758 } 21759 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */ 21760 /* Insn exists only in mem form, it appears. */ 21761 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */ 21762 /* Insn exists only in mem form, it appears. 
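(The reg/reg form of 0F 12 is VMOVHLPS, handled earlier in this case.)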
*/ 21763 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 21764 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 21765 UChar modrm = getUChar(delta); 21766 UInt rG = gregOfRexRM(pfx, modrm); 21767 UInt rV = getVexNvvvv(pfx); 21768 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21769 delta += alen; 21770 DIP("vmovlp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's', 21771 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 21772 IRTemp res = newTemp(Ity_V128); 21773 assign(res, binop(Iop_64HLtoV128, 21774 getXMMRegLane64(rV, 1), 21775 loadLE(Ity_I64, mkexpr(addr)))); 21776 putYMMRegLoAndZU(rG, mkexpr(res)); 21777 *uses_vvvv = True; 21778 goto decode_success; 21779 } 21780 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */ 21781 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 21782 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 21783 True/*isL*/ ); 21784 goto decode_success; 21785 } 21786 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */ 21787 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 21788 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ ); 21789 goto decode_success; 21790 } 21791 break; 21792 21793 case 0x13: 21794 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */ 21795 /* Insn exists only in mem form, it appears. */ 21796 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */ 21797 /* Insn exists only in mem form, it appears. */ 21798 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 21799 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 21800 UChar modrm = getUChar(delta); 21801 UInt rG = gregOfRexRM(pfx, modrm); 21802 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21803 delta += alen; 21804 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0)); 21805 DIP("vmovlp%c %s,%s\n", have66(pfx) ? 'd' : 's', nameXMMReg(rG), dis_buf); 21806 goto decode_success; 21807 } 21808 break; 21809 21810 case 0x14: 21811 case 0x15: 21812 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */ 21813 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */ 21814 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21815 Bool hi = opc == 0x15; 21816 UChar modrm = getUChar(delta); 21817 UInt rG = gregOfRexRM(pfx,modrm); 21818 UInt rV = getVexNvvvv(pfx); 21819 IRTemp eV = newTemp(Ity_V128); 21820 IRTemp vV = newTemp(Ity_V128); 21821 assign( vV, getXMMReg(rV) ); 21822 if (epartIsReg(modrm)) { 21823 UInt rE = eregOfRexRM(pfx,modrm); 21824 assign( eV, getXMMReg(rE) ); 21825 delta += 1; 21826 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 21827 nameXMMReg(rE), nameXMMReg(rG)); 21828 } else { 21829 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21830 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 21831 delta += alen; 21832 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 21833 dis_buf, nameXMMReg(rG)); 21834 } 21835 IRTemp res = math_UNPCKxPS_128( eV, vV, hi ); 21836 putYMMRegLoAndZU( rG, mkexpr(res) ); 21837 *uses_vvvv = True; 21838 goto decode_success; 21839 } 21840 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */ 21841 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */ 21842 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21843 Bool hi = opc == 0x15; 21844 UChar modrm = getUChar(delta); 21845 UInt rG = gregOfRexRM(pfx,modrm); 21846 UInt rV = getVexNvvvv(pfx); 21847 IRTemp eV = newTemp(Ity_V256); 21848 IRTemp vV = newTemp(Ity_V256); 21849 assign( vV, getYMMReg(rV) ); 21850 if (epartIsReg(modrm)) { 21851 UInt rE = eregOfRexRM(pfx,modrm); 21852 assign( eV, getYMMReg(rE) ); 21853 delta += 1; 21854 DIP("vunpck%sps %s,%s\n", hi ?
"h" : "l", 21855 nameYMMReg(rE), nameYMMReg(rG)); 21856 } else { 21857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21858 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 21859 delta += alen; 21860 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 21861 dis_buf, nameYMMReg(rG)); 21862 } 21863 IRTemp res = math_UNPCKxPS_256( eV, vV, hi ); 21864 putYMMReg( rG, mkexpr(res) ); 21865 *uses_vvvv = True; 21866 goto decode_success; 21867 } 21868 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */ 21869 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */ 21870 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 21871 Bool hi = opc == 0x15; 21872 UChar modrm = getUChar(delta); 21873 UInt rG = gregOfRexRM(pfx,modrm); 21874 UInt rV = getVexNvvvv(pfx); 21875 IRTemp eV = newTemp(Ity_V128); 21876 IRTemp vV = newTemp(Ity_V128); 21877 assign( vV, getXMMReg(rV) ); 21878 if (epartIsReg(modrm)) { 21879 UInt rE = eregOfRexRM(pfx,modrm); 21880 assign( eV, getXMMReg(rE) ); 21881 delta += 1; 21882 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 21883 nameXMMReg(rE), nameXMMReg(rG)); 21884 } else { 21885 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21886 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 21887 delta += alen; 21888 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 21889 dis_buf, nameXMMReg(rG)); 21890 } 21891 IRTemp res = math_UNPCKxPD_128( eV, vV, hi ); 21892 putYMMRegLoAndZU( rG, mkexpr(res) ); 21893 *uses_vvvv = True; 21894 goto decode_success; 21895 } 21896 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */ 21897 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */ 21898 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 21899 Bool hi = opc == 0x15; 21900 UChar modrm = getUChar(delta); 21901 UInt rG = gregOfRexRM(pfx,modrm); 21902 UInt rV = getVexNvvvv(pfx); 21903 IRTemp eV = newTemp(Ity_V256); 21904 IRTemp vV = newTemp(Ity_V256); 21905 assign( vV, getYMMReg(rV) ); 21906 if (epartIsReg(modrm)) { 21907 UInt rE = eregOfRexRM(pfx,modrm); 21908 assign( eV, getYMMReg(rE) ); 21909 delta += 1; 21910 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 21911 nameYMMReg(rE), nameYMMReg(rG)); 21912 } else { 21913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21914 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 21915 delta += alen; 21916 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 21917 dis_buf, nameYMMReg(rG)); 21918 } 21919 IRTemp res = math_UNPCKxPD_256( eV, vV, hi ); 21920 putYMMReg( rG, mkexpr(res) ); 21921 *uses_vvvv = True; 21922 goto decode_success; 21923 } 21924 break; 21925 21926 case 0x16: 21927 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */ 21928 /* Insn only exists in reg form */ 21929 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 21930 && epartIsReg(getUChar(delta))) { 21931 UChar modrm = getUChar(delta); 21932 UInt rG = gregOfRexRM(pfx, modrm); 21933 UInt rE = eregOfRexRM(pfx, modrm); 21934 UInt rV = getVexNvvvv(pfx); 21935 delta++; 21936 DIP("vmovlhps %s,%s,%s\n", 21937 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 21938 IRTemp res = newTemp(Ity_V128); 21939 assign(res, binop(Iop_64HLtoV128, 21940 getXMMRegLane64(rE, 0), 21941 getXMMRegLane64(rV, 0))); 21942 putYMMRegLoAndZU(rG, mkexpr(res)); 21943 *uses_vvvv = True; 21944 goto decode_success; 21945 } 21946 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */ 21947 /* Insn exists only in mem form, it appears. */ 21948 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */ 21949 /* Insn exists only in mem form, it appears. 
*/ 21950 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 21951 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 21952 UChar modrm = getUChar(delta); 21953 UInt rG = gregOfRexRM(pfx, modrm); 21954 UInt rV = getVexNvvvv(pfx); 21955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21956 delta += alen; 21957 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's', 21958 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 21959 IRTemp res = newTemp(Ity_V128); 21960 assign(res, binop(Iop_64HLtoV128, 21961 loadLE(Ity_I64, mkexpr(addr)), 21962 getXMMRegLane64(rV, 0))); 21963 putYMMRegLoAndZU(rG, mkexpr(res)); 21964 *uses_vvvv = True; 21965 goto decode_success; 21966 } 21967 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */ 21968 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 21969 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 21970 False/*!isL*/ ); 21971 goto decode_success; 21972 } 21973 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */ 21974 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 21975 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ ); 21976 goto decode_success; 21977 } 21978 break; 21979 21980 case 0x17: 21981 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */ 21982 /* Insn exists only in mem form, it appears. */ 21983 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */ 21984 /* Insn exists only in mem form, it appears. */ 21985 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 21986 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 21987 UChar modrm = getUChar(delta); 21988 UInt rG = gregOfRexRM(pfx, modrm); 21989 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21990 delta += alen; 21991 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1)); 21992 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 21993 nameXMMReg(rG), dis_buf); 21994 goto decode_success; 21995 } 21996 break; 21997 21998 case 0x28: 21999 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */ 22000 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22001 UChar modrm = getUChar(delta); 22002 UInt rG = gregOfRexRM(pfx, modrm); 22003 if (epartIsReg(modrm)) { 22004 UInt rE = eregOfRexRM(pfx,modrm); 22005 putYMMRegLoAndZU( rG, getXMMReg( rE )); 22006 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 22007 delta += 1; 22008 } else { 22009 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22010 gen_SEGV_if_not_16_aligned( addr ); 22011 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 22012 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG)); 22013 delta += alen; 22014 } 22015 goto decode_success; 22016 } 22017 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */ 22018 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22019 UChar modrm = getUChar(delta); 22020 UInt rG = gregOfRexRM(pfx, modrm); 22021 if (epartIsReg(modrm)) { 22022 UInt rE = eregOfRexRM(pfx,modrm); 22023 putYMMReg( rG, getYMMReg( rE )); 22024 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 22025 delta += 1; 22026 } else { 22027 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22028 gen_SEGV_if_not_32_aligned( addr ); 22029 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 22030 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG)); 22031 delta += alen; 22032 } 22033 goto decode_success; 22034 } 22035 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */ 22036 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22037 UChar modrm = getUChar(delta); 22038 UInt rG = gregOfRexRM(pfx, modrm); 22039 if (epartIsReg(modrm)) { 22040 UInt rE = eregOfRexRM(pfx,modrm); 22041 putYMMRegLoAndZU( rG, getXMMReg( rE )); 22042 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 22043 delta += 1; 22044 } else { 22045 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22046 gen_SEGV_if_not_16_aligned( addr ); 22047 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 22048 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG)); 22049 delta += alen; 22050 } 22051 goto decode_success; 22052 } 22053 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */ 22054 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22055 UChar modrm = getUChar(delta); 22056 UInt rG = gregOfRexRM(pfx, modrm); 22057 if (epartIsReg(modrm)) { 22058 UInt rE = eregOfRexRM(pfx,modrm); 22059 putYMMReg( rG, getYMMReg( rE )); 22060 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 22061 delta += 1; 22062 } else { 22063 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22064 gen_SEGV_if_not_32_aligned( addr ); 22065 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 22066 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG)); 22067 delta += alen; 22068 } 22069 goto decode_success; 22070 } 22071 break; 22072 22073 case 0x29: 22074 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */ 22075 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22076 UChar modrm = getUChar(delta); 22077 UInt rG = gregOfRexRM(pfx,modrm); 22078 if (epartIsReg(modrm)) { 22079 UInt rE = eregOfRexRM(pfx,modrm); 22080 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 22081 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 22082 delta += 1; 22083 } else { 22084 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22085 gen_SEGV_if_not_16_aligned( addr ); 22086 storeLE( mkexpr(addr), getXMMReg(rG) ); 22087 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf ); 22088 delta += alen; 22089 } 22090 
goto decode_success; 22091 } 22092 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */ 22093 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22094 UChar modrm = getUChar(delta); 22095 UInt rG = gregOfRexRM(pfx,modrm); 22096 if (epartIsReg(modrm)) { 22097 UInt rE = eregOfRexRM(pfx,modrm); 22098 putYMMReg( rE, getYMMReg(rG) ); 22099 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 22100 delta += 1; 22101 } else { 22102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22103 gen_SEGV_if_not_32_aligned( addr ); 22104 storeLE( mkexpr(addr), getYMMReg(rG) ); 22105 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf ); 22106 delta += alen; 22107 } 22108 goto decode_success; 22109 } 22110 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */ 22111 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22112 UChar modrm = getUChar(delta); 22113 UInt rG = gregOfRexRM(pfx,modrm); 22114 if (epartIsReg(modrm)) { 22115 UInt rE = eregOfRexRM(pfx,modrm); 22116 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 22117 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 22118 delta += 1; 22119 goto decode_success; 22120 } else { 22121 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22122 gen_SEGV_if_not_16_aligned( addr ); 22123 storeLE( mkexpr(addr), getXMMReg(rG) ); 22124 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf ); 22125 delta += alen; 22126 goto decode_success; 22127 } 22128 } 22129 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */ 22130 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22131 UChar modrm = getUChar(delta); 22132 UInt rG = gregOfRexRM(pfx,modrm); 22133 if (epartIsReg(modrm)) { 22134 UInt rE = eregOfRexRM(pfx,modrm); 22135 putYMMReg( rE, getYMMReg(rG) ); 22136 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 22137 delta += 1; 22138 goto decode_success; 22139 } else { 22140 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22141 gen_SEGV_if_not_32_aligned( addr ); 22142 storeLE( mkexpr(addr), getYMMReg(rG) ); 22143 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf ); 22144 delta += alen; 22145 goto decode_success; 22146 } 22147 } 22148 break; 22149 22150 case 0x2A: { 22151 IRTemp rmode = newTemp(Ity_I32); 22152 assign( rmode, get_sse_roundingmode() ); 22153 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */ 22154 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 22155 UChar modrm = getUChar(delta); 22156 UInt rV = getVexNvvvv(pfx); 22157 UInt rD = gregOfRexRM(pfx, modrm); 22158 IRTemp arg32 = newTemp(Ity_I32); 22159 if (epartIsReg(modrm)) { 22160 UInt rS = eregOfRexRM(pfx,modrm); 22161 assign( arg32, getIReg32(rS) ); 22162 delta += 1; 22163 DIP("vcvtsi2sdl %s,%s,%s\n", 22164 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 22165 } else { 22166 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22167 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 22168 delta += alen; 22169 DIP("vcvtsi2sdl %s,%s,%s\n", 22170 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22171 } 22172 putXMMRegLane64F( rD, 0, 22173 unop(Iop_I32StoF64, mkexpr(arg32))); 22174 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22175 putYMMRegLane128( rD, 1, mkV128(0) ); 22176 *uses_vvvv = True; 22177 goto decode_success; 22178 } 22179 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */ 22180 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 22181 UChar modrm = getUChar(delta); 22182 UInt rV = getVexNvvvv(pfx); 22183 UInt rD = gregOfRexRM(pfx, modrm); 22184 IRTemp arg64 = newTemp(Ity_I64); 22185 if (epartIsReg(modrm)) { 22186 UInt rS = eregOfRexRM(pfx,modrm); 
22187 assign( arg64, getIReg64(rS) ); 22188 delta += 1; 22189 DIP("vcvtsi2sdq %s,%s,%s\n", 22190 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 22191 } else { 22192 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22193 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 22194 delta += alen; 22195 DIP("vcvtsi2sdq %s,%s,%s\n", 22196 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22197 } 22198 putXMMRegLane64F( rD, 0, 22199 binop( Iop_I64StoF64, 22200 get_sse_roundingmode(), 22201 mkexpr(arg64)) ); 22202 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22203 putYMMRegLane128( rD, 1, mkV128(0) ); 22204 *uses_vvvv = True; 22205 goto decode_success; 22206 } 22207 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */ 22208 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 22209 UChar modrm = getUChar(delta); 22210 UInt rV = getVexNvvvv(pfx); 22211 UInt rD = gregOfRexRM(pfx, modrm); 22212 IRTemp arg64 = newTemp(Ity_I64); 22213 if (epartIsReg(modrm)) { 22214 UInt rS = eregOfRexRM(pfx,modrm); 22215 assign( arg64, getIReg64(rS) ); 22216 delta += 1; 22217 DIP("vcvtsi2ssq %s,%s,%s\n", 22218 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 22219 } else { 22220 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22221 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 22222 delta += alen; 22223 DIP("vcvtsi2ssq %s,%s,%s\n", 22224 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22225 } 22226 putXMMRegLane32F( rD, 0, 22227 binop(Iop_F64toF32, 22228 mkexpr(rmode), 22229 binop(Iop_I64StoF64, mkexpr(rmode), 22230 mkexpr(arg64)) ) ); 22231 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 22232 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22233 putYMMRegLane128( rD, 1, mkV128(0) ); 22234 *uses_vvvv = True; 22235 goto decode_success; 22236 } 22237 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */ 22238 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 22239 UChar modrm = getUChar(delta); 22240 UInt rV = getVexNvvvv(pfx); 22241 UInt rD = gregOfRexRM(pfx, modrm); 22242 IRTemp arg32 = newTemp(Ity_I32); 22243 if (epartIsReg(modrm)) { 22244 UInt rS = eregOfRexRM(pfx,modrm); 22245 assign( arg32, getIReg32(rS) ); 22246 delta += 1; 22247 DIP("vcvtsi2ssl %s,%s,%s\n", 22248 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 22249 } else { 22250 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22251 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 22252 delta += alen; 22253 DIP("vcvtsi2ssl %s,%s,%s\n", 22254 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22255 } 22256 putXMMRegLane32F( rD, 0, 22257 binop(Iop_F64toF32, 22258 mkexpr(rmode), 22259 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 22260 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 22261 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22262 putYMMRegLane128( rD, 1, mkV128(0) ); 22263 *uses_vvvv = True; 22264 goto decode_success; 22265 } 22266 break; 22267 } 22268 22269 case 0x2B: 22270 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */ 22271 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */ 22272 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 22273 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 22274 UChar modrm = getUChar(delta); 22275 UInt rS = gregOfRexRM(pfx, modrm); 22276 IRTemp tS = newTemp(Ity_V128); 22277 assign(tS, getXMMReg(rS)); 22278 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 22279 delta += alen; 22280 gen_SEGV_if_not_16_aligned(addr); 22281 storeLE(mkexpr(addr), mkexpr(tS)); 22282 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 22283 nameXMMReg(rS), dis_buf); 22284 goto decode_success; 22285 } 22286 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */ 22287 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */ 22288 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 22289 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) { 22290 UChar modrm = getUChar(delta); 22291 UInt rS = gregOfRexRM(pfx, modrm); 22292 IRTemp tS = newTemp(Ity_V256); 22293 assign(tS, getYMMReg(rS)); 22294 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 22295 delta += alen; 22296 gen_SEGV_if_not_32_aligned(addr); 22297 storeLE(mkexpr(addr), mkexpr(tS)); 22298 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's', 22299 nameYMMReg(rS), dis_buf); 22300 goto decode_success; 22301 } 22302 break; 22303 22304 case 0x2C: 22305 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */ 22306 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 22307 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 22308 goto decode_success; 22309 } 22310 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */ 22311 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 22312 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 22313 goto decode_success; 22314 } 22315 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */ 22316 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 22317 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 22318 goto decode_success; 22319 } 22320 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */ 22321 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 22322 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 22323 goto decode_success; 22324 } 22325 break; 22326 22327 case 0x2D: 22328 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */ 22329 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 22330 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 22331 goto decode_success; 22332 } 22333 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */ 22334 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 22335 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 22336 goto decode_success; 22337 } 22338 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */ 22339 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 22340 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 22341 goto decode_success; 22342 } 22343 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */ 22344 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 22345 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 22346 goto decode_success; 22347 } 22348 break; 22349 22350 case 0x2E: 22351 case 0x2F: 22352 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */ 22353 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */ 22354 if (have66noF2noF3(pfx)) { 22355 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc ); 22356 goto decode_success; 22357 } 22358 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */ 22359 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */ 22360 if (haveNo66noF2noF3(pfx)) { 22361 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc ); 22362 goto decode_success; 22363 } 22364 break; 22365 22366 case 0x50: 22367 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */ 22368 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22369 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ ); 22370 goto decode_success; 22371 } 22372 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */ 22373 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22374 
delta = dis_MOVMSKPD_256( vbi, pfx, delta ); 22375 goto decode_success; 22376 } 22377 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */ 22378 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22379 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ ); 22380 goto decode_success; 22381 } 22382 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */ 22383 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22384 delta = dis_MOVMSKPS_256( vbi, pfx, delta ); 22385 goto decode_success; 22386 } 22387 break; 22388 22389 case 0x51: 22390 /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */ 22391 if (haveF3no66noF2(pfx)) { 22392 delta = dis_AVX128_E_V_to_G_lo32_unary( 22393 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 ); 22394 goto decode_success; 22395 } 22396 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */ 22397 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22398 delta = dis_AVX128_E_to_G_unary_all( 22399 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 ); 22400 goto decode_success; 22401 } 22402 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */ 22403 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22404 delta = dis_AVX256_E_to_G_unary_all( 22405 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 ); 22406 goto decode_success; 22407 } 22408 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */ 22409 if (haveF2no66noF3(pfx)) { 22410 delta = dis_AVX128_E_V_to_G_lo64_unary( 22411 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 ); 22412 goto decode_success; 22413 } 22414 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */ 22415 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22416 delta = dis_AVX128_E_to_G_unary_all( 22417 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 ); 22418 goto decode_success; 22419 } 22420 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */ 22421 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22422 delta = dis_AVX256_E_to_G_unary_all( 22423 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 ); 22424 goto decode_success; 22425 } 22426 break; 22427 22428 case 0x52: 22429 /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */ 22430 if (haveF3no66noF2(pfx)) { 22431 delta = dis_AVX128_E_V_to_G_lo32_unary( 22432 uses_vvvv, vbi, pfx, delta, "vrsqrtss", Iop_RSqrt32F0x4 ); 22433 goto decode_success; 22434 } 22435 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */ 22436 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22437 delta = dis_AVX128_E_to_G_unary_all( 22438 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx4 ); 22439 goto decode_success; 22440 } 22441 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */ 22442 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22443 delta = dis_AVX256_E_to_G_unary_all( 22444 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx8 ); 22445 goto decode_success; 22446 } 22447 break; 22448 22449 case 0x53: 22450 /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */ 22451 if (haveF3no66noF2(pfx)) { 22452 delta = dis_AVX128_E_V_to_G_lo32_unary( 22453 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_Recip32F0x4 ); 22454 goto decode_success; 22455 } 22456 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */ 22457 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22458 delta = dis_AVX128_E_to_G_unary_all( 22459 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx4 ); 22460 goto decode_success; 22461 } 22462 /* VRCPPS 
ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */ 22463 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22464 delta = dis_AVX256_E_to_G_unary_all( 22465 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx8 ); 22466 goto decode_success; 22467 } 22468 break; 22469 22470 case 0x54: 22471 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 22472 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */ 22473 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22474 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22475 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 ); 22476 goto decode_success; 22477 } 22478 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 22479 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */ 22480 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22481 delta = dis_AVX256_E_V_to_G( 22482 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 ); 22483 goto decode_success; 22484 } 22485 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */ 22486 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22487 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22488 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 ); 22489 goto decode_success; 22490 } 22491 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */ 22492 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22493 delta = dis_AVX256_E_V_to_G( 22494 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 ); 22495 goto decode_success; 22496 } 22497 break; 22498 22499 case 0x55: 22500 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */ 22501 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */ 22502 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22503 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22504 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128, 22505 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 22506 goto decode_success; 22507 } 22508 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */ 22509 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22510 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 22511 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256, 22512 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 22513 goto decode_success; 22514 } 22515 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */ 22516 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22517 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22518 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128, 22519 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 22520 goto decode_success; 22521 } 22522 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */ 22523 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22524 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 22525 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256, 22526 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 22527 goto decode_success; 22528 } 22529 break; 22530 22531 case 0x56: 22532 /* VORPD r/m, rV, r ::: r = rV | r/m */ 22533 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */ 22534 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22535 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22536 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 ); 22537 goto decode_success; 22538 } 22539 /* VORPD r/m, rV, r ::: r = rV | r/m */ 22540 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */ 22541 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22542 delta = dis_AVX256_E_V_to_G( 22543 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 ); 22544 goto decode_success; 22545 } 22546 /* VORPS r/m, rV, r ::: r = rV | r/m */ 22547 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */ 22548 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22549 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22550 uses_vvvv, vbi, pfx, delta, "vorps", 
Iop_OrV128 ); 22551 goto decode_success; 22552 } 22553 /* VORPS r/m, rV, r ::: r = rV | r/m */ 22554 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */ 22555 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22556 delta = dis_AVX256_E_V_to_G( 22557 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 ); 22558 goto decode_success; 22559 } 22560 break; 22561 22562 case 0x57: 22563 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 22564 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */ 22565 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22566 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22567 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 ); 22568 goto decode_success; 22569 } 22570 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 22571 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */ 22572 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22573 delta = dis_AVX256_E_V_to_G( 22574 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 ); 22575 goto decode_success; 22576 } 22577 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 22578 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */ 22579 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22580 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22581 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 ); 22582 goto decode_success; 22583 } 22584 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 22585 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */ 22586 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22587 delta = dis_AVX256_E_V_to_G( 22588 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 ); 22589 goto decode_success; 22590 } 22591 break; 22592 22593 case 0x58: 22594 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */ 22595 if (haveF2no66noF3(pfx)) { 22596 delta = dis_AVX128_E_V_to_G_lo64( 22597 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 ); 22598 goto decode_success; 22599 } 22600 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */ 22601 if (haveF3no66noF2(pfx)) { 22602 delta = dis_AVX128_E_V_to_G_lo32( 22603 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 ); 22604 goto decode_success; 22605 } 22606 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */ 22607 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22608 delta = dis_AVX128_E_V_to_G( 22609 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 ); 22610 goto decode_success; 22611 } 22612 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */ 22613 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22614 delta = dis_AVX256_E_V_to_G( 22615 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 ); 22616 goto decode_success; 22617 } 22618 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */ 22619 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22620 delta = dis_AVX128_E_V_to_G( 22621 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 ); 22622 goto decode_success; 22623 } 22624 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */ 22625 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22626 delta = dis_AVX256_E_V_to_G( 22627 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 ); 22628 goto decode_success; 22629 } 22630 break; 22631 22632 case 0x59: 22633 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */ 22634 if (haveF2no66noF3(pfx)) { 22635 delta = dis_AVX128_E_V_to_G_lo64( 22636 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 ); 22637 goto decode_success; 22638 } 22639 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */ 22640 if (haveF3no66noF2(pfx)) { 22641 delta = dis_AVX128_E_V_to_G_lo32( 22642 uses_vvvv, vbi, pfx, delta, "vmulss", 
Iop_Mul32F0x4 ); 22643 goto decode_success; 22644 } 22645 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */ 22646 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22647 delta = dis_AVX128_E_V_to_G( 22648 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 ); 22649 goto decode_success; 22650 } 22651 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */ 22652 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22653 delta = dis_AVX256_E_V_to_G( 22654 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 ); 22655 goto decode_success; 22656 } 22657 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */ 22658 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22659 delta = dis_AVX128_E_V_to_G( 22660 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 ); 22661 goto decode_success; 22662 } 22663 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */ 22664 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22665 delta = dis_AVX256_E_V_to_G( 22666 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 ); 22667 goto decode_success; 22668 } 22669 break; 22670 22671 case 0x5A: 22672 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */ 22673 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22674 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ ); 22675 goto decode_success; 22676 } 22677 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */ 22678 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22679 delta = dis_CVTPS2PD_256( vbi, pfx, delta ); 22680 goto decode_success; 22681 } 22682 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */ 22683 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22684 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ ); 22685 goto decode_success; 22686 } 22687 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */ 22688 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22689 delta = dis_CVTPD2PS_256( vbi, pfx, delta ); 22690 goto decode_success; 22691 } 22692 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */ 22693 if (haveF2no66noF3(pfx)) { 22694 UChar modrm = getUChar(delta); 22695 UInt rV = getVexNvvvv(pfx); 22696 UInt rD = gregOfRexRM(pfx, modrm); 22697 IRTemp f64lo = newTemp(Ity_F64); 22698 IRTemp rmode = newTemp(Ity_I32); 22699 assign( rmode, get_sse_roundingmode() ); 22700 if (epartIsReg(modrm)) { 22701 UInt rS = eregOfRexRM(pfx,modrm); 22702 assign(f64lo, getXMMRegLane64F(rS, 0)); 22703 delta += 1; 22704 DIP("vcvtsd2ss %s,%s,%s\n", 22705 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 22706 } else { 22707 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22708 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) ); 22709 delta += alen; 22710 DIP("vcvtsd2ss %s,%s,%s\n", 22711 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22712 } 22713 putXMMRegLane32F( rD, 0, 22714 binop( Iop_F64toF32, mkexpr(rmode), 22715 mkexpr(f64lo)) ); 22716 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 22717 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22718 putYMMRegLane128( rD, 1, mkV128(0) ); 22719 *uses_vvvv = True; 22720 goto decode_success; 22721 } 22722 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */ 22723 if (haveF3no66noF2(pfx)) { 22724 UChar modrm = getUChar(delta); 22725 UInt rV = getVexNvvvv(pfx); 22726 UInt rD = gregOfRexRM(pfx, modrm); 22727 IRTemp f32lo = newTemp(Ity_F32); 22728 if (epartIsReg(modrm)) { 22729 UInt rS = eregOfRexRM(pfx,modrm); 22730 assign(f32lo, getXMMRegLane32F(rS, 0)); 22731 delta += 1; 22732 DIP("vcvtss2sd %s,%s,%s\n", 22733 
nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 22734 } else { 22735 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22736 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 22737 delta += alen; 22738 DIP("vcvtss2sd %s,%s,%s\n", 22739 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 22740 } 22741 putXMMRegLane64F( rD, 0, 22742 unop( Iop_F32toF64, mkexpr(f32lo)) ); 22743 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 22744 putYMMRegLane128( rD, 1, mkV128(0) ); 22745 *uses_vvvv = True; 22746 goto decode_success; 22747 } 22748 break; 22749 22750 case 0x5B: 22751 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */ 22752 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22753 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 22754 True/*isAvx*/, False/*!r2zero*/ ); 22755 goto decode_success; 22756 } 22757 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */ 22758 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22759 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 22760 False/*!r2zero*/ ); 22761 goto decode_success; 22762 } 22763 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */ 22764 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 22765 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 22766 True/*isAvx*/, True/*r2zero*/ ); 22767 goto decode_success; 22768 } 22769 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */ 22770 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 22771 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 22772 True/*r2zero*/ ); 22773 goto decode_success; 22774 } 22775 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */ 22776 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22777 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ ); 22778 goto decode_success; 22779 } 22780 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */ 22781 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22782 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta ); 22783 goto decode_success; 22784 } 22785 break; 22786 22787 case 0x5C: 22788 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */ 22789 if (haveF2no66noF3(pfx)) { 22790 delta = dis_AVX128_E_V_to_G_lo64( 22791 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 ); 22792 goto decode_success; 22793 } 22794 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */ 22795 if (haveF3no66noF2(pfx)) { 22796 delta = dis_AVX128_E_V_to_G_lo32( 22797 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 ); 22798 goto decode_success; 22799 } 22800 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */ 22801 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22802 delta = dis_AVX128_E_V_to_G( 22803 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 ); 22804 goto decode_success; 22805 } 22806 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */ 22807 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22808 delta = dis_AVX256_E_V_to_G( 22809 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 ); 22810 goto decode_success; 22811 } 22812 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */ 22813 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22814 delta = dis_AVX128_E_V_to_G( 22815 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 ); 22816 goto decode_success; 22817 } 22818 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */ 22819 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22820 delta = dis_AVX256_E_V_to_G( 22821 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 ); 22822 goto decode_success; 22823 } 22824 break; 22825 22826 case 0x5D: 22827 
/* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */ 22828 if (haveF2no66noF3(pfx)) { 22829 delta = dis_AVX128_E_V_to_G_lo64( 22830 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 ); 22831 goto decode_success; 22832 } 22833 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */ 22834 if (haveF3no66noF2(pfx)) { 22835 delta = dis_AVX128_E_V_to_G_lo32( 22836 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 ); 22837 goto decode_success; 22838 } 22839 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */ 22840 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22841 delta = dis_AVX128_E_V_to_G( 22842 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 ); 22843 goto decode_success; 22844 } 22845 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */ 22846 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22847 delta = dis_AVX256_E_V_to_G( 22848 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 ); 22849 goto decode_success; 22850 } 22851 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */ 22852 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22853 delta = dis_AVX128_E_V_to_G( 22854 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 ); 22855 goto decode_success; 22856 } 22857 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */ 22858 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22859 delta = dis_AVX256_E_V_to_G( 22860 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 ); 22861 goto decode_success; 22862 } 22863 break; 22864 22865 case 0x5E: 22866 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */ 22867 if (haveF2no66noF3(pfx)) { 22868 delta = dis_AVX128_E_V_to_G_lo64( 22869 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 ); 22870 goto decode_success; 22871 } 22872 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */ 22873 if (haveF3no66noF2(pfx)) { 22874 delta = dis_AVX128_E_V_to_G_lo32( 22875 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 ); 22876 goto decode_success; 22877 } 22878 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */ 22879 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22880 delta = dis_AVX128_E_V_to_G( 22881 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 ); 22882 goto decode_success; 22883 } 22884 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */ 22885 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22886 delta = dis_AVX256_E_V_to_G( 22887 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 ); 22888 goto decode_success; 22889 } 22890 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */ 22891 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22892 delta = dis_AVX128_E_V_to_G( 22893 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 ); 22894 goto decode_success; 22895 } 22896 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */ 22897 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22898 delta = dis_AVX256_E_V_to_G( 22899 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 ); 22900 goto decode_success; 22901 } 22902 break; 22903 22904 case 0x5F: 22905 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */ 22906 if (haveF2no66noF3(pfx)) { 22907 delta = dis_AVX128_E_V_to_G_lo64( 22908 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 ); 22909 goto decode_success; 22910 } 22911 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */ 22912 if (haveF3no66noF2(pfx)) { 22913 delta = dis_AVX128_E_V_to_G_lo32( 22914 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 ); 22915 goto 
decode_success; 22916 } 22917 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */ 22918 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22919 delta = dis_AVX128_E_V_to_G( 22920 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 ); 22921 goto decode_success; 22922 } 22923 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */ 22924 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22925 delta = dis_AVX256_E_V_to_G( 22926 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 ); 22927 goto decode_success; 22928 } 22929 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */ 22930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22931 delta = dis_AVX128_E_V_to_G( 22932 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 ); 22933 goto decode_success; 22934 } 22935 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */ 22936 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 22937 delta = dis_AVX256_E_V_to_G( 22938 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 ); 22939 goto decode_success; 22940 } 22941 break; 22942 22943 case 0x60: 22944 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 22945 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */ 22946 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22947 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22948 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 22949 Iop_InterleaveLO8x16, NULL, 22950 False/*!invertLeftArg*/, True/*swapArgs*/ ); 22951 goto decode_success; 22952 } 22953 break; 22954 22955 case 0x61: 22956 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 22957 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */ 22958 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22959 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22960 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 22961 Iop_InterleaveLO16x8, NULL, 22962 False/*!invertLeftArg*/, True/*swapArgs*/ ); 22963 goto decode_success; 22964 } 22965 break; 22966 22967 case 0x62: 22968 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 22969 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */ 22970 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22971 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22972 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 22973 Iop_InterleaveLO32x4, NULL, 22974 False/*!invertLeftArg*/, True/*swapArgs*/ ); 22975 goto decode_success; 22976 } 22977 break; 22978 22979 case 0x63: 22980 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 22981 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */ 22982 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22983 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22984 uses_vvvv, vbi, pfx, delta, "vpacksswb", 22985 Iop_QNarrowBin16Sto8Sx16, NULL, 22986 False/*!invertLeftArg*/, True/*swapArgs*/ ); 22987 goto decode_success; 22988 } 22989 break; 22990 22991 case 0x64: 22992 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 22993 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */ 22994 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 22995 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 22996 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 ); 22997 goto decode_success; 22998 } 22999 break; 23000 23001 case 0x65: 23002 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 23003 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */ 23004 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23005 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23006 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 ); 23007 goto decode_success; 23008 } 23009 break; 23010 23011 
case 0x66: 23012 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 23013 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */ 23014 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23015 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23016 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 ); 23017 goto decode_success; 23018 } 23019 break; 23020 23021 case 0x67: 23022 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 23023 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */ 23024 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23025 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23026 uses_vvvv, vbi, pfx, delta, "vpackuswb", 23027 Iop_QNarrowBin16Sto8Ux16, NULL, 23028 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23029 goto decode_success; 23030 } 23031 break; 23032 23033 case 0x68: 23034 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 23035 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */ 23036 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23037 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23038 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 23039 Iop_InterleaveHI8x16, NULL, 23040 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23041 goto decode_success; 23042 } 23043 break; 23044 23045 case 0x69: 23046 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 23047 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */ 23048 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23049 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23050 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 23051 Iop_InterleaveHI16x8, NULL, 23052 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23053 goto decode_success; 23054 } 23055 break; 23056 23057 case 0x6A: 23058 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 23059 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */ 23060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23061 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23062 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 23063 Iop_InterleaveHI32x4, NULL, 23064 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23065 goto decode_success; 23066 } 23067 break; 23068 23069 case 0x6B: 23070 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 23071 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */ 23072 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23073 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23074 uses_vvvv, vbi, pfx, delta, "vpackssdw", 23075 Iop_QNarrowBin32Sto16Sx8, NULL, 23076 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23077 goto decode_success; 23078 } 23079 break; 23080 23081 case 0x6C: 23082 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 23083 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */ 23084 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23085 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23086 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 23087 Iop_InterleaveLO64x2, NULL, 23088 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23089 goto decode_success; 23090 } 23091 break; 23092 23093 case 0x6D: 23094 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 23095 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */ 23096 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23097 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 23098 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 23099 Iop_InterleaveHI64x2, NULL, 23100 False/*!invertLeftArg*/, True/*swapArgs*/ ); 23101 goto decode_success; 23102 } 23103 break; 23104 23105 case 0x6E: 23106 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */ 23107 if (have66noF2noF3(pfx) 23108 && 0==getVexL(pfx)/*128*/ && 
0==getRexW(pfx)/*W0*/) { 23109 vassert(sz == 2); /* even tho we are transferring 4, not 2. */ 23110 UChar modrm = getUChar(delta); 23111 if (epartIsReg(modrm)) { 23112 delta += 1; 23113 putYMMRegLoAndZU( 23114 gregOfRexRM(pfx,modrm), 23115 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 23116 ); 23117 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 23118 nameXMMReg(gregOfRexRM(pfx,modrm))); 23119 } else { 23120 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23121 delta += alen; 23122 putYMMRegLoAndZU( 23123 gregOfRexRM(pfx,modrm), 23124 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr))) 23125 ); 23126 DIP("vmovd %s, %s\n", dis_buf, 23127 nameXMMReg(gregOfRexRM(pfx,modrm))); 23128 } 23129 goto decode_success; 23130 } 23131 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */ 23132 if (have66noF2noF3(pfx) 23133 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 23134 vassert(sz == 2); /* even tho we are transferring 8, not 2. */ 23135 UChar modrm = getUChar(delta); 23136 if (epartIsReg(modrm)) { 23137 delta += 1; 23138 putYMMRegLoAndZU( 23139 gregOfRexRM(pfx,modrm), 23140 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 23141 ); 23142 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 23143 nameXMMReg(gregOfRexRM(pfx,modrm))); 23144 } else { 23145 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23146 delta += alen; 23147 putYMMRegLoAndZU( 23148 gregOfRexRM(pfx,modrm), 23149 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr))) 23150 ); 23151 DIP("vmovq %s, %s\n", dis_buf, 23152 nameXMMReg(gregOfRexRM(pfx,modrm))); 23153 } 23154 goto decode_success; 23155 } 23156 break; 23157 23158 case 0x6F: 23159 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */ 23160 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */ 23161 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 23162 && 1==getVexL(pfx)/*256*/) { 23163 UChar modrm = getUChar(delta); 23164 UInt rD = gregOfRexRM(pfx, modrm); 23165 IRTemp tD = newTemp(Ity_V256); 23166 Bool isA = have66noF2noF3(pfx); 23167 UChar ch = isA ? 'a' : 'u'; 23168 if (epartIsReg(modrm)) { 23169 UInt rS = eregOfRexRM(pfx, modrm); 23170 delta += 1; 23171 assign(tD, getYMMReg(rS)); 23172 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 23173 } else { 23174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23175 delta += alen; 23176 if (isA) 23177 gen_SEGV_if_not_32_aligned(addr); 23178 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 23179 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD)); 23180 } 23181 putYMMReg(rD, mkexpr(tD)); 23182 goto decode_success; 23183 } 23184 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */ 23185 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */ 23186 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 23187 && 0==getVexL(pfx)/*128*/) { 23188 UChar modrm = getUChar(delta); 23189 UInt rD = gregOfRexRM(pfx, modrm); 23190 IRTemp tD = newTemp(Ity_V128); 23191 Bool isA = have66noF2noF3(pfx); 23192 UChar ch = isA ? 
'a' : 'u'; 23193 if (epartIsReg(modrm)) { 23194 UInt rS = eregOfRexRM(pfx, modrm); 23195 delta += 1; 23196 assign(tD, getXMMReg(rS)); 23197 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 23198 } else { 23199 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23200 delta += alen; 23201 if (isA) 23202 gen_SEGV_if_not_16_aligned(addr); 23203 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 23204 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD)); 23205 } 23206 putYMMRegLoAndZU(rD, mkexpr(tD)); 23207 goto decode_success; 23208 } 23209 break; 23210 23211 case 0x70: 23212 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */ 23213 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23214 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/); 23215 goto decode_success; 23216 } 23217 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */ 23218 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23219 delta = dis_PSHUFxW_128( vbi, pfx, delta, 23220 True/*isAvx*/, False/*!xIsH*/ ); 23221 goto decode_success; 23222 } 23223 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */ 23224 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 23225 delta = dis_PSHUFxW_128( vbi, pfx, delta, 23226 True/*isAvx*/, True/*xIsH*/ ); 23227 goto decode_success; 23228 } 23229 break; 23230 23231 case 0x71: 23232 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */ 23233 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */ 23234 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */ 23235 if (have66noF2noF3(pfx) 23236 && 0==getVexL(pfx)/*128*/ 23237 && epartIsReg(getUChar(delta))) { 23238 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 23239 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23240 "vpsrlw", Iop_ShrN16x8 ); 23241 *uses_vvvv = True; 23242 goto decode_success; 23243 } 23244 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 23245 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23246 "vpsraw", Iop_SarN16x8 ); 23247 *uses_vvvv = True; 23248 goto decode_success; 23249 } 23250 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 23251 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23252 "vpsllw", Iop_ShlN16x8 ); 23253 *uses_vvvv = True; 23254 goto decode_success; 23255 } 23256 /* else fall through */ 23257 } 23258 break; 23259 23260 case 0x72: 23261 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */ 23262 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */ 23263 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */ 23264 if (have66noF2noF3(pfx) 23265 && 0==getVexL(pfx)/*128*/ 23266 && epartIsReg(getUChar(delta))) { 23267 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 23268 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23269 "vpsrld", Iop_ShrN32x4 ); 23270 *uses_vvvv = True; 23271 goto decode_success; 23272 } 23273 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 23274 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23275 "vpsrad", Iop_SarN32x4 ); 23276 *uses_vvvv = True; 23277 goto decode_success; 23278 } 23279 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 23280 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23281 "vpslld", Iop_ShlN32x4 ); 23282 *uses_vvvv = True; 23283 goto decode_success; 23284 } 23285 /* else fall through */ 23286 } 23287 break; 23288 23289 case 0x73: 23290 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */ 23291 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */ 23292 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */ 23293 /* VPSLLQ imm8, 
xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */ 23294 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23295 && epartIsReg(getUChar(delta))) { 23296 Int rS = eregOfRexRM(pfx,getUChar(delta)); 23297 Int rD = getVexNvvvv(pfx); 23298 IRTemp vecS = newTemp(Ity_V128); 23299 if (gregLO3ofRM(getUChar(delta)) == 3) { 23300 Int imm = (Int)getUChar(delta+1); 23301 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 23302 delta += 2; 23303 assign( vecS, getXMMReg(rS) ); 23304 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm ))); 23305 *uses_vvvv = True; 23306 goto decode_success; 23307 } 23308 if (gregLO3ofRM(getUChar(delta)) == 7) { 23309 Int imm = (Int)getUChar(delta+1); 23310 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 23311 delta += 2; 23312 assign( vecS, getXMMReg(rS) ); 23313 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm ))); 23314 *uses_vvvv = True; 23315 goto decode_success; 23316 } 23317 if (gregLO3ofRM(getUChar(delta)) == 2) { 23318 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23319 "vpsrlq", Iop_ShrN64x2 ); 23320 *uses_vvvv = True; 23321 goto decode_success; 23322 } 23323 if (gregLO3ofRM(getUChar(delta)) == 6) { 23324 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 23325 "vpsllq", Iop_ShlN64x2 ); 23326 *uses_vvvv = True; 23327 goto decode_success; 23328 } 23329 /* else fall through */ 23330 } 23331 break; 23332 23333 case 0x74: 23334 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 23335 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */ 23336 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23337 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23338 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 ); 23339 goto decode_success; 23340 } 23341 break; 23342 23343 case 0x75: 23344 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 23345 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */ 23346 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23347 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23348 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 ); 23349 goto decode_success; 23350 } 23351 break; 23352 23353 case 0x76: 23354 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 23355 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */ 23356 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23357 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23358 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 ); 23359 goto decode_success; 23360 } 23361 break; 23362 23363 case 0x77: 23364 /* VZEROUPPER = VEX.128.0F.WIG 77 */ 23365 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23366 Int i; 23367 IRTemp zero128 = newTemp(Ity_V128); 23368 assign(zero128, mkV128(0)); 23369 for (i = 0; i < 16; i++) { 23370 putYMMRegLane128(i, 1, mkexpr(zero128)); 23371 } 23372 DIP("vzeroupper\n"); 23373 goto decode_success; 23374 } 23375 /* VZEROALL = VEX.256.0F.WIG 77 */ 23376 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23377 Int i; 23378 IRTemp zero128 = newTemp(Ity_V128); 23379 assign(zero128, mkV128(0)); 23380 for (i = 0; i < 16; i++) { 23381 putYMMRegLoAndZU(i, mkexpr(zero128)); 23382 } 23383 DIP("vzeroall\n"); 23384 goto decode_success; 23385 } 23386 break; 23387 23388 case 0x7C: 23389 case 0x7D: 23390 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */ 23391 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */ 23392 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23393 IRTemp sV = newTemp(Ity_V128); 23394 IRTemp dV = newTemp(Ity_V128); 23395 Bool isAdd = opc == 0x7C; 23396 HChar* str = isAdd ? 
"add" : "sub"; 23397 UChar modrm = getUChar(delta); 23398 UInt rG = gregOfRexRM(pfx,modrm); 23399 UInt rV = getVexNvvvv(pfx); 23400 if (epartIsReg(modrm)) { 23401 UInt rE = eregOfRexRM(pfx,modrm); 23402 assign( sV, getXMMReg(rE) ); 23403 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 23404 nameXMMReg(rV), nameXMMReg(rG)); 23405 delta += 1; 23406 } else { 23407 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23408 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 23409 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 23410 nameXMMReg(rV), nameXMMReg(rG)); 23411 delta += alen; 23412 } 23413 assign( dV, getXMMReg(rV) ); 23414 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) ); 23415 *uses_vvvv = True; 23416 goto decode_success; 23417 } 23418 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */ 23419 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */ 23420 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23421 IRTemp sV = newTemp(Ity_V256); 23422 IRTemp dV = newTemp(Ity_V256); 23423 IRTemp s1, s0, d1, d0; 23424 Bool isAdd = opc == 0x7C; 23425 HChar* str = isAdd ? "add" : "sub"; 23426 UChar modrm = getUChar(delta); 23427 UInt rG = gregOfRexRM(pfx,modrm); 23428 UInt rV = getVexNvvvv(pfx); 23429 s1 = s0 = d1 = d0 = IRTemp_INVALID; 23430 if (epartIsReg(modrm)) { 23431 UInt rE = eregOfRexRM(pfx,modrm); 23432 assign( sV, getYMMReg(rE) ); 23433 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 23434 nameYMMReg(rV), nameYMMReg(rG)); 23435 delta += 1; 23436 } else { 23437 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23438 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 23439 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 23440 nameYMMReg(rV), nameYMMReg(rG)); 23441 delta += alen; 23442 } 23443 assign( dV, getYMMReg(rV) ); 23444 breakupV256toV128s( dV, &d1, &d0 ); 23445 breakupV256toV128s( sV, &s1, &s0 ); 23446 putYMMReg( rG, binop(Iop_V128HLtoV256, 23447 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ), 23448 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) ); 23449 *uses_vvvv = True; 23450 goto decode_success; 23451 } 23452 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */ 23453 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */ 23454 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23455 IRTemp sV = newTemp(Ity_V128); 23456 IRTemp dV = newTemp(Ity_V128); 23457 Bool isAdd = opc == 0x7C; 23458 HChar* str = isAdd ? "add" : "sub"; 23459 UChar modrm = getUChar(delta); 23460 UInt rG = gregOfRexRM(pfx,modrm); 23461 UInt rV = getVexNvvvv(pfx); 23462 if (epartIsReg(modrm)) { 23463 UInt rE = eregOfRexRM(pfx,modrm); 23464 assign( sV, getXMMReg(rE) ); 23465 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 23466 nameXMMReg(rV), nameXMMReg(rG)); 23467 delta += 1; 23468 } else { 23469 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23470 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 23471 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 23472 nameXMMReg(rV), nameXMMReg(rG)); 23473 delta += alen; 23474 } 23475 assign( dV, getXMMReg(rV) ); 23476 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) ); 23477 *uses_vvvv = True; 23478 goto decode_success; 23479 } 23480 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */ 23481 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */ 23482 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23483 IRTemp sV = newTemp(Ity_V256); 23484 IRTemp dV = newTemp(Ity_V256); 23485 IRTemp s1, s0, d1, d0; 23486 Bool isAdd = opc == 0x7C; 23487 HChar* str = isAdd ? 
"add" : "sub"; 23488 UChar modrm = getUChar(delta); 23489 UInt rG = gregOfRexRM(pfx,modrm); 23490 UInt rV = getVexNvvvv(pfx); 23491 s1 = s0 = d1 = d0 = IRTemp_INVALID; 23492 if (epartIsReg(modrm)) { 23493 UInt rE = eregOfRexRM(pfx,modrm); 23494 assign( sV, getYMMReg(rE) ); 23495 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 23496 nameYMMReg(rV), nameYMMReg(rG)); 23497 delta += 1; 23498 } else { 23499 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23500 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 23501 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 23502 nameYMMReg(rV), nameYMMReg(rG)); 23503 delta += alen; 23504 } 23505 assign( dV, getYMMReg(rV) ); 23506 breakupV256toV128s( dV, &d1, &d0 ); 23507 breakupV256toV128s( sV, &s1, &s0 ); 23508 putYMMReg( rG, binop(Iop_V128HLtoV256, 23509 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ), 23510 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) ); 23511 *uses_vvvv = True; 23512 goto decode_success; 23513 } 23514 break; 23515 23516 case 0x7E: 23517 /* Note the Intel docs don't make sense for this. I think they 23518 are wrong. They seem to imply it is a store when in fact I 23519 think it is a load. Also it's unclear whether this is W0, W1 23520 or WIG. */ 23521 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */ 23522 if (haveF3no66noF2(pfx) 23523 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 23524 vassert(sz == 4); /* even tho we are transferring 8, not 4. */ 23525 UChar modrm = getUChar(delta); 23526 UInt rG = gregOfRexRM(pfx,modrm); 23527 if (epartIsReg(modrm)) { 23528 UInt rE = eregOfRexRM(pfx,modrm); 23529 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 )); 23530 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23531 delta += 1; 23532 } else { 23533 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23534 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 23535 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 23536 delta += alen; 23537 } 23538 /* zero bits 255:64 */ 23539 putXMMRegLane64( rG, 1, mkU64(0) ); 23540 putYMMRegLane128( rG, 1, mkV128(0) ); 23541 goto decode_success; 23542 } 23543 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */ 23544 /* Moves from G to E, so is a store-form insn */ 23545 /* Intel docs list this in the VMOVD entry for some reason. 
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
            putIReg64(rE, getXMMRegLane64(rG, 0));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
            DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
      /* Moves from G to E, so is a store-form insn */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
            putIReg32(rE, getXMMRegLane32(rG, 0));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
            DIP("vmovd %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      break;

   case 0x7F:
      /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
      /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V256);
         Bool   isA   = have66noF2noF3(pfx);
         UChar  ch    = isA ? 'a' : 'u';
         assign(tS, getYMMReg(rS));
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            delta += 1;
            putYMMReg(rD, mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            if (isA)
               gen_SEGV_if_not_32_aligned(addr);
            storeLE(mkexpr(addr), mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
         }
         goto decode_success;
      }
      /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
      /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V128);
         Bool   isA   = have66noF2noF3(pfx);
         UChar  ch    = isA ? 'a' : 'u';
         assign(tS, getXMMReg(rS));
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            delta += 1;
            putYMMRegLoAndZU(rD, mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            if (isA)
               gen_SEGV_if_not_16_aligned(addr);
            storeLE(mkexpr(addr), mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
         }
         goto decode_success;
      }
      break;
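   /* Note on the alignment checks above: only the VMOVDQA ('a') forms
      fault on a misaligned effective address; VMOVDQU ('u') never does.
      As a sketch (illustrative only -- raise_SIGSEGV is a hypothetical
      stand-in for the side exit the generated IR actually takes):

         // align is 16 for the 128-bit form, 32 for the 256-bit form
         if ((addr & (align - 1)) != 0)
            raise_SIGSEGV();

      gen_SEGV_if_not_16_aligned / _32_aligned emit the IR equivalent
      of this test. */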
   case 0xAE:
      /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
      if (haveNo66noF2noF3(pfx)
          && 0==getVexL(pfx)/*LZ*/
          && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
          && sz == 4) {
         delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
         goto decode_success;
      }
      /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
      if (haveNo66noF2noF3(pfx)
          && 0==getVexL(pfx)/*LZ*/
          && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
          && sz == 4) {
         delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
         goto decode_success;
      }
      break;

   case 0xC2:
      /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
      if (haveF2no66noF3(pfx)) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpsd", False/*!all_lanes*/,
                                          8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
      if (haveF3no66noF2(pfx)) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpss", False/*!all_lanes*/,
                                          4/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmppd", True/*all_lanes*/,
                                          8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
      /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Long delta0 = delta;
         delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmppd", 8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.128.0F.WIG C2 /r ib */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpps", True/*all_lanes*/,
                                          4/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
      /* = VEX.NDS.256.0F.WIG C2 /r ib */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Long delta0 =
delta; 23712 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 23713 "vcmpps", 4/*sz*/); 23714 if (delta > delta0) goto decode_success; 23715 /* else fall through -- decoding has failed */ 23716 } 23717 break; 23718 23719 case 0xC4: 23720 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */ 23721 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23722 UChar modrm = getUChar(delta); 23723 UInt rG = gregOfRexRM(pfx, modrm); 23724 UInt rV = getVexNvvvv(pfx); 23725 Int imm8; 23726 IRTemp new16 = newTemp(Ity_I16); 23727 23728 if ( epartIsReg( modrm ) ) { 23729 imm8 = (Int)(getUChar(delta+1) & 7); 23730 assign( new16, unop(Iop_32to16, 23731 getIReg32(eregOfRexRM(pfx,modrm))) ); 23732 delta += 1+1; 23733 DIP( "vpinsrw $%d,%s,%s\n", imm8, 23734 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) ); 23735 } else { 23736 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 23737 imm8 = (Int)(getUChar(delta+alen) & 7); 23738 assign( new16, loadLE( Ity_I16, mkexpr(addr) )); 23739 delta += alen+1; 23740 DIP( "vpinsrw $%d,%s,%s\n", 23741 imm8, dis_buf, nameXMMReg(rG) ); 23742 } 23743 23744 IRTemp src_vec = newTemp(Ity_V128); 23745 assign(src_vec, getXMMReg( rV )); 23746 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 ); 23747 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 23748 *uses_vvvv = True; 23749 goto decode_success; 23750 } 23751 break; 23752 23753 case 0xC5: 23754 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */ 23755 if (have66noF2noF3(pfx) 23756 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 23757 Long delta0 = delta; 23758 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 23759 True/*isAvx*/ ); 23760 if (delta > delta0) goto decode_success; 23761 /* else fall through -- decoding has failed */ 23762 } 23763 break; 23764 23765 case 0xC6: 23766 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 23767 /* = VEX.NDS.128.0F.WIG C6 /r ib */ 23768 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23769 Int imm8 = 0; 23770 IRTemp eV = newTemp(Ity_V128); 23771 IRTemp vV = newTemp(Ity_V128); 23772 UInt modrm = getUChar(delta); 23773 UInt rG = gregOfRexRM(pfx,modrm); 23774 UInt rV = getVexNvvvv(pfx); 23775 assign( vV, getXMMReg(rV) ); 23776 if (epartIsReg(modrm)) { 23777 UInt rE = eregOfRexRM(pfx,modrm); 23778 assign( eV, getXMMReg(rE) ); 23779 imm8 = (Int)getUChar(delta+1); 23780 delta += 1+1; 23781 DIP("vshufps $%d,%s,%s,%s\n", 23782 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23783 } else { 23784 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 23785 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23786 imm8 = (Int)getUChar(delta+alen); 23787 delta += 1+alen; 23788 DIP("vshufps $%d,%s,%s,%s\n", 23789 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23790 } 23791 IRTemp res = math_SHUFPS_128( eV, vV, imm8 ); 23792 putYMMRegLoAndZU( rG, mkexpr(res) ); 23793 *uses_vvvv = True; 23794 goto decode_success; 23795 } 23796 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 23797 /* = VEX.NDS.256.0F.WIG C6 /r ib */ 23798 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23799 Int imm8 = 0; 23800 IRTemp eV = newTemp(Ity_V256); 23801 IRTemp vV = newTemp(Ity_V256); 23802 UInt modrm = getUChar(delta); 23803 UInt rG = gregOfRexRM(pfx,modrm); 23804 UInt rV = getVexNvvvv(pfx); 23805 assign( vV, getYMMReg(rV) ); 23806 if (epartIsReg(modrm)) { 23807 UInt rE = eregOfRexRM(pfx,modrm); 23808 assign( eV, getYMMReg(rE) ); 23809 imm8 = (Int)getUChar(delta+1); 23810 delta += 1+1; 23811 DIP("vshufps $%d,%s,%s,%s\n", 23812 imm8, nameYMMReg(rE), 
nameYMMReg(rV), nameYMMReg(rG)); 23813 } else { 23814 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 23815 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23816 imm8 = (Int)getUChar(delta+alen); 23817 delta += 1+alen; 23818 DIP("vshufps $%d,%s,%s,%s\n", 23819 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 23820 } 23821 IRTemp res = math_SHUFPS_256( eV, vV, imm8 ); 23822 putYMMReg( rG, mkexpr(res) ); 23823 *uses_vvvv = True; 23824 goto decode_success; 23825 } 23826 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 23827 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */ 23828 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23829 Int imm8 = 0; 23830 IRTemp eV = newTemp(Ity_V128); 23831 IRTemp vV = newTemp(Ity_V128); 23832 UInt modrm = getUChar(delta); 23833 UInt rG = gregOfRexRM(pfx,modrm); 23834 UInt rV = getVexNvvvv(pfx); 23835 assign( vV, getXMMReg(rV) ); 23836 if (epartIsReg(modrm)) { 23837 UInt rE = eregOfRexRM(pfx,modrm); 23838 assign( eV, getXMMReg(rE) ); 23839 imm8 = (Int)getUChar(delta+1); 23840 delta += 1+1; 23841 DIP("vshufpd $%d,%s,%s,%s\n", 23842 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23843 } else { 23844 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 23845 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23846 imm8 = (Int)getUChar(delta+alen); 23847 delta += 1+alen; 23848 DIP("vshufpd $%d,%s,%s,%s\n", 23849 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23850 } 23851 IRTemp res = math_SHUFPD_128( eV, vV, imm8 ); 23852 putYMMRegLoAndZU( rG, mkexpr(res) ); 23853 *uses_vvvv = True; 23854 goto decode_success; 23855 } 23856 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 23857 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */ 23858 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23859 Int imm8 = 0; 23860 IRTemp eV = newTemp(Ity_V256); 23861 IRTemp vV = newTemp(Ity_V256); 23862 UInt modrm = getUChar(delta); 23863 UInt rG = gregOfRexRM(pfx,modrm); 23864 UInt rV = getVexNvvvv(pfx); 23865 assign( vV, getYMMReg(rV) ); 23866 if (epartIsReg(modrm)) { 23867 UInt rE = eregOfRexRM(pfx,modrm); 23868 assign( eV, getYMMReg(rE) ); 23869 imm8 = (Int)getUChar(delta+1); 23870 delta += 1+1; 23871 DIP("vshufpd $%d,%s,%s,%s\n", 23872 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 23873 } else { 23874 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 23875 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23876 imm8 = (Int)getUChar(delta+alen); 23877 delta += 1+alen; 23878 DIP("vshufpd $%d,%s,%s,%s\n", 23879 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 23880 } 23881 IRTemp res = math_SHUFPD_256( eV, vV, imm8 ); 23882 putYMMReg( rG, mkexpr(res) ); 23883 *uses_vvvv = True; 23884 goto decode_success; 23885 } 23886 break; 23887 23888 case 0xD0: 23889 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */ 23890 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23891 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 23892 uses_vvvv, vbi, pfx, delta, 23893 "vaddsubpd", math_ADDSUBPD_128 ); 23894 goto decode_success; 23895 } 23896 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */ 23897 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23898 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 23899 uses_vvvv, vbi, pfx, delta, 23900 "vaddsubpd", math_ADDSUBPD_256 ); 23901 goto decode_success; 23902 } 23903 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */ 23904 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23905 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 23906 uses_vvvv, vbi, pfx, delta, 23907 "vaddsubps", 
math_ADDSUBPS_128 ); 23908 goto decode_success; 23909 } 23910 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */ 23911 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23912 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 23913 uses_vvvv, vbi, pfx, delta, 23914 "vaddsubps", math_ADDSUBPS_256 ); 23915 goto decode_success; 23916 } 23917 break; 23918 23919 case 0xD1: 23920 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */ 23921 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23922 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 23923 "vpsrlw", Iop_ShrN16x8 ); 23924 *uses_vvvv = True; 23925 goto decode_success; 23926 23927 } 23928 break; 23929 23930 case 0xD2: 23931 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */ 23932 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23933 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 23934 "vpsrld", Iop_ShrN32x4 ); 23935 *uses_vvvv = True; 23936 goto decode_success; 23937 } 23938 break; 23939 23940 case 0xD3: 23941 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */ 23942 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23943 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 23944 "vpsrlq", Iop_ShrN64x2 ); 23945 *uses_vvvv = True; 23946 goto decode_success; 23947 } 23948 break; 23949 23950 case 0xD4: 23951 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 23952 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */ 23953 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23954 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 23955 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 ); 23956 goto decode_success; 23957 } 23958 break; 23959 23960 case 0xD5: 23961 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */ 23962 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23963 delta = dis_AVX128_E_V_to_G( 23964 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 ); 23965 goto decode_success; 23966 } 23967 break; 23968 23969 case 0xD6: 23970 /* I can't even find any Intel docs for this one. */ 23971 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half 23972 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0 23973 (WIG, maybe?) 
*/ 23974 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23975 && 0==getRexW(pfx)/*this might be redundant, dunno*/) { 23976 UChar modrm = getUChar(delta); 23977 UInt rG = gregOfRexRM(pfx,modrm); 23978 if (epartIsReg(modrm)) { 23979 /* fall through, awaiting test case */ 23980 /* dst: lo half copied, hi half zeroed */ 23981 } else { 23982 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23983 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 )); 23984 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf ); 23985 delta += alen; 23986 goto decode_success; 23987 } 23988 } 23989 break; 23990 23991 case 0xD7: 23992 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */ 23993 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23994 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ ); 23995 goto decode_success; 23996 } 23997 break; 23998 23999 case 0xD8: 24000 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */ 24001 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24002 delta = dis_AVX128_E_V_to_G( 24003 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 ); 24004 goto decode_success; 24005 } 24006 break; 24007 24008 case 0xD9: 24009 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */ 24010 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24011 delta = dis_AVX128_E_V_to_G( 24012 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 ); 24013 goto decode_success; 24014 } 24015 break; 24016 24017 case 0xDA: 24018 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */ 24019 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24020 delta = dis_AVX128_E_V_to_G( 24021 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 ); 24022 goto decode_success; 24023 } 24024 break; 24025 24026 case 0xDB: 24027 /* VPAND r/m, rV, r ::: r = rV & r/m */ 24028 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */ 24029 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24030 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24031 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 ); 24032 goto decode_success; 24033 } 24034 break; 24035 24036 case 0xDC: 24037 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */ 24038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24039 delta = dis_AVX128_E_V_to_G( 24040 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 ); 24041 goto decode_success; 24042 } 24043 break; 24044 24045 case 0xDD: 24046 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */ 24047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24048 delta = dis_AVX128_E_V_to_G( 24049 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 ); 24050 goto decode_success; 24051 } 24052 break; 24053 24054 case 0xDE: 24055 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */ 24056 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24057 delta = dis_AVX128_E_V_to_G( 24058 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 ); 24059 goto decode_success; 24060 } 24061 break; 24062 24063 case 0xDF: 24064 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) 
*/ 24065 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */ 24066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24067 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24068 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128, 24069 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24070 goto decode_success; 24071 } 24072 break; 24073 24074 case 0xE0: 24075 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */ 24076 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24077 delta = dis_AVX128_E_V_to_G( 24078 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 ); 24079 goto decode_success; 24080 } 24081 break; 24082 24083 case 0xE1: 24084 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */ 24085 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24086 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 24087 "vpsraw", Iop_SarN16x8 ); 24088 *uses_vvvv = True; 24089 goto decode_success; 24090 } 24091 break; 24092 24093 case 0xE2: 24094 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */ 24095 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24096 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 24097 "vpsrad", Iop_SarN32x4 ); 24098 *uses_vvvv = True; 24099 goto decode_success; 24100 } 24101 break; 24102 24103 case 0xE3: 24104 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */ 24105 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24106 delta = dis_AVX128_E_V_to_G( 24107 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 ); 24108 goto decode_success; 24109 } 24110 break; 24111 24112 case 0xE4: 24113 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */ 24114 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24115 delta = dis_AVX128_E_V_to_G( 24116 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 ); 24117 goto decode_success; 24118 } 24119 break; 24120 24121 case 0xE5: 24122 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */ 24123 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24124 delta = dis_AVX128_E_V_to_G( 24125 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 ); 24126 goto decode_success; 24127 } 24128 break; 24129 24130 case 0xE6: 24131 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */ 24132 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 24133 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/); 24134 goto decode_success; 24135 } 24136 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */ 24137 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 24138 delta = dis_CVTDQ2PD_256(vbi, pfx, delta); 24139 goto decode_success; 24140 } 24141 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */ 24142 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24143 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 24144 True/*r2zero*/); 24145 goto decode_success; 24146 } 24147 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */ 24148 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24149 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/); 24150 goto decode_success; 24151 } 24152 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */ 24153 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24154 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 24155 False/*!r2zero*/); 24156 goto decode_success; 24157 } 24158 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */ 24159 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24160 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/); 24161 goto decode_success; 24162 
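         /* (In the VCVT{T,}PD2DQ cases above, the r2zero flag selects
            the rounding used for the float->int conversion: True for
            the truncating VCVTTPD2DQ forms, which round towards zero
            regardless of the MXCSR rounding mode, and False for
            VCVTPD2DQ, which honours the current rounding mode.) */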
      }
      break;

   case 0xE7:
      /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovntdq %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovntdq %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      break;

   case 0xE8:
      /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
         goto decode_success;
      }
      break;

   case 0xE9:
      /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
         goto decode_success;
      }
      break;

   case 0xEA:
      /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
      /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
         goto decode_success;
      }
      break;

   case 0xEB:
      /* VPOR r/m, rV, r ::: r = rV | r/m */
      /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
         goto decode_success;
      }
      break;

   case 0xEC:
      /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
         goto decode_success;
      }
      break;

   case 0xED:
      /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
         goto decode_success;
      }
      break;

   case 0xEE:
      /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
      /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
         goto decode_success;
      }
      break;

   case 0xEF:
      /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
      /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta =
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24267 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 ); 24268 goto decode_success; 24269 } 24270 break; 24271 24272 case 0xF0: 24273 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */ 24274 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24275 UChar modrm = getUChar(delta); 24276 UInt rD = gregOfRexRM(pfx, modrm); 24277 IRTemp tD = newTemp(Ity_V256); 24278 if (epartIsReg(modrm)) break; 24279 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24280 delta += alen; 24281 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 24282 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD)); 24283 putYMMReg(rD, mkexpr(tD)); 24284 goto decode_success; 24285 } 24286 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */ 24287 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24288 UChar modrm = getUChar(delta); 24289 UInt rD = gregOfRexRM(pfx, modrm); 24290 IRTemp tD = newTemp(Ity_V128); 24291 if (epartIsReg(modrm)) break; 24292 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24293 delta += alen; 24294 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 24295 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD)); 24296 putYMMRegLoAndZU(rD, mkexpr(tD)); 24297 goto decode_success; 24298 } 24299 break; 24300 24301 case 0xF1: 24302 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */ 24303 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24304 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 24305 "vpsllw", Iop_ShlN16x8 ); 24306 *uses_vvvv = True; 24307 goto decode_success; 24308 24309 } 24310 break; 24311 24312 case 0xF2: 24313 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */ 24314 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24315 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 24316 "vpslld", Iop_ShlN32x4 ); 24317 *uses_vvvv = True; 24318 goto decode_success; 24319 } 24320 break; 24321 24322 case 0xF3: 24323 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */ 24324 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24325 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 24326 "vpsllq", Iop_ShlN64x2 ); 24327 *uses_vvvv = True; 24328 goto decode_success; 24329 } 24330 break; 24331 24332 case 0xF4: 24333 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */ 24334 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24335 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 24336 uses_vvvv, vbi, pfx, delta, 24337 "vpmuludq", math_PMULUDQ_128 ); 24338 goto decode_success; 24339 } 24340 break; 24341 24342 case 0xF5: 24343 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */ 24344 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24345 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 24346 uses_vvvv, vbi, pfx, delta, 24347 "vpmaddwd", math_PMADDWD_128 ); 24348 goto decode_success; 24349 } 24350 break; 24351 24352 case 0xF6: 24353 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */ 24354 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24355 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 24356 uses_vvvv, vbi, pfx, delta, 24357 "vpsadbw", math_PSADBW_128 ); 24358 goto decode_success; 24359 } 24360 break; 24361 24362 case 0xF7: 24363 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */ 24364 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 24365 && epartIsReg(getUChar(delta))) { 24366 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ ); 24367 goto decode_success; 24368 } 24369 break; 24370 24371 case 0xF8: 24372 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 24373 /* VPSUBB = 
VEX.NDS.128.66.0F.WIG F8 /r */ 24374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24375 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24376 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 ); 24377 goto decode_success; 24378 } 24379 break; 24380 24381 case 0xF9: 24382 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 24383 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */ 24384 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24385 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24386 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 ); 24387 goto decode_success; 24388 } 24389 break; 24390 24391 case 0xFA: 24392 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 24393 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */ 24394 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24395 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24396 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 ); 24397 goto decode_success; 24398 } 24399 break; 24400 24401 case 0xFB: 24402 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 24403 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */ 24404 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24405 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24406 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 ); 24407 goto decode_success; 24408 } 24409 break; 24410 24411 case 0xFC: 24412 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 24413 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */ 24414 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24415 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24416 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 ); 24417 goto decode_success; 24418 } 24419 break; 24420 24421 case 0xFD: 24422 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 24423 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */ 24424 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24425 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24426 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 ); 24427 goto decode_success; 24428 } 24429 break; 24430 24431 case 0xFE: 24432 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 24433 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */ 24434 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24435 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24436 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 ); 24437 goto decode_success; 24438 } 24439 break; 24440 24441 default: 24442 break; 24443 24444 } 24445 24446 //decode_failure: 24447 return deltaIN; 24448 24449 decode_success: 24450 return delta; 24451 } 24452 24453 24454 /*------------------------------------------------------------*/ 24455 /*--- ---*/ 24456 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/ 24457 /*--- ---*/ 24458 /*------------------------------------------------------------*/ 24459 24460 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 24461 { 24462 /* In the control vector, zero out all but the bottom two bits of 24463 each 32-bit lane. */ 24464 IRExpr* cv1 = binop(Iop_ShrN32x4, 24465 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)), 24466 mkU8(30)); 24467 /* And use the resulting cleaned-up control vector as steering 24468 in a Perm operation. 
*/ 24469 IRTemp res = newTemp(Ity_V128); 24470 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1)); 24471 return res; 24472 } 24473 24474 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV ) 24475 { 24476 IRTemp dHi, dLo, cHi, cLo; 24477 dHi = dLo = cHi = cLo = IRTemp_INVALID; 24478 breakupV256toV128s( dataV, &dHi, &dLo ); 24479 breakupV256toV128s( ctrlV, &cHi, &cLo ); 24480 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi ); 24481 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo ); 24482 IRTemp res = newTemp(Ity_V256); 24483 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo))); 24484 return res; 24485 } 24486 24487 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 24488 { 24489 /* No cleverness here .. */ 24490 IRTemp dHi, dLo, cHi, cLo; 24491 dHi = dLo = cHi = cLo = IRTemp_INVALID; 24492 breakupV128to64s( dataV, &dHi, &dLo ); 24493 breakupV128to64s( ctrlV, &cHi, &cLo ); 24494 IRExpr* rHi 24495 = IRExpr_Mux0X( unop(Iop_64to8, 24496 binop(Iop_And64, mkexpr(cHi), mkU64(2))), 24497 mkexpr(dLo), mkexpr(dHi) ); 24498 IRExpr* rLo 24499 = IRExpr_Mux0X( unop(Iop_64to8, 24500 binop(Iop_And64, mkexpr(cLo), mkU64(2))), 24501 mkexpr(dLo), mkexpr(dHi) ); 24502 IRTemp res = newTemp(Ity_V128); 24503 assign(res, binop(Iop_64HLtoV128, rHi, rLo)); 24504 return res; 24505 } 24506 24507 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV ) 24508 { 24509 IRTemp dHi, dLo, cHi, cLo; 24510 dHi = dLo = cHi = cLo = IRTemp_INVALID; 24511 breakupV256toV128s( dataV, &dHi, &dLo ); 24512 breakupV256toV128s( ctrlV, &cHi, &cLo ); 24513 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi ); 24514 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo ); 24515 IRTemp res = newTemp(Ity_V256); 24516 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo))); 24517 return res; 24518 } 24519 24520 __attribute__((noinline)) 24521 static 24522 Long dis_ESC_0F38__VEX ( 24523 /*MB_OUT*/DisResult* dres, 24524 /*OUT*/ Bool* uses_vvvv, 24525 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 24526 Bool resteerCisOk, 24527 void* callback_opaque, 24528 VexArchInfo* archinfo, 24529 VexAbiInfo* vbi, 24530 Prefix pfx, Int sz, Long deltaIN 24531 ) 24532 { 24533 IRTemp addr = IRTemp_INVALID; 24534 Int alen = 0; 24535 HChar dis_buf[50]; 24536 Long delta = deltaIN; 24537 UChar opc = getUChar(delta); 24538 delta++; 24539 *uses_vvvv = False; 24540 24541 switch (opc) { 24542 24543 case 0x00: 24544 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 24545 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */ 24546 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24547 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 24548 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM ); 24549 goto decode_success; 24550 } 24551 break; 24552 24553 case 0x01: 24554 case 0x02: 24555 case 0x03: 24556 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */ 24557 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */ 24558 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */ 24559 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24560 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 24561 *uses_vvvv = True; 24562 goto decode_success; 24563 } 24564 break; 24565 24566 case 0x04: 24567 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */ 24568 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24569 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 24570 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 24571 math_PMADDUBSW_128 ); 24572 goto decode_success; 24573 } 
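      /* A scalar sketch of what math_PMADDUBSW_128 computes, for
         reference (illustrative only; per the Intel description of
         PMADDUBSW the unsigned bytes come from the first source, here
         the vvvv register V, and the signed bytes from the r/m operand
         E; sat16() denotes signed 16-bit saturation):

            for (i = 0; i < 8; i++)
               res16[i] = sat16( (Int)uV[2*i]   * (Int)sE[2*i]
                               + (Int)uV[2*i+1] * (Int)sE[2*i+1] );
      */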
24574 break; 24575 24576 case 0x05: 24577 case 0x06: 24578 case 0x07: 24579 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */ 24580 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */ 24581 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */ 24582 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24583 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 24584 *uses_vvvv = True; 24585 goto decode_success; 24586 } 24587 break; 24588 24589 case 0x08: 24590 case 0x09: 24591 case 0x0A: 24592 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */ 24593 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */ 24594 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */ 24595 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24596 IRTemp sV = newTemp(Ity_V128); 24597 IRTemp dV = newTemp(Ity_V128); 24598 IRTemp sHi, sLo, dHi, dLo; 24599 sHi = sLo = dHi = dLo = IRTemp_INVALID; 24600 UChar ch = '?'; 24601 Int laneszB = 0; 24602 UChar modrm = getUChar(delta); 24603 UInt rG = gregOfRexRM(pfx,modrm); 24604 UInt rV = getVexNvvvv(pfx); 24605 24606 switch (opc) { 24607 case 0x08: laneszB = 1; ch = 'b'; break; 24608 case 0x09: laneszB = 2; ch = 'w'; break; 24609 case 0x0A: laneszB = 4; ch = 'd'; break; 24610 default: vassert(0); 24611 } 24612 24613 assign( dV, getXMMReg(rV) ); 24614 24615 if (epartIsReg(modrm)) { 24616 UInt rE = eregOfRexRM(pfx,modrm); 24617 assign( sV, getXMMReg(rE) ); 24618 delta += 1; 24619 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE), 24620 nameXMMReg(rV), nameXMMReg(rG)); 24621 } else { 24622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24623 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 24624 delta += alen; 24625 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 24626 nameXMMReg(rV), nameXMMReg(rG)); 24627 } 24628 24629 breakupV128to64s( dV, &dHi, &dLo ); 24630 breakupV128to64s( sV, &sHi, &sLo ); 24631 24632 putYMMRegLoAndZU( 24633 rG, 24634 binop(Iop_64HLtoV128, 24635 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 24636 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 24637 ) 24638 ); 24639 *uses_vvvv = True; 24640 goto decode_success; 24641 } 24642 break; 24643 24644 case 0x0B: 24645 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */ 24646 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24647 IRTemp sV = newTemp(Ity_V128); 24648 IRTemp dV = newTemp(Ity_V128); 24649 IRTemp sHi, sLo, dHi, dLo; 24650 sHi = sLo = dHi = dLo = IRTemp_INVALID; 24651 UChar modrm = getUChar(delta); 24652 UInt rG = gregOfRexRM(pfx,modrm); 24653 UInt rV = getVexNvvvv(pfx); 24654 24655 assign( dV, getXMMReg(rV) ); 24656 24657 if (epartIsReg(modrm)) { 24658 UInt rE = eregOfRexRM(pfx,modrm); 24659 assign( sV, getXMMReg(rE) ); 24660 delta += 1; 24661 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE), 24662 nameXMMReg(rV), nameXMMReg(rG)); 24663 } else { 24664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24665 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 24666 delta += alen; 24667 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 24668 nameXMMReg(rV), nameXMMReg(rG)); 24669 } 24670 24671 breakupV128to64s( dV, &dHi, &dLo ); 24672 breakupV128to64s( sV, &sHi, &sLo ); 24673 24674 putYMMRegLoAndZU( 24675 rG, 24676 binop(Iop_64HLtoV128, 24677 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 24678 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 24679 ) 24680 ); 24681 *uses_vvvv = True; 24682 goto decode_success; 24683 } 24684 break; 24685 24686 case 0x0C: 24687 /* VPERMILPS xmm3/m128, 
xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */ 24688 if (have66noF2noF3(pfx) 24689 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 24690 UChar modrm = getUChar(delta); 24691 UInt rG = gregOfRexRM(pfx, modrm); 24692 UInt rV = getVexNvvvv(pfx); 24693 IRTemp ctrlV = newTemp(Ity_V128); 24694 if (epartIsReg(modrm)) { 24695 UInt rE = eregOfRexRM(pfx, modrm); 24696 delta += 1; 24697 DIP("vpermilps %s,%s,%s\n", 24698 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24699 assign(ctrlV, getXMMReg(rE)); 24700 } else { 24701 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24702 delta += alen; 24703 DIP("vpermilps %s,%s,%s\n", 24704 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 24705 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 24706 } 24707 IRTemp dataV = newTemp(Ity_V128); 24708 assign(dataV, getXMMReg(rV)); 24709 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV); 24710 putYMMRegLoAndZU(rG, mkexpr(resV)); 24711 *uses_vvvv = True; 24712 goto decode_success; 24713 } 24714 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */ 24715 if (have66noF2noF3(pfx) 24716 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 24717 UChar modrm = getUChar(delta); 24718 UInt rG = gregOfRexRM(pfx, modrm); 24719 UInt rV = getVexNvvvv(pfx); 24720 IRTemp ctrlV = newTemp(Ity_V256); 24721 if (epartIsReg(modrm)) { 24722 UInt rE = eregOfRexRM(pfx, modrm); 24723 delta += 1; 24724 DIP("vpermilps %s,%s,%s\n", 24725 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 24726 assign(ctrlV, getYMMReg(rE)); 24727 } else { 24728 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24729 delta += alen; 24730 DIP("vpermilps %s,%s,%s\n", 24731 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 24732 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 24733 } 24734 IRTemp dataV = newTemp(Ity_V256); 24735 assign(dataV, getYMMReg(rV)); 24736 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV); 24737 putYMMReg(rG, mkexpr(resV)); 24738 *uses_vvvv = True; 24739 goto decode_success; 24740 } 24741 break; 24742 24743 case 0x0D: 24744 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */ 24745 if (have66noF2noF3(pfx) 24746 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 24747 UChar modrm = getUChar(delta); 24748 UInt rG = gregOfRexRM(pfx, modrm); 24749 UInt rV = getVexNvvvv(pfx); 24750 IRTemp ctrlV = newTemp(Ity_V128); 24751 if (epartIsReg(modrm)) { 24752 UInt rE = eregOfRexRM(pfx, modrm); 24753 delta += 1; 24754 DIP("vpermilpd %s,%s,%s\n", 24755 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24756 assign(ctrlV, getXMMReg(rE)); 24757 } else { 24758 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24759 delta += alen; 24760 DIP("vpermilpd %s,%s,%s\n", 24761 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 24762 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 24763 } 24764 IRTemp dataV = newTemp(Ity_V128); 24765 assign(dataV, getXMMReg(rV)); 24766 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV); 24767 putYMMRegLoAndZU(rG, mkexpr(resV)); 24768 *uses_vvvv = True; 24769 goto decode_success; 24770 } 24771 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */ 24772 if (have66noF2noF3(pfx) 24773 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 24774 UChar modrm = getUChar(delta); 24775 UInt rG = gregOfRexRM(pfx, modrm); 24776 UInt rV = getVexNvvvv(pfx); 24777 IRTemp ctrlV = newTemp(Ity_V256); 24778 if (epartIsReg(modrm)) { 24779 UInt rE = eregOfRexRM(pfx, modrm); 24780 delta += 1; 24781 DIP("vpermilpd %s,%s,%s\n", 24782 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 24783 assign(ctrlV, getYMMReg(rE)); 
24784 } else { 24785 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24786 delta += alen; 24787 DIP("vpermilpd %s,%s,%s\n", 24788 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 24789 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 24790 } 24791 IRTemp dataV = newTemp(Ity_V256); 24792 assign(dataV, getYMMReg(rV)); 24793 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV); 24794 putYMMReg(rG, mkexpr(resV)); 24795 *uses_vvvv = True; 24796 goto decode_success; 24797 } 24798 break; 24799 24800 case 0x0E: 24801 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */ 24802 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24803 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 ); 24804 goto decode_success; 24805 } 24806 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */ 24807 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24808 delta = dis_xTESTy_256( vbi, pfx, delta, 32 ); 24809 goto decode_success; 24810 } 24811 break; 24812 24813 case 0x0F: 24814 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */ 24815 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24816 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 ); 24817 goto decode_success; 24818 } 24819 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */ 24820 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24821 delta = dis_xTESTy_256( vbi, pfx, delta, 64 ); 24822 goto decode_success; 24823 } 24824 break; 24825 24826 case 0x17: 24827 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */ 24828 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24829 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 ); 24830 goto decode_success; 24831 } 24832 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */ 24833 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24834 delta = dis_xTESTy_256( vbi, pfx, delta, 0 ); 24835 goto decode_success; 24836 } 24837 break; 24838 24839 case 0x18: 24840 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 24841 if (have66noF2noF3(pfx) 24842 && 0==getVexL(pfx)/*128*/ 24843 && !epartIsReg(getUChar(delta))) { 24844 UChar modrm = getUChar(delta); 24845 UInt rG = gregOfRexRM(pfx, modrm); 24846 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24847 delta += alen; 24848 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG)); 24849 IRTemp t32 = newTemp(Ity_I32); 24850 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 24851 IRTemp t64 = newTemp(Ity_I64); 24852 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 24853 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 24854 putYMMRegLoAndZU(rG, res); 24855 goto decode_success; 24856 } 24857 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 24858 if (have66noF2noF3(pfx) 24859 && 1==getVexL(pfx)/*256*/ 24860 && !epartIsReg(getUChar(delta))) { 24861 UChar modrm = getUChar(delta); 24862 UInt rG = gregOfRexRM(pfx, modrm); 24863 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24864 delta += alen; 24865 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG)); 24866 IRTemp t32 = newTemp(Ity_I32); 24867 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 24868 IRTemp t64 = newTemp(Ity_I64); 24869 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 24870 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 24871 mkexpr(t64), mkexpr(t64)); 24872 putYMMReg(rG, res); 24873 goto decode_success; 24874 } 24875 break; 24876 24877 case 0x19: 24878 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 24879 if (have66noF2noF3(pfx) 24880 && 1==getVexL(pfx)/*256*/ 24881 && 
!epartIsReg(getUChar(delta))) { 24882 UChar modrm = getUChar(delta); 24883 UInt rG = gregOfRexRM(pfx, modrm); 24884 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24885 delta += alen; 24886 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG)); 24887 IRTemp t64 = newTemp(Ity_I64); 24888 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 24889 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 24890 mkexpr(t64), mkexpr(t64)); 24891 putYMMReg(rG, res); 24892 goto decode_success; 24893 } 24894 break; 24895 24896 case 0x1A: 24897 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */ 24898 if (have66noF2noF3(pfx) 24899 && 1==getVexL(pfx)/*256*/ 24900 && !epartIsReg(getUChar(delta))) { 24901 UChar modrm = getUChar(delta); 24902 UInt rG = gregOfRexRM(pfx, modrm); 24903 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24904 delta += alen; 24905 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG)); 24906 IRTemp t128 = newTemp(Ity_V128); 24907 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 24908 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 24909 goto decode_success; 24910 } 24911 break; 24912 24913 case 0x1C: 24914 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */ 24915 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24916 delta = dis_AVX128_E_to_G_unary( 24917 uses_vvvv, vbi, pfx, delta, 24918 "vpabsb", math_PABS_XMM_pap1 ); 24919 goto decode_success; 24920 } 24921 break; 24922 24923 case 0x1D: 24924 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */ 24925 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24926 delta = dis_AVX128_E_to_G_unary( 24927 uses_vvvv, vbi, pfx, delta, 24928 "vpabsw", math_PABS_XMM_pap2 ); 24929 goto decode_success; 24930 } 24931 break; 24932 24933 case 0x1E: 24934 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */ 24935 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24936 delta = dis_AVX128_E_to_G_unary( 24937 uses_vvvv, vbi, pfx, delta, 24938 "vpabsd", math_PABS_XMM_pap4 ); 24939 goto decode_success; 24940 } 24941 break; 24942 24943 case 0x20: 24944 /* VPMOVSXBW xmm2/m64, xmm1 */ 24945 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */ 24946 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24947 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 24948 True/*isAvx*/, False/*!xIsZ*/ ); 24949 goto decode_success; 24950 } 24951 break; 24952 24953 case 0x21: 24954 /* VPMOVSXBD xmm2/m32, xmm1 */ 24955 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */ 24956 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24957 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 24958 True/*isAvx*/, False/*!xIsZ*/ ); 24959 goto decode_success; 24960 } 24961 break; 24962 24963 case 0x22: 24964 /* VPMOVSXBQ xmm2/m16, xmm1 */ 24965 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */ 24966 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24967 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 24968 goto decode_success; 24969 } 24970 break; 24971 24972 case 0x23: 24973 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */ 24974 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24975 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 24976 True/*isAvx*/, False/*!xIsZ*/ ); 24977 goto decode_success; 24978 } 24979 break; 24980 24981 case 0x24: 24982 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */ 24983 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24984 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 24985 goto decode_success; 24986 } 24987 break; 24988 24989 case 0x25: 24990 /* VPMOVSXDQ xmm2/m64, xmm1 
= VEX.128.66.0F38.WIG 25 /r */ 24991 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24992 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 24993 True/*isAvx*/, False/*!xIsZ*/ ); 24994 goto decode_success; 24995 } 24996 break; 24997 24998 case 0x28: 24999 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */ 25000 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25001 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 25002 uses_vvvv, vbi, pfx, delta, 25003 "vpmuldq", math_PMULDQ_128 ); 25004 goto decode_success; 25005 } 25006 break; 25007 25008 case 0x29: 25009 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */ 25010 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */ 25011 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25012 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25013 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 ); 25014 goto decode_success; 25015 } 25016 break; 25017 25018 case 0x2A: 25019 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */ 25020 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 25021 && !epartIsReg(getUChar(delta))) { 25022 UChar modrm = getUChar(delta); 25023 UInt rD = gregOfRexRM(pfx, modrm); 25024 IRTemp tD = newTemp(Ity_V128); 25025 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25026 delta += alen; 25027 gen_SEGV_if_not_16_aligned(addr); 25028 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 25029 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD)); 25030 putYMMRegLoAndZU(rD, mkexpr(tD)); 25031 goto decode_success; 25032 } 25033 break; 25034 25035 case 0x2B: 25036 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 25037 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */ 25038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25039 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25040 uses_vvvv, vbi, pfx, delta, "vpackusdw", 25041 Iop_QNarrowBin32Sto16Ux8, NULL, 25042 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25043 goto decode_success; 25044 } 25045 break; 25046 25047 case 0x30: 25048 /* VPMOVZXBW xmm2/m64, xmm1 */ 25049 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */ 25050 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25051 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 25052 True/*isAvx*/, True/*xIsZ*/ ); 25053 goto decode_success; 25054 } 25055 break; 25056 25057 case 0x31: 25058 /* VPMOVZXBD xmm2/m32, xmm1 */ 25059 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */ 25060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25061 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 25062 True/*isAvx*/, True/*xIsZ*/ ); 25063 goto decode_success; 25064 } 25065 break; 25066 25067 case 0x32: 25068 /* VPMOVZXBQ xmm2/m16, xmm1 */ 25069 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */ 25070 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25071 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 25072 goto decode_success; 25073 } 25074 break; 25075 25076 case 0x33: 25077 /* VPMOVZXWD xmm2/m64, xmm1 */ 25078 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */ 25079 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25080 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 25081 True/*isAvx*/, True/*xIsZ*/ ); 25082 goto decode_success; 25083 } 25084 break; 25085 25086 case 0x34: 25087 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */ 25088 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25089 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 25090 goto decode_success; 25091 } 25092 break; 25093 25094 case 0x35: 25095 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */ 25096 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 25097 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 25098 True/*isAvx*/, True/*xIsZ*/ ); 25099 goto decode_success; 25100 } 25101 break; 25102 25103 case 0x37: 25104 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 25105 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */ 25106 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25107 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25108 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 ); 25109 goto decode_success; 25110 } 25111 break; 25112 25113 case 0x38: 25114 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 25115 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */ 25116 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25117 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25118 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 ); 25119 goto decode_success; 25120 } 25121 break; 25122 25123 case 0x39: 25124 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 25125 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */ 25126 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25127 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25128 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 ); 25129 goto decode_success; 25130 } 25131 break; 25132 25133 case 0x3A: 25134 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 25135 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */ 25136 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25137 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25138 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 ); 25139 goto decode_success; 25140 } 25141 break; 25142 25143 case 0x3B: 25144 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 25145 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */ 25146 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25147 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25148 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 ); 25149 goto decode_success; 25150 } 25151 break; 25152 25153 case 0x3C: 25154 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 25155 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */ 25156 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25157 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25158 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 ); 25159 goto decode_success; 25160 } 25161 break; 25162 25163 case 0x3D: 25164 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 25165 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */ 25166 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25167 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25168 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 ); 25169 goto decode_success; 25170 } 25171 break; 25172 25173 case 0x3E: 25174 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */ 25175 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */ 25176 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25177 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25178 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 ); 25179 goto decode_success; 25180 } 25181 break; 25182 25183 case 0x3F: 25184 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 25185 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */ 25186 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25187 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25188 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 ); 25189 goto decode_success; 25190 } 25191 break; 25192 25193 case 0x40: 25194 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 25195 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */ 
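/* Note: only the low 32 bits of each 32x32 product are kept (hence Iop_Mul32x4); the widening multiply is VPMULDQ, opcode 0x28 above. */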
25196 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25197 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25198 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 ); 25199 goto decode_success; 25200 } 25201 break; 25202 25203 case 0x41: 25204 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */ 25205 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25206 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ ); 25207 goto decode_success; 25208 } 25209 break; 25210 25211 case 0xDB: 25212 case 0xDC: 25213 case 0xDD: 25214 case 0xDE: 25215 case 0xDF: 25216 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */ 25217 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */ 25218 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */ 25219 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */ 25220 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */ 25221 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25222 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc ); 25223 if (opc != 0xDB) *uses_vvvv = True; 25224 goto decode_success; 25225 } 25226 break; 25227 25228 default: 25229 break; 25230 25231 } 25232 25233 //decode_failure: 25234 return deltaIN; 25235 25236 decode_success: 25237 return delta; 25238 } 25239 25240 25241 /*------------------------------------------------------------*/ 25242 /*--- ---*/ 25243 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/ 25244 /*--- ---*/ 25245 /*------------------------------------------------------------*/ 25246 25247 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 ) 25248 { 25249 vassert(imm8 < 256); 25250 IRTemp s3, s2, s1, s0; 25251 s3 = s2 = s1 = s0 = IRTemp_INVALID; 25252 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 25253 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \ 25254 : ((_nn)==2) ?
s2 : s3) 25255 IRTemp res = newTemp(Ity_V128); 25256 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3), 25257 SEL((imm8 >> 4) & 3), 25258 SEL((imm8 >> 2) & 3), 25259 SEL((imm8 >> 0) & 3) )); 25260 # undef SEL 25261 return res; 25262 } 25263 25264 __attribute__((noinline)) 25265 static 25266 Long dis_ESC_0F3A__VEX ( 25267 /*MB_OUT*/DisResult* dres, 25268 /*OUT*/ Bool* uses_vvvv, 25269 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 25270 Bool resteerCisOk, 25271 void* callback_opaque, 25272 VexArchInfo* archinfo, 25273 VexAbiInfo* vbi, 25274 Prefix pfx, Int sz, Long deltaIN 25275 ) 25276 { 25277 IRTemp addr = IRTemp_INVALID; 25278 Int alen = 0; 25279 HChar dis_buf[50]; 25280 Long delta = deltaIN; 25281 UChar opc = getUChar(delta); 25282 delta++; 25283 *uses_vvvv = False; 25284 25285 switch (opc) { 25286 25287 case 0x04: 25288 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */ 25289 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25290 UChar modrm = getUChar(delta); 25291 UInt imm8 = 0; 25292 UInt rG = gregOfRexRM(pfx, modrm); 25293 IRTemp sV = newTemp(Ity_V256); 25294 if (epartIsReg(modrm)) { 25295 UInt rE = eregOfRexRM(pfx, modrm); 25296 delta += 1; 25297 imm8 = getUChar(delta); 25298 DIP("vpermilps $%u,%s,%s\n", 25299 imm8, nameYMMReg(rE), nameYMMReg(rG)); 25300 assign(sV, getYMMReg(rE)); 25301 } else { 25302 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25303 delta += alen; 25304 imm8 = getUChar(delta); 25305 DIP("vpermilps $%u,%s,%s\n", 25306 imm8, dis_buf, nameYMMReg(rG)); 25307 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 25308 } 25309 delta++; 25310 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 25311 breakupV256toV128s( sV, &sVhi, &sVlo ); 25312 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 ); 25313 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 ); 25314 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo)); 25315 putYMMReg(rG, res); 25316 goto decode_success; 25317 } 25318 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */ 25319 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25320 UChar modrm = getUChar(delta); 25321 UInt imm8 = 0; 25322 UInt rG = gregOfRexRM(pfx, modrm); 25323 IRTemp sV = newTemp(Ity_V128); 25324 if (epartIsReg(modrm)) { 25325 UInt rE = eregOfRexRM(pfx, modrm); 25326 delta += 1; 25327 imm8 = getUChar(delta); 25328 DIP("vpermilps $%u,%s,%s\n", 25329 imm8, nameXMMReg(rE), nameXMMReg(rG)); 25330 assign(sV, getXMMReg(rE)); 25331 } else { 25332 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25333 delta += alen; 25334 imm8 = getUChar(delta); 25335 DIP("vpermilps $%u,%s,%s\n", 25336 imm8, dis_buf, nameXMMReg(rG)); 25337 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 25338 } 25339 delta++; 25340 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) ); 25341 goto decode_success; 25342 } 25343 break; 25344 25345 case 0x05: 25346 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */ 25347 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25348 UChar modrm = getUChar(delta); 25349 UInt imm8 = 0; 25350 UInt rG = gregOfRexRM(pfx, modrm); 25351 IRTemp sV = newTemp(Ity_V128); 25352 if (epartIsReg(modrm)) { 25353 UInt rE = eregOfRexRM(pfx, modrm); 25354 delta += 1; 25355 imm8 = getUChar(delta); 25356 DIP("vpermilpd $%u,%s,%s\n", 25357 imm8, nameXMMReg(rE), nameXMMReg(rG)); 25358 assign(sV, getXMMReg(rE)); 25359 } else { 25360 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25361 delta += alen; 25362 imm8 = getUChar(delta); 25363 DIP("vpermilpd $%u,%s,%s\n", 25364 imm8, 
dis_buf, nameXMMReg(rG)); 25365 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 25366 } 25367 delta++; 25368 IRTemp s1 = newTemp(Ity_I64); 25369 IRTemp s0 = newTemp(Ity_I64); 25370 assign(s1, unop(Iop_V128HIto64, mkexpr(sV))); 25371 assign(s0, unop(Iop_V128to64, mkexpr(sV))); 25372 IRTemp dV = newTemp(Ity_V128); 25373 assign(dV, binop(Iop_64HLtoV128, 25374 mkexpr((imm8 & (1<<1)) ? s1 : s0), 25375 mkexpr((imm8 & (1<<0)) ? s1 : s0))); 25376 putYMMRegLoAndZU(rG, mkexpr(dV)); 25377 goto decode_success; 25378 } 25379 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */ 25380 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25381 UChar modrm = getUChar(delta); 25382 UInt imm8 = 0; 25383 UInt rG = gregOfRexRM(pfx, modrm); 25384 IRTemp sV = newTemp(Ity_V256); 25385 if (epartIsReg(modrm)) { 25386 UInt rE = eregOfRexRM(pfx, modrm); 25387 delta += 1; 25388 imm8 = getUChar(delta); 25389 DIP("vpermilpd $%u,%s,%s\n", 25390 imm8, nameYMMReg(rE), nameYMMReg(rG)); 25391 assign(sV, getYMMReg(rE)); 25392 } else { 25393 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25394 delta += alen; 25395 imm8 = getUChar(delta); 25396 DIP("vpermilpd $%u,%s,%s\n", 25397 imm8, dis_buf, nameYMMReg(rG)); 25398 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 25399 } 25400 delta++; 25401 IRTemp s3, s2, s1, s0; 25402 s3 = s2 = s1 = s0 = IRTemp_INVALID; 25403 breakupV256to64s(sV, &s3, &s2, &s1, &s0); 25404 IRTemp dV = newTemp(Ity_V256); 25405 assign(dV, IRExpr_Qop(Iop_64x4toV256, 25406 mkexpr((imm8 & (1<<3)) ? s3 : s2), 25407 mkexpr((imm8 & (1<<2)) ? s3 : s2), 25408 mkexpr((imm8 & (1<<1)) ? s1 : s0), 25409 mkexpr((imm8 & (1<<0)) ? s1 : s0))); 25410 putYMMReg(rG, mkexpr(dV)); 25411 goto decode_success; 25412 } 25413 break; 25414 25415 case 0x06: 25416 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 06 /r ib */ 25417 if (have66noF2noF3(pfx) 25418 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 25419 UChar modrm = getUChar(delta); 25420 UInt imm8 = 0; 25421 UInt rG = gregOfRexRM(pfx, modrm); 25422 UInt rV = getVexNvvvv(pfx); 25423 IRTemp s00 = newTemp(Ity_V128); 25424 IRTemp s01 = newTemp(Ity_V128); 25425 IRTemp s10 = newTemp(Ity_V128); 25426 IRTemp s11 = newTemp(Ity_V128); 25427 assign(s00, getYMMRegLane128(rV, 0)); 25428 assign(s01, getYMMRegLane128(rV, 1)); 25429 if (epartIsReg(modrm)) { 25430 UInt rE = eregOfRexRM(pfx, modrm); 25431 delta += 1; 25432 imm8 = getUChar(delta); 25433 DIP("vperm2f128 $%u,%s,%s,%s\n", 25434 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 25435 assign(s10, getYMMRegLane128(rE, 0)); 25436 assign(s11, getYMMRegLane128(rE, 1)); 25437 } else { 25438 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25439 delta += alen; 25440 imm8 = getUChar(delta); 25441 DIP("vperm2f128 $%u,%s,%s,%s\n", 25442 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 25443 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 25444 mkexpr(addr), mkU64(0)))); 25445 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 25446 mkexpr(addr), mkU64(16)))); 25447 } 25448 delta++; 25449 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 25450 : ((_nn)==2) ?
s10 : s11) 25451 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 25452 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 25453 # undef SEL 25454 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 25455 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 25456 *uses_vvvv = True; 25457 goto decode_success; 25458 } 25459 break; 25460 25461 case 0x08: 25462 /* VROUNDPS imm8, xmm2/m128, xmm1 */ 25463 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */ 25464 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25465 UChar modrm = getUChar(delta); 25466 UInt rG = gregOfRexRM(pfx, modrm); 25467 IRTemp src = newTemp(Ity_V128); 25468 IRTemp s0 = IRTemp_INVALID; 25469 IRTemp s1 = IRTemp_INVALID; 25470 IRTemp s2 = IRTemp_INVALID; 25471 IRTemp s3 = IRTemp_INVALID; 25472 IRTemp rm = newTemp(Ity_I32); 25473 Int imm = 0; 25474 25475 modrm = getUChar(delta); 25476 25477 if (epartIsReg(modrm)) { 25478 UInt rE = eregOfRexRM(pfx, modrm); 25479 assign( src, getXMMReg( rE ) ); 25480 imm = getUChar(delta+1); 25481 if (imm & ~15) break; 25482 delta += 1+1; 25483 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 25484 } else { 25485 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25486 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 25487 imm = getUChar(delta+alen); 25488 if (imm & ~15) break; 25489 delta += alen+1; 25490 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 25491 } 25492 25493 /* (imm & 3) contains an Intel-encoded rounding mode. Because 25494 that encoding is the same as the encoding for IRRoundingMode, 25495 we can use that value directly in the IR as a rounding 25496 mode. */ 25497 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 25498 25499 breakupV128to32s( src, &s3, &s2, &s1, &s0 ); 25500 putYMMRegLane128( rG, 1, mkV128(0) ); 25501 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 25502 unop(Iop_ReinterpI32asF32, mkexpr(s))) 25503 putYMMRegLane32F( rG, 3, CVT(s3) ); 25504 putYMMRegLane32F( rG, 2, CVT(s2) ); 25505 putYMMRegLane32F( rG, 1, CVT(s1) ); 25506 putYMMRegLane32F( rG, 0, CVT(s0) ); 25507 # undef CVT 25508 goto decode_success; 25509 } 25510 /* VROUNDPS imm8, ymm2/m256, ymm1 */ 25511 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */ 25512 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25513 UChar modrm = getUChar(delta); 25514 UInt rG = gregOfRexRM(pfx, modrm); 25515 IRTemp src = newTemp(Ity_V256); 25516 IRTemp s0 = IRTemp_INVALID; 25517 IRTemp s1 = IRTemp_INVALID; 25518 IRTemp s2 = IRTemp_INVALID; 25519 IRTemp s3 = IRTemp_INVALID; 25520 IRTemp s4 = IRTemp_INVALID; 25521 IRTemp s5 = IRTemp_INVALID; 25522 IRTemp s6 = IRTemp_INVALID; 25523 IRTemp s7 = IRTemp_INVALID; 25524 IRTemp rm = newTemp(Ity_I32); 25525 Int imm = 0; 25526 25527 modrm = getUChar(delta); 25528 25529 if (epartIsReg(modrm)) { 25530 UInt rE = eregOfRexRM(pfx, modrm); 25531 assign( src, getYMMReg( rE ) ); 25532 imm = getUChar(delta+1); 25533 if (imm & ~15) break; 25534 delta += 1+1; 25535 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 25536 } else { 25537 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25538 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 25539 imm = getUChar(delta+alen); 25540 if (imm & ~15) break; 25541 delta += alen+1; 25542 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 25543 } 25544 25545 /* (imm & 3) contains an Intel-encoded rounding mode. Because 25546 that encoding is the same as the encoding for IRRoundingMode, 25547 we can use that value directly in the IR as a rounding 25548 mode. 
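(Specifically: 0 = round to nearest even, 1 = round down (-inf), 2 = round up (+inf), 3 = round toward zero. If bit 2 of the immediate is set, the rounding mode is instead taken from MXCSR.RC, which is what get_sse_roundingmode() fetches.)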
*/ 25549 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 25550 25551 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 25552 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 25553 unop(Iop_ReinterpI32asF32, mkexpr(s))) 25554 putYMMRegLane32F( rG, 7, CVT(s7) ); 25555 putYMMRegLane32F( rG, 6, CVT(s6) ); 25556 putYMMRegLane32F( rG, 5, CVT(s5) ); 25557 putYMMRegLane32F( rG, 4, CVT(s4) ); 25558 putYMMRegLane32F( rG, 3, CVT(s3) ); 25559 putYMMRegLane32F( rG, 2, CVT(s2) ); 25560 putYMMRegLane32F( rG, 1, CVT(s1) ); 25561 putYMMRegLane32F( rG, 0, CVT(s0) ); 25562 # undef CVT 25563 goto decode_success; 25564 } 25565 break; 25566 case 0x09: 25567 /* VROUNDPD imm8, xmm2/m128, xmm1 */ 25568 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */ 25569 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25570 UChar modrm = getUChar(delta); 25571 UInt rG = gregOfRexRM(pfx, modrm); 25572 IRTemp src = newTemp(Ity_V128); 25573 IRTemp s0 = IRTemp_INVALID; 25574 IRTemp s1 = IRTemp_INVALID; 25575 IRTemp rm = newTemp(Ity_I32); 25576 Int imm = 0; 25577 25578 modrm = getUChar(delta); 25579 25580 if (epartIsReg(modrm)) { 25581 UInt rE = eregOfRexRM(pfx, modrm); 25582 assign( src, getXMMReg( rE ) ); 25583 imm = getUChar(delta+1); 25584 if (imm & ~15) break; 25585 delta += 1+1; 25586 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 25587 } else { 25588 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25589 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 25590 imm = getUChar(delta+alen); 25591 if (imm & ~15) break; 25592 delta += alen+1; 25593 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 25594 } 25595 25596 /* (imm & 3) contains an Intel-encoded rounding mode. Because 25597 that encoding is the same as the encoding for IRRoundingMode, 25598 we can use that value directly in the IR as a rounding 25599 mode. */ 25600 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 25601 25602 breakupV128to64s( src, &s1, &s0 ); 25603 putYMMRegLane128( rG, 1, mkV128(0) ); 25604 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 25605 unop(Iop_ReinterpI64asF64, mkexpr(s))) 25606 putYMMRegLane64F( rG, 1, CVT(s1) ); 25607 putYMMRegLane64F( rG, 0, CVT(s0) ); 25608 # undef CVT 25609 goto decode_success; 25610 } 25611 /* VROUNDPD imm8, ymm2/m256, ymm1 */ 25612 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */ 25613 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25614 UChar modrm = getUChar(delta); 25615 UInt rG = gregOfRexRM(pfx, modrm); 25616 IRTemp src = newTemp(Ity_V256); 25617 IRTemp s0 = IRTemp_INVALID; 25618 IRTemp s1 = IRTemp_INVALID; 25619 IRTemp s2 = IRTemp_INVALID; 25620 IRTemp s3 = IRTemp_INVALID; 25621 IRTemp rm = newTemp(Ity_I32); 25622 Int imm = 0; 25623 25624 modrm = getUChar(delta); 25625 25626 if (epartIsReg(modrm)) { 25627 UInt rE = eregOfRexRM(pfx, modrm); 25628 assign( src, getYMMReg( rE ) ); 25629 imm = getUChar(delta+1); 25630 if (imm & ~15) break; 25631 delta += 1+1; 25632 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 25633 } else { 25634 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25635 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 25636 imm = getUChar(delta+alen); 25637 if (imm & ~15) break; 25638 delta += alen+1; 25639 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 25640 } 25641 25642 /* (imm & 3) contains an Intel-encoded rounding mode. Because 25643 that encoding is the same as the encoding for IRRoundingMode, 25644 we can use that value directly in the IR as a rounding 25645 mode.
*/ 25646 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 25647 25648 breakupV256to64s( src, &s3, &s2, &s1, &s0 ); 25649 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 25650 unop(Iop_ReinterpI64asF64, mkexpr(s))) 25651 putYMMRegLane64F( rG, 3, CVT(s3) ); 25652 putYMMRegLane64F( rG, 2, CVT(s2) ); 25653 putYMMRegLane64F( rG, 1, CVT(s1) ); 25654 putYMMRegLane64F( rG, 0, CVT(s0) ); 25655 # undef CVT 25656 goto decode_success; 25657 } 25658 break; 25659 case 0x0A: 25660 case 0x0B: 25661 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */ 25662 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */ 25663 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */ 25664 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */ 25665 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25666 UChar modrm = getUChar(delta); 25667 UInt rG = gregOfRexRM(pfx, modrm); 25668 UInt rV = getVexNvvvv(pfx); 25669 Bool isD = opc == 0x0B; 25670 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 25671 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 25672 Int imm = 0; 25673 25674 if (epartIsReg(modrm)) { 25675 UInt rE = eregOfRexRM(pfx, modrm); 25676 assign( src, 25677 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 25678 imm = getUChar(delta+1); 25679 if (imm & ~15) break; 25680 delta += 1+1; 25681 DIP( "vrounds%c $%d,%s,%s,%s\n", 25682 isD ? 'd' : 's', 25683 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) ); 25684 } else { 25685 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25686 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 25687 imm = getUChar(delta+alen); 25688 if (imm & ~15) break; 25689 delta += alen+1; 25690 DIP( "vrounds%c $%d,%s,%s,%s\n", 25691 isD ? 'd' : 's', 25692 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) ); 25693 } 25694 25695 /* (imm & 3) contains an Intel-encoded rounding mode. Because 25696 that encoding is the same as the encoding for IRRoundingMode, 25697 we can use that value directly in the IR as a rounding 25698 mode. */ 25699 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 25700 (imm & 4) ?
get_sse_roundingmode() 25701 : mkU32(imm & 3), 25702 mkexpr(src)) ); 25703 25704 if (isD) 25705 putXMMRegLane64F( rG, 0, mkexpr(res) ); 25706 else { 25707 putXMMRegLane32F( rG, 0, mkexpr(res) ); 25708 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) ); 25709 } 25710 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) ); 25711 putYMMRegLane128( rG, 1, mkV128(0) ); 25712 *uses_vvvv = True; 25713 goto decode_success; 25714 } 25715 break; 25716 25717 case 0x0C: 25718 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */ 25719 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */ 25720 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25721 UChar modrm = getUChar(delta); 25722 UInt imm8; 25723 UInt rG = gregOfRexRM(pfx, modrm); 25724 UInt rV = getVexNvvvv(pfx); 25725 IRTemp sV = newTemp(Ity_V256); 25726 IRTemp sE = newTemp(Ity_V256); 25727 assign ( sV, getYMMReg(rV) ); 25728 if (epartIsReg(modrm)) { 25729 UInt rE = eregOfRexRM(pfx, modrm); 25730 delta += 1; 25731 imm8 = getUChar(delta); 25732 DIP("vblendps $%u,%s,%s,%s\n", 25733 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 25734 assign(sE, getYMMReg(rE)); 25735 } else { 25736 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25737 delta += alen; 25738 imm8 = getUChar(delta); 25739 DIP("vblendps $%u,%s,%s,%s\n", 25740 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 25741 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 25742 } 25743 delta++; 25744 putYMMReg( rG, 25745 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) ); 25746 *uses_vvvv = True; 25747 goto decode_success; 25748 } 25749 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */ 25750 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */ 25751 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25752 UChar modrm = getUChar(delta); 25753 UInt imm8; 25754 UInt rG = gregOfRexRM(pfx, modrm); 25755 UInt rV = getVexNvvvv(pfx); 25756 IRTemp sV = newTemp(Ity_V128); 25757 IRTemp sE = newTemp(Ity_V128); 25758 assign ( sV, getXMMReg(rV) ); 25759 if (epartIsReg(modrm)) { 25760 UInt rE = eregOfRexRM(pfx, modrm); 25761 delta += 1; 25762 imm8 = getUChar(delta); 25763 DIP("vblendps $%u,%s,%s,%s\n", 25764 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 25765 assign(sE, getXMMReg(rE)); 25766 } else { 25767 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25768 delta += alen; 25769 imm8 = getUChar(delta); 25770 DIP("vblendps $%u,%s,%s,%s\n", 25771 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 25772 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 25773 } 25774 delta++; 25775 putYMMRegLoAndZU( rG, 25776 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) ); 25777 *uses_vvvv = True; 25778 goto decode_success; 25779 } 25780 break; 25781 25782 case 0x0D: 25783 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */ 25784 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */ 25785 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25786 UChar modrm = getUChar(delta); 25787 UInt imm8; 25788 UInt rG = gregOfRexRM(pfx, modrm); 25789 UInt rV = getVexNvvvv(pfx); 25790 IRTemp sV = newTemp(Ity_V256); 25791 IRTemp sE = newTemp(Ity_V256); 25792 assign ( sV, getYMMReg(rV) ); 25793 if (epartIsReg(modrm)) { 25794 UInt rE = eregOfRexRM(pfx, modrm); 25795 delta += 1; 25796 imm8 = getUChar(delta); 25797 DIP("vblendpd $%u,%s,%s,%s\n", 25798 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 25799 assign(sE, getYMMReg(rE)); 25800 } else { 25801 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25802 delta += alen; 25803 imm8 = getUChar(delta); 25804 DIP("vblendpd $%u,%s,%s,%s\n", 25805 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 25806 assign(sE, 
loadLE(Ity_V256, mkexpr(addr))); 25807 } 25808 delta++; 25809 putYMMReg( rG, 25810 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) ); 25811 *uses_vvvv = True; 25812 goto decode_success; 25813 } 25814 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */ 25815 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */ 25816 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25817 UChar modrm = getUChar(delta); 25818 UInt imm8; 25819 UInt rG = gregOfRexRM(pfx, modrm); 25820 UInt rV = getVexNvvvv(pfx); 25821 IRTemp sV = newTemp(Ity_V128); 25822 IRTemp sE = newTemp(Ity_V128); 25823 assign ( sV, getXMMReg(rV) ); 25824 if (epartIsReg(modrm)) { 25825 UInt rE = eregOfRexRM(pfx, modrm); 25826 delta += 1; 25827 imm8 = getUChar(delta); 25828 DIP("vblendpd $%u,%s,%s,%s\n", 25829 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 25830 assign(sE, getXMMReg(rE)); 25831 } else { 25832 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25833 delta += alen; 25834 imm8 = getUChar(delta); 25835 DIP("vblendpd $%u,%s,%s,%s\n", 25836 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 25837 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 25838 } 25839 delta++; 25840 putYMMRegLoAndZU( rG, 25841 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) ); 25842 *uses_vvvv = True; 25843 goto decode_success; 25844 } 25845 break; 25846 25847 case 0x0E: 25848 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */ 25849 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */ 25850 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25851 UChar modrm = getUChar(delta); 25852 UInt imm8; 25853 UInt rG = gregOfRexRM(pfx, modrm); 25854 UInt rV = getVexNvvvv(pfx); 25855 IRTemp sV = newTemp(Ity_V128); 25856 IRTemp sE = newTemp(Ity_V128); 25857 assign ( sV, getXMMReg(rV) ); 25858 if (epartIsReg(modrm)) { 25859 UInt rE = eregOfRexRM(pfx, modrm); 25860 delta += 1; 25861 imm8 = getUChar(delta); 25862 DIP("vpblendw $%u,%s,%s,%s\n", 25863 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 25864 assign(sE, getXMMReg(rE)); 25865 } else { 25866 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25867 delta += alen; 25868 imm8 = getUChar(delta); 25869 DIP("vpblendw $%u,%s,%s,%s\n", 25870 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 25871 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 25872 } 25873 delta++; 25874 putYMMRegLoAndZU( rG, 25875 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) ); 25876 *uses_vvvv = True; 25877 goto decode_success; 25878 } 25879 break; 25880 25881 case 0x0F: 25882 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */ 25883 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */ 25884 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25885 UChar modrm = getUChar(delta); 25886 UInt rG = gregOfRexRM(pfx, modrm); 25887 UInt rV = getVexNvvvv(pfx); 25888 IRTemp sV = newTemp(Ity_V128); 25889 IRTemp dV = newTemp(Ity_V128); 25890 UInt imm8; 25891 25892 assign( dV, getXMMReg(rV) ); 25893 25894 if ( epartIsReg( modrm ) ) { 25895 UInt rE = eregOfRexRM(pfx, modrm); 25896 assign( sV, getXMMReg(rE) ); 25897 imm8 = getUChar(delta+1); 25898 delta += 1+1; 25899 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameXMMReg(rE), 25900 nameXMMReg(rV), nameXMMReg(rG)); 25901 } else { 25902 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25903 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 25904 imm8 = getUChar(delta+alen); 25905 delta += alen+1; 25906 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf, 25907 nameXMMReg(rV), nameXMMReg(rG)); 25908 } 25909 25910 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 ); 25911 putYMMRegLoAndZU( rG, mkexpr(res) ); 25912 *uses_vvvv = True; 25913 goto decode_success; 25914 }
25915 break; 25916 25917 case 0x14: 25918 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */ 25919 if (have66noF2noF3(pfx) 25920 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25921 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, True/*isAvx*/ ); 25922 goto decode_success; 25923 } 25924 break; 25925 25926 case 0x15: 25927 /* VPEXTRW imm8, reg/m16, xmm2 */ 25928 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */ 25929 if (have66noF2noF3(pfx) 25930 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25931 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ ); 25932 goto decode_success; 25933 } 25934 break; 25935 25936 case 0x16: 25937 /* VPEXTRD imm8, r32/m32, xmm2 */ 25938 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */ 25939 if (have66noF2noF3(pfx) 25940 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25941 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ ); 25942 goto decode_success; 25943 } 25944 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */ 25945 if (have66noF2noF3(pfx) 25946 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25947 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ ); 25948 goto decode_success; 25949 } 25950 break; 25951 25952 case 0x17: 25953 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */ 25954 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25955 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ ); 25956 goto decode_success; 25957 } 25958 break; 25959 25960 case 0x18: 25961 /* VINSERTF128 r/m, rV, rD 25962 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 25963 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */ 25964 if (have66noF2noF3(pfx) 25965 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 25966 UChar modrm = getUChar(delta); 25967 UInt ib = 0; 25968 UInt rG = gregOfRexRM(pfx, modrm); 25969 UInt rV = getVexNvvvv(pfx); 25970 IRTemp t128 = newTemp(Ity_V128); 25971 if (epartIsReg(modrm)) { 25972 UInt rE = eregOfRexRM(pfx, modrm); 25973 delta += 1; 25974 assign(t128, getXMMReg(rE)); 25975 ib = getUChar(delta); 25976 DIP("vinsertf128 $%u,%s,%s,%s\n", 25977 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 25978 } else { 25979 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25980 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 25981 delta += alen; 25982 ib = getUChar(delta); 25983 DIP("vinsertf128 $%u,%s,%s,%s\n", 25984 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 25985 } 25986 delta++; 25987 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 25988 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 25989 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 25990 *uses_vvvv = True; 25991 goto decode_success; 25992 } 25993 break; 25994 25995 case 0x19: 25996 /* VEXTRACTF128 $lane_no, rS, r/m 25997 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 25998 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */ 25999 if (have66noF2noF3(pfx) 26000 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 26001 UChar modrm = getUChar(delta); 26002 UInt ib = 0; 26003 UInt rS = gregOfRexRM(pfx, modrm); 26004 IRTemp t128 = newTemp(Ity_V128); 26005 if (epartIsReg(modrm)) { 26006 UInt rD = eregOfRexRM(pfx, modrm); 26007 delta += 1; 26008 ib = getUChar(delta); 26009 assign(t128, getYMMRegLane128(rS, ib & 1)); 26010 putYMMRegLoAndZU(rD, mkexpr(t128)); 26011 DIP("vextractf128 $%u,%s,%s\n", 26012 ib, nameYMMReg(rS), nameXMMReg(rD)); 26013 } else { 26014 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26015 delta += alen; 26016 ib = getUChar(delta); 26017 assign(t128, getYMMRegLane128(rS, ib & 1)); 26018 storeLE(mkexpr(addr),
mkexpr(t128)); 26019 DIP("vextractf128 $%u,%s,%s\n", 26020 ib, nameYMMReg(rS), dis_buf); 26021 } 26022 delta++; 26023 /* doesn't use vvvv */ 26024 goto decode_success; 26025 } 26026 break; 26027 26028 case 0x20: 26029 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */ 26030 if (have66noF2noF3(pfx) 26031 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 26032 UChar modrm = getUChar(delta); 26033 UInt rG = gregOfRexRM(pfx, modrm); 26034 UInt rV = getVexNvvvv(pfx); 26035 Int imm8; 26036 IRTemp src_u8 = newTemp(Ity_I8); 26037 26038 if ( epartIsReg( modrm ) ) { 26039 UInt rE = eregOfRexRM(pfx,modrm); 26040 imm8 = (Int)(getUChar(delta+1) & 15); 26041 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) ); 26042 delta += 1+1; 26043 DIP( "vpinsrb $%d,%s,%s,%s\n", 26044 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26045 } else { 26046 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26047 imm8 = (Int)(getUChar(delta+alen) & 15); 26048 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) ); 26049 delta += alen+1; 26050 DIP( "vpinsrb $%d,%s,%s,%s\n", 26051 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26052 } 26053 26054 IRTemp src_vec = newTemp(Ity_V128); 26055 assign(src_vec, getXMMReg( rV )); 26056 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 ); 26057 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26058 *uses_vvvv = True; 26059 goto decode_success; 26060 } 26061 break; 26062 26063 case 0x21: 26064 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1 26065 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */ 26066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26067 UChar modrm = getUChar(delta); 26068 UInt rG = gregOfRexRM(pfx, modrm); 26069 UInt rV = getVexNvvvv(pfx); 26070 UInt imm8; 26071 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 26072 const IRTemp inval = IRTemp_INVALID; 26073 26074 if ( epartIsReg( modrm ) ) { 26075 UInt rE = eregOfRexRM(pfx, modrm); 26076 IRTemp vE = newTemp(Ity_V128); 26077 assign( vE, getXMMReg(rE) ); 26078 IRTemp dsE[4] = { inval, inval, inval, inval }; 26079 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 26080 imm8 = getUChar(delta+1); 26081 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 26082 delta += 1+1; 26083 DIP( "vinsertps $%u, %s,%s\n", 26084 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 26085 } else { 26086 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26087 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 26088 imm8 = getUChar(delta+alen); 26089 delta += alen+1; 26090 DIP( "vinsertps $%u, %s,%s\n", 26091 imm8, dis_buf, nameXMMReg(rG) ); 26092 } 26093 26094 IRTemp vV = newTemp(Ity_V128); 26095 assign( vV, getXMMReg(rV) ); 26096 26097 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) ); 26098 *uses_vvvv = True; 26099 goto decode_success; 26100 } 26101 break; 26102 26103 case 0x22: 26104 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */ 26105 if (have66noF2noF3(pfx) 26106 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 26107 UChar modrm = getUChar(delta); 26108 UInt rG = gregOfRexRM(pfx, modrm); 26109 UInt rV = getVexNvvvv(pfx); 26110 Int imm8_10; 26111 IRTemp src_u32 = newTemp(Ity_I32); 26112 26113 if ( epartIsReg( modrm ) ) { 26114 UInt rE = eregOfRexRM(pfx,modrm); 26115 imm8_10 = (Int)(getUChar(delta+1) & 3); 26116 assign( src_u32, getIReg32( rE ) ); 26117 delta += 1+1; 26118 DIP( "vpinsrd $%d,%s,%s,%s\n", 26119 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26120 } else { 26121 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26122 imm8_10 =
(Int)(getUChar(delta+alen) & 3); 26123 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 26124 delta += alen+1; 26125 DIP( "vpinsrd $%d,%s,%s,%s\n", 26126 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26127 } 26128 26129 IRTemp src_vec = newTemp(Ity_V128); 26130 assign(src_vec, getXMMReg( rV )); 26131 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 26132 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26133 *uses_vvvv = True; 26134 goto decode_success; 26135 } 26136 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */ 26137 if (have66noF2noF3(pfx) 26138 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 26139 UChar modrm = getUChar(delta); 26140 UInt rG = gregOfRexRM(pfx, modrm); 26141 UInt rV = getVexNvvvv(pfx); 26142 Int imm8_0; 26143 IRTemp src_u64 = newTemp(Ity_I64); 26144 26145 if ( epartIsReg( modrm ) ) { 26146 UInt rE = eregOfRexRM(pfx,modrm); 26147 imm8_0 = (Int)(getUChar(delta+1) & 1); 26148 assign( src_u64, getIReg64( rE ) ); 26149 delta += 1+1; 26150 DIP( "vpinsrq $%d,%s,%s,%s\n", 26151 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26152 } else { 26153 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26154 imm8_0 = (Int)(getUChar(delta+alen) & 1); 26155 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 26156 delta += alen+1; 26157 DIP( "vpinsrq $%d,%s,%s,%s\n", 26158 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26159 } 26160 26161 IRTemp src_vec = newTemp(Ity_V128); 26162 assign(src_vec, getXMMReg( rV )); 26163 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 26164 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26165 *uses_vvvv = True; 26166 goto decode_success; 26167 } 26168 break; 26169 26170 case 0x40: 26171 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */ 26172 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26173 UChar modrm = getUChar(delta); 26174 UInt rG = gregOfRexRM(pfx, modrm); 26175 UInt rV = getVexNvvvv(pfx); 26176 IRTemp dst_vec = newTemp(Ity_V128); 26177 Int imm8; 26178 if (epartIsReg( modrm )) { 26179 UInt rE = eregOfRexRM(pfx,modrm); 26180 imm8 = (Int)getUChar(delta+1); 26181 assign( dst_vec, getXMMReg( rE ) ); 26182 delta += 1+1; 26183 DIP( "vdpps $%d,%s,%s,%s\n", 26184 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26185 } else { 26186 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26187 imm8 = (Int)getUChar(delta+alen); 26188 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 26189 delta += alen+1; 26190 DIP( "vdpps $%d,%s,%s,%s\n", 26191 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26192 } 26193 26194 IRTemp src_vec = newTemp(Ity_V128); 26195 assign(src_vec, getXMMReg( rV )); 26196 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 ); 26197 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26198 *uses_vvvv = True; 26199 goto decode_success; 26200 } 26201 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */ 26202 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26203 UChar modrm = getUChar(delta); 26204 UInt rG = gregOfRexRM(pfx, modrm); 26205 UInt rV = getVexNvvvv(pfx); 26206 IRTemp dst_vec = newTemp(Ity_V256); 26207 Int imm8; 26208 if (epartIsReg( modrm )) { 26209 UInt rE = eregOfRexRM(pfx,modrm); 26210 imm8 = (Int)getUChar(delta+1); 26211 assign( dst_vec, getYMMReg( rE ) ); 26212 delta += 1+1; 26213 DIP( "vdpps $%d,%s,%s,%s\n", 26214 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) ); 26215 } else { 26216 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26217 imm8 = (Int)getUChar(delta+alen); 26218 assign(
dst_vec, loadLE( Ity_V256, mkexpr(addr) ) ); 26219 delta += alen+1; 26220 DIP( "vdpps $%d,%s,%s,%s\n", 26221 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 26222 } 26223 26224 IRTemp src_vec = newTemp(Ity_V256); 26225 assign(src_vec, getYMMReg( rV )); 26226 IRTemp s0, s1, d0, d1; 26227 s0 = s1 = d0 = d1 = IRTemp_INVALID; 26228 breakupV256toV128s( dst_vec, &d1, &d0 ); 26229 breakupV256toV128s( src_vec, &s1, &s0 ); 26230 putYMMReg( rG, binop( Iop_V128HLtoV256, 26231 mkexpr( math_DPPS_128(s1, d1, imm8) ), 26232 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) ); 26233 *uses_vvvv = True; 26234 goto decode_success; 26235 } 26236 break; 26237 26238 case 0x41: 26239 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */ 26240 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26241 UChar modrm = getUChar(delta); 26242 UInt rG = gregOfRexRM(pfx, modrm); 26243 UInt rV = getVexNvvvv(pfx); 26244 IRTemp dst_vec = newTemp(Ity_V128); 26245 Int imm8; 26246 if (epartIsReg( modrm )) { 26247 UInt rE = eregOfRexRM(pfx,modrm); 26248 imm8 = (Int)getUChar(delta+1); 26249 assign( dst_vec, getXMMReg( rE ) ); 26250 delta += 1+1; 26251 DIP( "vdppd $%d,%s,%s,%s\n", 26252 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26253 } else { 26254 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26255 imm8 = (Int)getUChar(delta+alen); 26256 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 26257 delta += alen+1; 26258 DIP( "vdppd $%d,%s,%s,%s\n", 26259 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26260 } 26261 26262 IRTemp src_vec = newTemp(Ity_V128); 26263 assign(src_vec, getXMMReg( rV )); 26264 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 ); 26265 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26266 *uses_vvvv = True; 26267 goto decode_success; 26268 } 26269 break; 26270 26271 case 0x42: 26272 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */ 26273 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */ 26274 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26275 UChar modrm = getUChar(delta); 26276 Int imm8; 26277 IRTemp src_vec = newTemp(Ity_V128); 26278 IRTemp dst_vec = newTemp(Ity_V128); 26279 UInt rG = gregOfRexRM(pfx, modrm); 26280 UInt rV = getVexNvvvv(pfx); 26281 26282 assign( dst_vec, getXMMReg(rV) ); 26283 26284 if ( epartIsReg( modrm ) ) { 26285 UInt rE = eregOfRexRM(pfx, modrm); 26286 26287 imm8 = (Int)getUChar(delta+1); 26288 assign( src_vec, getXMMReg(rE) ); 26289 delta += 1+1; 26290 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 26291 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26292 } else { 26293 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 26294 1/* imm8 is 1 byte after the amode */ ); 26295 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 26296 imm8 = (Int)getUChar(delta+alen); 26297 delta += alen+1; 26298 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 26299 dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26300 } 26301 26302 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec, 26303 src_vec, imm8) ) ); 26304 *uses_vvvv = True; 26305 goto decode_success; 26306 } 26307 break; 26308 26309 case 0x44: 26310 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */ 26311 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */ 26312 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 26313 * Carry-less multiplication of selected XMM quadwords into XMM 26314 * registers (a.k.a multiplication of polynomials over GF(2)) 26315 */ 26316 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26317 UChar modrm = getUChar(delta); 26318 Int imm8; 26319 IRTemp sV = newTemp(Ity_V128); 26320 IRTemp dV = 
newTemp(Ity_V128); 26321 UInt rG = gregOfRexRM(pfx, modrm); 26322 UInt rV = getVexNvvvv(pfx); 26323 26324 assign( dV, getXMMReg(rV) ); 26325 26326 if ( epartIsReg( modrm ) ) { 26327 UInt rE = eregOfRexRM(pfx, modrm); 26328 imm8 = (Int)getUChar(delta+1); 26329 assign( sV, getXMMReg(rE) ); 26330 delta += 1+1; 26331 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8, 26332 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 26333 } else { 26334 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 26335 1/* imm8 is 1 byte after the amode */ ); 26336 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) ); 26337 imm8 = (Int)getUChar(delta+alen); 26338 delta += alen+1; 26339 DIP( "vpclmulqdq $%d, %s,%s,%s\n", 26340 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 26341 } 26342 26343 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) ); 26344 *uses_vvvv = True; 26345 goto decode_success; 26346 } 26347 break; 26348 26349 case 0x4A: 26350 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4 26351 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 26352 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */ 26353 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26354 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 26355 "vblendvps", 4, Iop_SarN32x4 ); 26356 *uses_vvvv = True; 26357 goto decode_success; 26358 } 26359 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4 26360 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 26361 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */ 26362 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26363 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 26364 "vblendvps", 4, Iop_SarN32x4 ); 26365 *uses_vvvv = True; 26366 goto decode_success; 26367 } 26368 break; 26369 26370 case 0x4B: 26371 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4 26372 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 26373 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */ 26374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26375 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 26376 "vblendvpd", 8, Iop_SarN64x2 ); 26377 *uses_vvvv = True; 26378 goto decode_success; 26379 } 26380 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4 26381 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 26382 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */ 26383 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26384 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 26385 "vblendvpd", 8, Iop_SarN64x2 ); 26386 *uses_vvvv = True; 26387 goto decode_success; 26388 } 26389 break; 26390 26391 case 0x4C: 26392 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4 26393 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 26394 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */ 26395 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26396 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 26397 "vpblendvb", 1, Iop_SarN8x16 ); 26398 *uses_vvvv = True; 26399 goto decode_success; 26400 } 26401 break; 26402 26403 case 0x60: 26404 case 0x61: 26405 case 0x62: 26406 case 0x63: 26407 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1 26408 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1 26409 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1 26410 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1 26411 (selected special cases that actually occur in glibc, 26412 not by any means a complete implementation.) 
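For reference: the I (implicit-length) forms find the string lengths from a terminating zero element, whereas the E (explicit-length) forms take them from %rax and %rdx; the xSTRI forms return an index in %ecx, and the xSTRM forms a mask in %xmm0.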
26413 */ 26414 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26415 Long delta0 = delta; 26416 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc ); 26417 if (delta > delta0) goto decode_success; 26418 /* else fall through; dis_PCMPxSTRx failed to decode it */ 26419 } 26420 break; 26421 26422 case 0xDF: 26423 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */ 26424 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26425 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ ); 26426 goto decode_success; 26427 } 26428 break; 26429 26430 default: 26431 break; 26432 26433 } 26434 26435 //decode_failure: 26436 return deltaIN; 26437 26438 decode_success: 26439 return delta; 26440 } 26441 26442 26443 /*------------------------------------------------------------*/ 26444 /*--- ---*/ 26445 /*--- Disassemble a single instruction ---*/ 26446 /*--- ---*/ 26447 /*------------------------------------------------------------*/ 26448 26449 /* Disassemble a single instruction into IR. The instruction is 26450 located in host memory at &guest_code[delta]. */ 26451 26452 static 26453 DisResult disInstr_AMD64_WRK ( 26454 /*OUT*/Bool* expect_CAS, 26455 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 26456 Bool resteerCisOk, 26457 void* callback_opaque, 26458 Long delta64, 26459 VexArchInfo* archinfo, 26460 VexAbiInfo* vbi 26461 ) 26462 { 26463 IRTemp t1, t2, t3, t4, t5, t6; 26464 UChar pre; 26465 Int n, n_prefixes; 26466 DisResult dres; 26467 26468 /* The running delta */ 26469 Long delta = delta64; 26470 26471 /* Holds rip at the start of the insn, so that we can print 26472 consistent error messages for unimplemented insns. */ 26473 Long delta_start = delta; 26474 26475 /* sz denotes the nominal data-op size of the insn; we change it to 26476 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of 26477 conflict REX.W takes precedence. */ 26478 Int sz = 4; 26479 26480 /* pfx holds the summary of prefixes. */ 26481 Prefix pfx = PFX_EMPTY; 26482 26483 /* Holds the computed opcode-escape indication. */ 26484 Escape esc = ESC_NONE; 26485 26486 /* Set result defaults. */ 26487 dres.whatNext = Dis_Continue; 26488 dres.len = 0; 26489 dres.continueAt = 0; 26490 dres.jk_StopHere = Ijk_INVALID; 26491 *expect_CAS = False; 26492 26493 vassert(guest_RIP_next_assumed == 0); 26494 vassert(guest_RIP_next_mustcheck == False); 26495 26496 t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 26497 26498 DIP("\t0x%llx: ", guest_RIP_bbstart+delta); 26499 26500 /* Spot "Special" instructions (see comment at top of file). */ 26501 { 26502 UChar* code = (UChar*)(guest_code + delta); 26503 /* Spot the 16-byte preamble: 26504 48C1C703 rolq $3, %rdi 26505 48C1C70D rolq $13, %rdi 26506 48C1C73D rolq $61, %rdi 26507 48C1C733 rolq $51, %rdi 26508 */ 26509 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 26510 && code[ 3] == 0x03 && 26511 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 26512 && code[ 7] == 0x0D && 26513 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 26514 && code[11] == 0x3D && 26515 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 26516 && code[15] == 0x33) { 26517 /* Got a "Special" instruction preamble. Which one is it?
*/ 26518 if (code[16] == 0x48 && code[17] == 0x87 26519 && code[18] == 0xDB /* xchgq %rbx,%rbx */) { 26520 /* %RDX = client_request ( %RAX ) */ 26521 DIP("%%rdx = client_request ( %%rax )\n"); 26522 delta += 19; 26523 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta); 26524 vassert(dres.whatNext == Dis_StopHere); 26525 goto decode_success; 26526 } 26527 else 26528 if (code[16] == 0x48 && code[17] == 0x87 26529 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { 26530 /* %RAX = guest_NRADDR */ 26531 DIP("%%rax = guest_NRADDR\n"); 26532 delta += 19; 26533 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 26534 goto decode_success; 26535 } 26536 else 26537 if (code[16] == 0x48 && code[17] == 0x87 26538 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { 26539 /* call-noredir *%RAX */ 26540 DIP("call-noredir *%%rax\n"); 26541 delta += 19; 26542 t1 = newTemp(Ity_I64); 26543 assign(t1, getIRegRAX(8)); 26544 t2 = newTemp(Ity_I64); 26545 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 26546 putIReg64(R_RSP, mkexpr(t2)); 26547 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); 26548 jmp_treg(&dres, Ijk_NoRedir, t1); 26549 vassert(dres.whatNext == Dis_StopHere); 26550 goto decode_success; 26551 } 26552 /* We don't know what it is. */ 26553 goto decode_failure; 26554 /*NOTREACHED*/ 26555 } 26556 } 26557 26558 /* Eat prefixes, summarising the result in pfx and sz, and rejecting 26559 as many invalid combinations as possible. */ 26560 n_prefixes = 0; 26561 while (True) { 26562 if (n_prefixes > 7) goto decode_failure; 26563 pre = getUChar(delta); 26564 switch (pre) { 26565 case 0x66: pfx |= PFX_66; break; 26566 case 0x67: pfx |= PFX_ASO; break; 26567 case 0xF2: pfx |= PFX_F2; break; 26568 case 0xF3: pfx |= PFX_F3; break; 26569 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; 26570 case 0x2E: pfx |= PFX_CS; break; 26571 case 0x3E: pfx |= PFX_DS; break; 26572 case 0x26: pfx |= PFX_ES; break; 26573 case 0x64: pfx |= PFX_FS; break; 26574 case 0x65: pfx |= PFX_GS; break; 26575 case 0x36: pfx |= PFX_SS; break; 26576 case 0x40 ... 0x4F: 26577 pfx |= PFX_REX; 26578 if (pre & (1<<3)) pfx |= PFX_REXW; 26579 if (pre & (1<<2)) pfx |= PFX_REXR; 26580 if (pre & (1<<1)) pfx |= PFX_REXX; 26581 if (pre & (1<<0)) pfx |= PFX_REXB; 26582 break; 26583 default: 26584 goto not_a_legacy_prefix; 26585 } 26586 n_prefixes++; 26587 delta++; 26588 } 26589 26590 not_a_legacy_prefix: 26591 /* We've used up all the non-VEX prefixes. Parse and validate a 26592 VEX prefix if that's appropriate. */ 26593 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) { 26594 /* Used temporarily for holding VEX prefixes. */ 26595 UChar vex0 = getUChar(delta); 26596 if (vex0 == 0xC4) { 26597 /* 3-byte VEX */ 26598 UChar vex1 = getUChar(delta+1); 26599 UChar vex2 = getUChar(delta+2); 26600 delta += 3; 26601 pfx |= PFX_VEX; 26602 /* Snarf contents of byte 1 */ 26603 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 26604 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX; 26605 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB; 26606 /* m-mmmm */ 26607 switch (vex1 & 0x1F) { 26608 case 1: esc = ESC_0F; break; 26609 case 2: esc = ESC_0F38; break; 26610 case 3: esc = ESC_0F3A; break; 26611 /* Any other m-mmmm field will #UD */ 26612 default: goto decode_failure; 26613 } 26614 /* Snarf contents of byte 2 */ 26615 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0; 26616 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3; 26617 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2; 26618 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 
0 : PFX_VEXnV1; 26619 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0; 26620 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0; 26621 /* pp */ 26622 switch (vex2 & 3) { 26623 case 0: break; 26624 case 1: pfx |= PFX_66; break; 26625 case 2: pfx |= PFX_F3; break; 26626 case 3: pfx |= PFX_F2; break; 26627 default: vassert(0); 26628 } 26629 } 26630 else if (vex0 == 0xC5) { 26631 /* 2-byte VEX */ 26632 UChar vex1 = getUChar(delta+1); 26633 delta += 2; 26634 pfx |= PFX_VEX; 26635 /* Snarf contents of byte 1 */ 26636 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 26637 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3; 26638 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2; 26639 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1; 26640 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0; 26641 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0; 26642 /* pp */ 26643 switch (vex1 & 3) { 26644 case 0: break; 26645 case 1: pfx |= PFX_66; break; 26646 case 2: pfx |= PFX_F3; break; 26647 case 3: pfx |= PFX_F2; break; 26648 default: vassert(0); 26649 } 26650 /* implied: */ 26651 esc = ESC_0F; 26652 } 26653 /* Can't have both VEX and REX */ 26654 if ((pfx & PFX_VEX) && (pfx & PFX_REX)) 26655 goto decode_failure; /* can't have both */ 26656 } 26657 26658 /* Dump invalid combinations */ 26659 n = 0; 26660 if (pfx & PFX_F2) n++; 26661 if (pfx & PFX_F3) n++; 26662 if (n > 1) 26663 goto decode_failure; /* can't have both */ 26664 26665 n = 0; 26666 if (pfx & PFX_CS) n++; 26667 if (pfx & PFX_DS) n++; 26668 if (pfx & PFX_ES) n++; 26669 if (pfx & PFX_FS) n++; 26670 if (pfx & PFX_GS) n++; 26671 if (pfx & PFX_SS) n++; 26672 if (n > 1) 26673 goto decode_failure; /* multiple seg overrides == illegal */ 26674 26675 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' 26676 that we should accept it. */ 26677 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero) 26678 goto decode_failure; 26679 26680 /* Ditto for %gs prefixes. */ 26681 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60) 26682 goto decode_failure; 26683 26684 /* Set up sz. */ 26685 sz = 4; 26686 if (pfx & PFX_66) sz = 2; 26687 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; 26688 26689 /* Now we should be looking at the primary opcode byte or the 26690 leading escapes. Check that any LOCK prefix is actually 26691 allowed. */ 26692 if (pfx & PFX_LOCK) { 26693 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 26694 DIP("lock "); 26695 } else { 26696 *expect_CAS = False; 26697 goto decode_failure; 26698 } 26699 } 26700 26701 /* Eat up opcode escape bytes, until we're really looking at the 26702 primary opcode byte. But only if there's no VEX present. */ 26703 if (!(pfx & PFX_VEX)) { 26704 vassert(esc == ESC_NONE); 26705 pre = getUChar(delta); 26706 if (pre == 0x0F) { 26707 delta++; 26708 pre = getUChar(delta); 26709 switch (pre) { 26710 case 0x38: esc = ESC_0F38; delta++; break; 26711 case 0x3A: esc = ESC_0F3A; delta++; break; 26712 default: esc = ESC_0F; break; 26713 } 26714 } 26715 } 26716 26717 /* So now we're really really looking at the primary opcode 26718 byte. */ 26719 Long delta_at_primary_opcode = delta; 26720 26721 if (!(pfx & PFX_VEX)) { 26722 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE 26723 instructions preserve the upper 128 bits of YMM registers; 26724 iow we can simply ignore the presence of the upper halves of 26725 these registers. 
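For example, a legacy "movaps %xmm1,%xmm2" leaves bits 255:128 of %ymm2 unchanged, whereas the VEX-encoded "vmovaps %xmm1,%xmm2" zeroes them.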
*/ 26726 switch (esc) { 26727 case ESC_NONE: 26728 delta = dis_ESC_NONE( &dres, expect_CAS, 26729 resteerOkFn, resteerCisOk, callback_opaque, 26730 archinfo, vbi, pfx, sz, delta ); 26731 break; 26732 case ESC_0F: 26733 delta = dis_ESC_0F ( &dres, expect_CAS, 26734 resteerOkFn, resteerCisOk, callback_opaque, 26735 archinfo, vbi, pfx, sz, delta ); 26736 break; 26737 case ESC_0F38: 26738 delta = dis_ESC_0F38( &dres, 26739 resteerOkFn, resteerCisOk, callback_opaque, 26740 archinfo, vbi, pfx, sz, delta ); 26741 break; 26742 case ESC_0F3A: 26743 delta = dis_ESC_0F3A( &dres, 26744 resteerOkFn, resteerCisOk, callback_opaque, 26745 archinfo, vbi, pfx, sz, delta ); 26746 break; 26747 default: 26748 vassert(0); 26749 } 26750 } else { 26751 /* VEX prefixed instruction */ 26752 /* Sloppy Intel wording: "An instruction encoded with a VEX.128 26753 prefix that loads a YMM register operand ..." zeroes out bits 26754 128 and above of the register. */ 26755 Bool uses_vvvv = False; 26756 switch (esc) { 26757 case ESC_0F: 26758 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv, 26759 resteerOkFn, resteerCisOk, 26760 callback_opaque, 26761 archinfo, vbi, pfx, sz, delta ); 26762 break; 26763 case ESC_0F38: 26764 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv, 26765 resteerOkFn, resteerCisOk, 26766 callback_opaque, 26767 archinfo, vbi, pfx, sz, delta ); 26768 break; 26769 case ESC_0F3A: 26770 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv, 26771 resteerOkFn, resteerCisOk, 26772 callback_opaque, 26773 archinfo, vbi, pfx, sz, delta ); 26774 break; 26775 case ESC_NONE: 26776 /* The presence of a VEX prefix, by Intel definition, 26777 always implies at least an 0F escape. */ 26778 goto decode_failure; 26779 default: 26780 vassert(0); 26781 } 26782 /* If the insn doesn't use VEX.vvvv then it must be all ones. 26783 Check this. */ 26784 if (!uses_vvvv) { 26785 if (getVexNvvvv(pfx) != 0) 26786 goto decode_failure; 26787 } 26788 } 26789 26790 vassert(delta - delta_at_primary_opcode >= 0); 26791 vassert(delta - delta_at_primary_opcode < 16/*let's say*/); 26792 26793 /* Use delta == delta_at_primary_opcode to denote decode failure. 26794 This implies that any successful decode must use at least one 26795 byte up. */ 26796 if (delta == delta_at_primary_opcode) 26797 goto decode_failure; 26798 else 26799 goto decode_success; /* \o/ */ 26800 26801 #if 0 /* XYZZY */ 26802 26803 /* ---------------------------------------------------- */ 26804 /* --- The SSE/SSE2 decoder. --- */ 26805 /* ---------------------------------------------------- */ 26806 26807 /* What did I do to deserve SSE ? Perhaps I was really bad in a 26808 previous life? */ 26809 26810 /* Note, this doesn't handle SSE3 right now. All amd64s support 26811 SSE2 as a minimum so there is no point distinguishing SSE1 vs 26812 SSE2. */ 26813 26814 insn = (UChar*)&guest_code[delta]; 26815 26816 /* FXSAVE is spuriously at the start here only because it is 26817 thusly placed in guest-x86/toIR.c. */ 26818 26819 /* ------ SSE decoder main ------ */ 26820 26821 /* ---------------------------------------------------- */ 26822 /* --- end of the SSE decoder. --- */ 26823 /* ---------------------------------------------------- */ 26824 26825 /* ---------------------------------------------------- */ 26826 /* --- start of the SSE2 decoder. --- */ 26827 /* ---------------------------------------------------- */ 26828 26829 /* ---------------------------------------------------- */ 26830 /* --- end of the SSE/SSE2 decoder. 
26831 /* ---------------------------------------------------- */
26832 
26833 /* ---------------------------------------------------- */
26834 /* --- start of the SSE3 decoder.                   --- */
26835 /* ---------------------------------------------------- */
26836 
26837 /* ---------------------------------------------------- */
26838 /* --- end of the SSE3 decoder.                     --- */
26839 /* ---------------------------------------------------- */
26840 
26841 /* ---------------------------------------------------- */
26842 /* --- start of the SSSE3 decoder.                  --- */
26843 /* ---------------------------------------------------- */
26844 
26845 /* ---------------------------------------------------- */
26846 /* --- end of the SSSE3 decoder.                    --- */
26847 /* ---------------------------------------------------- */
26848 
26849 /* ---------------------------------------------------- */
26850 /* --- start of the SSE4 decoder                    --- */
26851 /* ---------------------------------------------------- */
26852 
26853 /* ---------------------------------------------------- */
26854 /* --- end of the SSE4 decoder                      --- */
26855 /* ---------------------------------------------------- */
26856 
26857   /*after_sse_decoders:*/
26858 
26859    /* Get the primary opcode. */
26860    opc = getUChar(delta); delta++;
26861 
26862    /* We get here if the current insn isn't SSE, or this CPU doesn't
26863       support SSE. */
26864 
26865    switch (opc) {
26866 
26867    /* ------------------------ Control flow --------------- */
26868 
26869    /* ------------------------ CWD/CDQ -------------------- */
26870 
26871    /* ------------------------ FPU ops -------------------- */
26872 
26873    /* ------------------------ INT ------------------------ */
26874 
26875    case 0xCD: { /* INT imm8 */
26876       IRJumpKind jk = Ijk_Boring;
26877       if (have66orF2orF3(pfx)) goto decode_failure;
26878       d64 = getUChar(delta); delta++;
26879       switch (d64) {
26880          case 32: jk = Ijk_Sys_int32; break;
26881          default: goto decode_failure;
26882       }
26883       guest_RIP_next_mustcheck = True;
26884       guest_RIP_next_assumed = guest_RIP_bbstart + delta;
26885       jmp_lit(jk, guest_RIP_next_assumed);
26886       /* It's important that all ArchRegs carry their up-to-date value
26887          at this point.  So we declare an end-of-block here, which
26888          forces any TempRegs caching ArchRegs to be flushed. */
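      /* (Concretely: the int handler therefore observes a RIP which
         already points at the insn following the INT, which is where
         execution is to resume afterwards.) */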
26889       vassert(dres.whatNext == Dis_StopHere);
26890       DIP("int $0x%02x\n", (UInt)d64);
26891       break;
26892    }
26893 
26894    /* ------------------------ Jcond, byte offset --------- */
26895 
26896    /* ------------------------ IMUL ----------------------- */
26897 
26898    /* ------------------------ MOV ------------------------ */
26899 
26900    /* ------------------------ MOVx ------------------------ */
26901 
26902    /* ------------------------ opl imm, A ----------------- */
26903 
26904    /* ------------------------ opl Ev, Gv ----------------- */
26905 
26906    /* ------------------------ opl Gv, Ev ----------------- */
26907 
26908    /* ------------------------ POP ------------------------ */
26909 
26910    /* ------------------------ PUSH ----------------------- */
26911 
26912    /* ------ AE: SCAS variants ------ */
26913 
26914    /* ------ A6, A7: CMPS variants ------ */
26915 
26916    /* ------ AA, AB: STOS variants ------ */
26917 
26918    /* ------ A4, A5: MOVS variants ------ */
26919 
26920    /* ------------------------ XCHG ----------------------- */
26921 
26922    /* ------------------------ IN / OUT ----------------------- */
26923 
26924    /* ------------------------ (Grp1 extensions) ---------- */
26925 
26926    /* ------------------------ (Grp2 extensions) ---------- */
26927 
26928    /* ------------------------ (Grp3 extensions) ---------- */
26929 
26930    /* ------------------------ (Grp4 extensions) ---------- */
26931 
26932    /* ------------------------ (Grp5 extensions) ---------- */
26933 
26934    /* ------------------------ Escapes to 2-byte opcodes -- */
26935 
26936    case 0x0F: {
26937       opc = getUChar(delta); delta++;
26938       switch (opc) {
26939 
26940       /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
26941 
26942       /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
26943 
26944       /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
26945 
26946       /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
26947 
26948       /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
26949 
26950       /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
26951 
26952       /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
26953 
26954       /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
26955 
26956       /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
26957 
26958       /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
26959 
26960       /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
26961 
26962       /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
26963 
26964       /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
26965 
26966       /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
26967 
26968       /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
26969 
26970       /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
26971 
26972       /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
26973 
26974       case 0xC0: { /* XADD Gb,Eb */
26975          Bool decode_OK = False;
26976          delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
26977          if (!decode_OK)
26978             goto decode_failure;
26979          break;
26980       }
26981 
26982       /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
26983 
26984       /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
26985 
26986       default:
26987          goto decode_failure;
26988       } /* switch (opc) for the 2-byte opcodes */
26989       goto decode_success;
26990    } /* case 0x0F: of primary opcode */
26991 
26992    /* ------------------------ ??? ------------------------ */
26993 #endif /* XYZZY */
26994 
26995    //default:
26996   decode_failure:
26997    /* All decode failures end up here. */
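   /* (Note: the eight bytes dumped below start at delta_start, the
      first byte of the instruction proper, so any prefix bytes are
      included in what is shown.) */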
26998    vex_printf("vex amd64->IR: unhandled instruction bytes: "
26999               "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
27000               (Int)getUChar(delta_start+0),
27001               (Int)getUChar(delta_start+1),
27002               (Int)getUChar(delta_start+2),
27003               (Int)getUChar(delta_start+3),
27004               (Int)getUChar(delta_start+4),
27005               (Int)getUChar(delta_start+5),
27006               (Int)getUChar(delta_start+6),
27007               (Int)getUChar(delta_start+7) );
27008    vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
27009               haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
27010               getRexX(pfx), getRexB(pfx));
27011    vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
27012               haveVEX(pfx) ? 1 : 0, getVexL(pfx),
27013               getVexNvvvv(pfx),
27014               esc==ESC_NONE ? "NONE" :
27015                  esc==ESC_0F ? "0F" :
27016                  esc==ESC_0F38 ? "0F38" :
27017                  esc==ESC_0F3A ? "0F3A" : "???");
27018    vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
27019               have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
27020               haveF3(pfx) ? 1 : 0);
27021 
27022    /* Tell the dispatcher that this insn cannot be decoded, and so has
27023       not been executed, and (is currently) the next to be executed.
27024       RIP should be up-to-date since it is made so at the start of each
27025       insn, but nevertheless be paranoid and update it again right
27026       now. */
27027    stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
27028    jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
27029    vassert(dres.whatNext == Dis_StopHere);
27030    dres.len = 0;
27031    /* We also need to say that a CAS is not expected now, regardless
27032       of what it might have been set to at the start of the function,
27033       since the IR that we've emitted just above (to synthesise a
27034       SIGILL) does not involve any CAS, and presumably no other IR has
27035       been emitted for this (non-decoded) insn. */
27036    *expect_CAS = False;
27037    return dres;
27038 
27039    // } /* switch (opc) for the main (primary) opcode switch. */
27040 
27041   decode_success:
27042    /* All decode successes end up here. */
27043    switch (dres.whatNext) {
27044       case Dis_Continue:
27045          stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
27046          break;
27047       case Dis_ResteerU:
27048       case Dis_ResteerC:
27049          stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
27050          break;
27051       case Dis_StopHere:
27052          break;
27053       default:
27054          vassert(0);
27055    }
27056 
27057    DIP("\n");
27058    dres.len = (Int)toUInt(delta - delta_start);
27059    return dres;
27060 }
27061 
27062 #undef DIP
27063 #undef DIS
27064 
27065 
27066 /*------------------------------------------------------------*/
27067 /*--- Top-level fn                                         ---*/
27068 /*------------------------------------------------------------*/
27069 
27070 /* Disassemble a single instruction into IR.  The instruction
27071    is located in host memory at &guest_code[delta]. */
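/* Apart from dispatching to disInstr_AMD64_WRK, the wrapper below also
   sanity-checks the result: any next-%rip value assumed during
   decoding must match the %rip actually implied by the insn's length,
   and an IRCAS statement must have been generated exactly when the
   decoder said one was expected (LOCK handling).  Either failure
   indicates a bug in the decoder and causes a panic. */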
27072 
27073 DisResult disInstr_AMD64 ( IRSB*        irsb_IN,
27074                            Bool         (*resteerOkFn) ( void*, Addr64 ),
27075                            Bool         resteerCisOk,
27076                            void*        callback_opaque,
27077                            UChar*       guest_code_IN,
27078                            Long         delta,
27079                            Addr64       guest_IP,
27080                            VexArch      guest_arch,
27081                            VexArchInfo* archinfo,
27082                            VexAbiInfo*  abiinfo,
27083                            Bool         host_bigendian_IN )
27084 {
27085    Int       i, x1, x2;
27086    Bool      expect_CAS, has_CAS;
27087    DisResult dres;
27088 
27089    /* Set globals (see top of this file) */
27090    vassert(guest_arch == VexArchAMD64);
27091    guest_code           = guest_code_IN;
27092    irsb                 = irsb_IN;
27093    host_is_bigendian    = host_bigendian_IN;
27094    guest_RIP_curr_instr = guest_IP;
27095    guest_RIP_bbstart    = guest_IP - delta;
27096 
27097    /* We'll consult these after doing disInstr_AMD64_WRK. */
27098    guest_RIP_next_assumed   = 0;
27099    guest_RIP_next_mustcheck = False;
27100 
27101    x1 = irsb_IN->stmts_used;
27102    expect_CAS = False;
27103    dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
27104                                resteerCisOk,
27105                                callback_opaque,
27106                                delta, archinfo, abiinfo );
27107    x2 = irsb_IN->stmts_used;
27108    vassert(x2 >= x1);
27109 
27110    /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
27111       got it right.  Failure of this assertion is serious and denotes
27112       a bug in disInstr. */
27113    if (guest_RIP_next_mustcheck
27114        && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
27115       vex_printf("\n");
27116       vex_printf("assumed next %%rip = 0x%llx\n",
27117                  guest_RIP_next_assumed );
27118       vex_printf(" actual next %%rip = 0x%llx\n",
27119                  guest_RIP_curr_instr + dres.len );
27120       vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
27121    }
27122 
27123    /* See comment at the top of disInstr_AMD64_WRK for meaning of
27124       expect_CAS.  Here, we (sanity-)check for the presence/absence of
27125       IRCAS as directed by the returned expect_CAS value. */
27126    has_CAS = False;
27127    for (i = x1; i < x2; i++) {
27128       if (irsb_IN->stmts[i]->tag == Ist_CAS)
27129          has_CAS = True;
27130    }
27131 
27132    if (expect_CAS != has_CAS) {
27133       /* Inconsistency detected.  Re-disassemble the instruction so as
27134          to generate a useful error message; then assert. */
27135       vex_traceflags |= VEX_TRACE_FE;
27136       dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
27137                                   resteerCisOk,
27138                                   callback_opaque,
27139                                   delta, archinfo, abiinfo );
27140       for (i = x1; i < x2; i++) {
27141          vex_printf("\t\t");
27142          ppIRStmt(irsb_IN->stmts[i]);
27143          vex_printf("\n");
27144       }
27145       /* Failure of this assertion is serious and denotes a bug in
27146          disInstr. */
27147       vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
27148    }
27149 
27150    return dres;
27151 }
27152 
27153 
27154 /*------------------------------------------------------------*/
27155 /*--- Unused stuff                                         ---*/
27156 /*------------------------------------------------------------*/
27157 
27158 // A potentially more Memcheck-friendly version of gen_LZCNT, if
27159 // this should ever be needed.
27160 //
27161 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
27162 //{
27163 //   /* Scheme is simple: propagate the most significant 1-bit into all
27164 //      lower positions in the word.  This gives a word of the form
27165 //      0---01---1.  Now invert it, giving a word of the form
27166 //      1---10---0, then do a population-count idiom (to count the 1s,
27167 //      which is the number of leading zeroes, or the word size if the
27168 //      original word was 0).
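//      (Worked example, for Ity_I16 with src = 0x0008: propagation
//      gives 0x000F, inversion gives 0xFFF0, and popcount(0xFFF0) = 12,
//      which is indeed the number of leading zeroes in 0x0008.)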
27169 //   */
27170 //   Int i;
27171 //   IRTemp t[7];
27172 //   for (i = 0; i < 7; i++) {
27173 //      t[i] = newTemp(ty);
27174 //   }
27175 //   if (ty == Ity_I64) {
27176 //      assign(t[0], binop(Iop_Or64, mkexpr(src),
27177 //                         binop(Iop_Shr64, mkexpr(src),  mkU8(1))));
27178 //      assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
27179 //                         binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
27180 //      assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
27181 //                         binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
27182 //      assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
27183 //                         binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
27184 //      assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
27185 //                         binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
27186 //      assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
27187 //                         binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
27188 //      assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
27189 //      return gen_POPCOUNT(ty, t[6]);
27190 //   }
27191 //   if (ty == Ity_I32) {
27192 //      assign(t[0], binop(Iop_Or32, mkexpr(src),
27193 //                         binop(Iop_Shr32, mkexpr(src),  mkU8(1))));
27194 //      assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
27195 //                         binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
27196 //      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
27197 //                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
27198 //      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
27199 //                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
27200 //      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
27201 //                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
27202 //      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
27203 //      return gen_POPCOUNT(ty, t[5]);
27204 //   }
27205 //   if (ty == Ity_I16) {
27206 //      assign(t[0], binop(Iop_Or16, mkexpr(src),
27207 //                         binop(Iop_Shr16, mkexpr(src),  mkU8(1))));
27208 //      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
27209 //                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
27210 //      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
27211 //                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
27212 //      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
27213 //                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
27214 //      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
27215 //      return gen_POPCOUNT(ty, t[4]);
27216 //   }
27217 //   vassert(0);
27218 //}
27219 
27220 
27221 /*--------------------------------------------------------------------*/
27222 /*--- end                                       guest_amd64_toIR.c ---*/
27223 /*--------------------------------------------------------------------*/
27224 