/*--------------------------------------------------------------------*/
/*--- begin                                     guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   RDTSC returns zero, always.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/

/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.  These are:

      cmp{s,sb,sw,sd,sq}
      in{s,sb,sw,sd}
      jcxz, jecxz, jrcxz
      lod{s,sb,sw,sd,sq}
      loop{,e,bz,be,z}
      mov{s,sb,sw,sd,sq}
      out{s,sb,sw,sd}
      rep{,e,ne,nz}
      sca{s,sb,sw,sd,sq}
      sto{s,sb,sw,sd,sq}
      xlat{,b} */
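
/* Editor's note (illustration only, not part of the decoder): in IR
   terms, the 0x67 behaviour described above amounts to truncating a
   computed 64-bit effective address to its low 32 bits, along the
   lines of

      addr = binop(Iop_And64, addr, mkU64(0xFFFFFFFFULL));

   where binop and mkU64 are the IR-building helpers defined later in
   this file. */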

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 3 is allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}

static 
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}



/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.   ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)
//..
//.. #define OFFB_CS       offsetof(VexGuestX86State,guest_CS)
//.. #define OFFB_DS       offsetof(VexGuestX86State,guest_DS)
//.. #define OFFB_ES       offsetof(VexGuestX86State,guest_ES)
//.. #define OFFB_FS       offsetof(VexGuestX86State,guest_FS)
//.. #define OFFB_GS       offsetof(VexGuestX86State,guest_GS)
//.. #define OFFB_SS       offsetof(VexGuestX86State,guest_SS)
//.. #define OFFB_LDT      offsetof(VexGuestX86State,guest_LDT)
//.. #define OFFB_GDT      offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestAMD64State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestAMD64State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestAMD64State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestAMD64State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestAMD64State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestAMD64State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestAMD64State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestAMD64State,guest_XMM7)
#define OFFB_XMM8      offsetof(VexGuestAMD64State,guest_XMM8)
#define OFFB_XMM9      offsetof(VexGuestAMD64State,guest_XMM9)
#define OFFB_XMM10     offsetof(VexGuestAMD64State,guest_XMM10)
#define OFFB_XMM11     offsetof(VexGuestAMD64State,guest_XMM11)
#define OFFB_XMM12     offsetof(VexGuestAMD64State,guest_XMM12)
#define OFFB_XMM13     offsetof(VexGuestAMD64State,guest_XMM13)
#define OFFB_XMM14     offsetof(VexGuestAMD64State,guest_XMM14)
#define OFFB_XMM15     offsetof(VexGuestAMD64State,guest_XMM15)
#define OFFB_XMM16     offsetof(VexGuestAMD64State,guest_XMM16)

#define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
#define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

//.. #define R_AL (0+R_EAX)
//.. #define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((((Long)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((((Long)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((((Long)x) << 32) >> 32);
}
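
/* Editor's illustration (not used by the decoder; the function is a
   made-up example).  The extend_s_* helpers above sign-extend by
   shifting the value up to bit 63 and arithmetically back down:
   0xFE, 0xFFFE and 0xFFFFFFFE all denote -2 at their respective
   widths, and all widen to the 64-bit representation of -2. */
__attribute__((unused))
static void example_sign_extension ( void )
{
   vassert( extend_s_8to64 ( 0xFE )        == 0xFFFFFFFFFFFFFFFEULL );
   vassert( extend_s_16to64( 0xFFFE )      == 0xFFFFFFFFFFFFFFFEULL );
   vassert( extend_s_32to64( 0xFFFFFFFEU ) == 0xFFFFFFFFFFFFFFFEULL );
}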

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}

static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSizeMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 16 bits of the prefix are 0x3141, just as a hacky way
   to ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */

#define PFX_EMPTY 0x31410000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
/* Apparently unused.
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
*/
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}
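
/* Editor's illustration (not part of the decoder; the function and
   values are made up for the example).  A well-formed Prefix always
   carries the 0x3141 signature in its top 16 bits; an instruction
   seen with just a REX.W byte (0x48) would be summarised like this. */
__attribute__((unused))
static void example_prefix_encoding ( void )
{
   Prefix pfx = PFX_EMPTY | PFX_REX | PFX_REXW;
   vassert( IS_VALID_PFX(pfx) );
   vassert( haveREX(pfx) );
   vassert( getRexW(pfx) == 1 && getRexB(pfx) == 0 );
}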

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}

/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return 
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F2 set */
static Bool have66orF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2)) > 0);
}

/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return 
      p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}


/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/


/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}


/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */

static 
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}

/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static 
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}


/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}
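
/* Editor's illustration (not used by the decoder; the function is a
   made-up example).  It shows the "irregular" 8-bit encoding described
   above: with no REX byte, register number 4 at size 1 means %ah (one
   byte above %al in the RAX slot), whereas with a REX byte present the
   same number means %spl (the low byte of the RSP slot). */
__attribute__((unused))
static void example_irregular_8bit_regs ( void )
{
   vassert( offsetIReg(1, R_RSP, True)  == 1 + OFFB_RAX );  /* %ah  */
   vassert( offsetIReg(1, R_RSP, False) == OFFB_RSP );      /* %spl */
   vassert( offsetIReg(8, R_RSP, False) == OFFB_RSP );      /* %rsp */
}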

/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(!host_is_bigendian);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}
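
/* Editor's note (illustration only): as the put-functions above show,
   4-byte writes are widened with Iop_32Uto64 before being stored, so
   for example

      putIRegRAX( 4, mkU32(0) );

   clears all 64 bits of %rax.  This mirrors the architected AMD64
   behaviour, where writing a 32-bit register zeroes the upper 32 bits
   of the containing 64-bit register. */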

/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), 
                     unop(Iop_32Uto64,e) ) );
}

static HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}


/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), 
                     unop(Iop_16Uto64,e) ) );
}

static HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}


/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                  )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field. */

static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}


/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
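
/* Editor's illustration (not used by the decoder; the function is a
   made-up example).  Decoding the modRM byte 0xD8 (binary 11 011 000):
   mod is 3, so the e part names a register; the g field is 3 and the
   e field is 0.  REX.R extends g and REX.B extends e by one high bit
   each. */
__attribute__((unused))
static void example_modrm_decode ( void )
{
   UChar  modrm = 0xD8;
   Prefix rex   = PFX_EMPTY | PFX_REX | PFX_REXR | PFX_REXB;
   vassert( epartIsReg(modrm) );
   vassert( gregOfRexRM(PFX_EMPTY, modrm) == 3  );  /* %rbx */
   vassert( eregOfRexRM(PFX_EMPTY, modrm) == 0  );  /* %rax */
   vassert( gregOfRexRM(rex,       modrm) == 11 );  /* %r11 */
   vassert( eregOfRexRM(rex,       modrm) == 8  );  /* %r8  */
}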

/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

//.. static Int segmentGuestRegOffset ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return OFFB_ES;
//..       case R_CS: return OFFB_CS;
//..       case R_SS: return OFFB_SS;
//..       case R_DS: return OFFB_DS;
//..       case R_FS: return OFFB_FS;
//..       case R_GS: return OFFB_GS;
//..       default: vpanic("segmentGuestRegOffset(x86)");
//..    }
//.. }

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0:  return OFFB_XMM0;
      case 1:  return OFFB_XMM1;
      case 2:  return OFFB_XMM2;
      case 3:  return OFFB_XMM3;
      case 4:  return OFFB_XMM4;
      case 5:  return OFFB_XMM5;
      case 6:  return OFFB_XMM6;
      case 7:  return OFFB_XMM7;
      case 8:  return OFFB_XMM8;
      case 9:  return OFFB_XMM9;
      case 10: return OFFB_XMM10;
      case 11: return OFFB_XMM11;
      case 12: return OFFB_XMM12;
      case 13: return OFFB_XMM13;
      case 14: return OFFB_XMM14;
      case 15: return OFFB_XMM15;
      default: vpanic("xmmGuestRegOffset(amd64)");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}
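
/* Editor's illustration (not used by the decoder; the function is a
   made-up example).  Lane offsets are measured from the low end of
   the register, so 32-bit lane 2 and 64-bit lane 1 of %xmm3 both
   start 8 bytes above the start of the XMM3 slot. */
__attribute__((unused))
static void example_xmm_lane_offsets ( void )
{
   vassert( xmmGuestRegLane16offset(3, 0) == OFFB_XMM3 );
   vassert( xmmGuestRegLane32offset(3, 2) == OFFB_XMM3 + 8 );
   vassert( xmmGuestRegLane64offset(3, 1) == OFFB_XMM3 + 8 );
}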

//.. static IRExpr* getSReg ( UInt sreg )
//.. {
//..    return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
//.. }
//..
//.. static void putSReg ( UInt sreg, IRExpr* e )
//.. {
//..    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
//..    stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
//.. }

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point )
         ));
}


/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
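
/* Editor's note (illustration only): these helpers are how generated
   code consumes flag values.  A conditional branch on the Z flag, for
   example, would be built roughly as

      IRExpr* z = mk_amd64g_calculate_condition( AMD64CondZ );

   yielding an Ity_I1 expression that is computed lazily from the four
   CC_* thunk fields rather than from eagerly-maintained flag bits. */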

/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static 
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static 
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* For shift operations, we put in the result and the undershifted
   result.  However, if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                        IRTemp  res,
                                        IRTemp  resUS,
                                        IRType  ty,
                                        IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                   mkU64(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                   widenUto64(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                   widenUto64(mkexpr(resUS)))) );
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
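
/* Editor's note (illustration only): a typical use of the helpers
   above.  For a flag-setting subtract such as "subq %rsi, %rdi", the
   decoder computes the result into a temporary and then records the
   two operands in the thunk, roughly (argL/argR are hypothetical
   temporaries):

      assign( dst, binop(Iop_Sub64, mkexpr(argL), mkexpr(argR)) );
      setFlags_DEP1_DEP2( Iop_Sub8, argL, argR, Ity_I64 );

   The individual rflags bits are only materialised later, on demand,
   by amd64g_calculate_rflags_all and friends. */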
*/ 1767 1768 static 1769 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1770 { 1771 switch (ty) { 1772 case Ity_I8: 1773 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1774 break; 1775 case Ity_I16: 1776 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1777 break; 1778 case Ity_I32: 1779 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1780 break; 1781 case Ity_I64: 1782 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1783 break; 1784 default: 1785 vpanic("setFlags_MUL(amd64)"); 1786 } 1787 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1788 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1789 } 1790 1791 1792 /* -------------- Condition codes. -------------- */ 1793 1794 /* Condition codes, using the AMD encoding. */ 1795 1796 static HChar* name_AMD64Condcode ( AMD64Condcode cond ) 1797 { 1798 switch (cond) { 1799 case AMD64CondO: return "o"; 1800 case AMD64CondNO: return "no"; 1801 case AMD64CondB: return "b"; 1802 case AMD64CondNB: return "ae"; /*"nb";*/ 1803 case AMD64CondZ: return "e"; /*"z";*/ 1804 case AMD64CondNZ: return "ne"; /*"nz";*/ 1805 case AMD64CondBE: return "be"; 1806 case AMD64CondNBE: return "a"; /*"nbe";*/ 1807 case AMD64CondS: return "s"; 1808 case AMD64CondNS: return "ns"; 1809 case AMD64CondP: return "p"; 1810 case AMD64CondNP: return "np"; 1811 case AMD64CondL: return "l"; 1812 case AMD64CondNL: return "ge"; /*"nl";*/ 1813 case AMD64CondLE: return "le"; 1814 case AMD64CondNLE: return "g"; /*"nle";*/ 1815 case AMD64CondAlways: return "ALWAYS"; 1816 default: vpanic("name_AMD64Condcode"); 1817 } 1818 } 1819 1820 static 1821 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond, 1822 /*OUT*/Bool* needInvert ) 1823 { 1824 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE); 1825 if (cond & 1) { 1826 *needInvert = True; 1827 return cond-1; 1828 } else { 1829 *needInvert = False; 1830 return cond; 1831 } 1832 } 1833 1834 1835 /* -------------- Helpers for ADD/SUB with carry. -------------- */ 1836 1837 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1838 appropriately. 1839 1840 Optionally, generate a store for the 'tres' value. This can either 1841 be a normal store, or it can be a cas-with-possible-failure style 1842 store: 1843 1844 if taddr is IRTemp_INVALID, then no store is generated. 1845 1846 if taddr is not IRTemp_INVALID, then a store (using taddr as 1847 the address) is generated: 1848 1849 if texpVal is IRTemp_INVALID then a normal store is 1850 generated, and restart_point must be zero (it is irrelevant). 1851 1852 if texpVal is not IRTemp_INVALID then a cas-style store is 1853 generated. texpVal is the expected value, restart_point 1854 is the restart point if the store fails, and texpVal must 1855 have the same type as tres. 
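To illustrate the three cases with the names used below: a register-only ADC passes taddr == texpVal == IRTemp_INVALID and restart_point == 0; a plain memory-destination ADC passes the address in taddr with texpVal == IRTemp_INVALID; and a LOCK-prefixed memory ADC passes the originally loaded value as texpVal together with the current instruction's address as restart_point, so a failed CAS side-exits back to re-execute the insn (see dis_op2_G_E and dis_Grp1).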
1856 1857 */ 1858 static void helper_ADC ( Int sz, 1859 IRTemp tres, IRTemp ta1, IRTemp ta2, 1860 /* info about optional store: */ 1861 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1862 { 1863 UInt thunkOp; 1864 IRType ty = szToITy(sz); 1865 IRTemp oldc = newTemp(Ity_I64); 1866 IRTemp oldcn = newTemp(ty); 1867 IROp plus = mkSizedOp(ty, Iop_Add8); 1868 IROp xor = mkSizedOp(ty, Iop_Xor8); 1869 1870 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1871 1872 switch (sz) { 1873 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break; 1874 case 4: thunkOp = AMD64G_CC_OP_ADCL; break; 1875 case 2: thunkOp = AMD64G_CC_OP_ADCW; break; 1876 case 1: thunkOp = AMD64G_CC_OP_ADCB; break; 1877 default: vassert(0); 1878 } 1879 1880 /* oldc = old carry flag, 0 or 1 */ 1881 assign( oldc, binop(Iop_And64, 1882 mk_amd64g_calculate_rflags_c(), 1883 mkU64(1)) ); 1884 1885 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1886 1887 assign( tres, binop(plus, 1888 binop(plus,mkexpr(ta1),mkexpr(ta2)), 1889 mkexpr(oldcn)) ); 1890 1891 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1892 start of this function. */ 1893 if (taddr != IRTemp_INVALID) { 1894 if (texpVal == IRTemp_INVALID) { 1895 vassert(restart_point == 0); 1896 storeLE( mkexpr(taddr), mkexpr(tres) ); 1897 } else { 1898 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 1899 /* .. and hence 'texpVal' has the same type as 'tres'. */ 1900 casLE( mkexpr(taddr), 1901 mkexpr(texpVal), mkexpr(tres), restart_point ); 1902 } 1903 } 1904 1905 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 1906 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) )); 1907 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 1908 mkexpr(oldcn)) )) ); 1909 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 1910 } 1911 1912 1913 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags 1914 appropriately. As with helper_ADC, possibly generate a store of 1915 the result -- see comments on helper_ADC for details. 1916 */ 1917 static void helper_SBB ( Int sz, 1918 IRTemp tres, IRTemp ta1, IRTemp ta2, 1919 /* info about optional store: */ 1920 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1921 { 1922 UInt thunkOp; 1923 IRType ty = szToITy(sz); 1924 IRTemp oldc = newTemp(Ity_I64); 1925 IRTemp oldcn = newTemp(ty); 1926 IROp minus = mkSizedOp(ty, Iop_Sub8); 1927 IROp xor = mkSizedOp(ty, Iop_Xor8); 1928 1929 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1930 1931 switch (sz) { 1932 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break; 1933 case 4: thunkOp = AMD64G_CC_OP_SBBL; break; 1934 case 2: thunkOp = AMD64G_CC_OP_SBBW; break; 1935 case 1: thunkOp = AMD64G_CC_OP_SBBB; break; 1936 default: vassert(0); 1937 } 1938 1939 /* oldc = old carry flag, 0 or 1 */ 1940 assign( oldc, binop(Iop_And64, 1941 mk_amd64g_calculate_rflags_c(), 1942 mkU64(1)) ); 1943 1944 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1945 1946 assign( tres, binop(minus, 1947 binop(minus,mkexpr(ta1),mkexpr(ta2)), 1948 mkexpr(oldcn)) ); 1949 1950 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1951 start of this function. */ 1952 if (taddr != IRTemp_INVALID) { 1953 if (texpVal == IRTemp_INVALID) { 1954 vassert(restart_point == 0); 1955 storeLE( mkexpr(taddr), mkexpr(tres) ); 1956 } else { 1957 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 1958 /* .. and hence 'texpVal' has the same type as 'tres'. 
*/ 1959 casLE( mkexpr(taddr), 1960 mkexpr(texpVal), mkexpr(tres), restart_point ); 1961 } 1962 } 1963 1964 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 1965 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) ); 1966 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 1967 mkexpr(oldcn)) )) ); 1968 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 1969 } 1970 1971 1972 /* -------------- Helpers for disassembly printing. -------------- */ 1973 1974 static HChar* nameGrp1 ( Int opc_aux ) 1975 { 1976 static HChar* grp1_names[8] 1977 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; 1978 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)"); 1979 return grp1_names[opc_aux]; 1980 } 1981 1982 static HChar* nameGrp2 ( Int opc_aux ) 1983 { 1984 static HChar* grp2_names[8] 1985 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; 1986 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)"); 1987 return grp2_names[opc_aux]; 1988 } 1989 1990 static HChar* nameGrp4 ( Int opc_aux ) 1991 { 1992 static HChar* grp4_names[8] 1993 = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; 1994 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)"); 1995 return grp4_names[opc_aux]; 1996 } 1997 1998 static HChar* nameGrp5 ( Int opc_aux ) 1999 { 2000 static HChar* grp5_names[8] 2001 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; 2002 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)"); 2003 return grp5_names[opc_aux]; 2004 } 2005 2006 static HChar* nameGrp8 ( Int opc_aux ) 2007 { 2008 static HChar* grp8_names[8] 2009 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; 2010 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)"); 2011 return grp8_names[opc_aux]; 2012 } 2013 2014 //.. static HChar* nameSReg ( UInt sreg ) 2015 //.. { 2016 //.. switch (sreg) { 2017 //.. case R_ES: return "%es"; 2018 //.. case R_CS: return "%cs"; 2019 //.. case R_SS: return "%ss"; 2020 //.. case R_DS: return "%ds"; 2021 //.. case R_FS: return "%fs"; 2022 //.. case R_GS: return "%gs"; 2023 //.. default: vpanic("nameSReg(x86)"); 2024 //.. } 2025 //.. 
} 2026 2027 static HChar* nameMMXReg ( Int mmxreg ) 2028 { 2029 static HChar* mmx_names[8] 2030 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; 2031 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)"); 2032 return mmx_names[mmxreg]; 2033 } 2034 2035 static HChar* nameXMMReg ( Int xmmreg ) 2036 { 2037 static HChar* xmm_names[16] 2038 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 2039 "%xmm4", "%xmm5", "%xmm6", "%xmm7", 2040 "%xmm8", "%xmm9", "%xmm10", "%xmm11", 2041 "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; 2042 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)"); 2043 return xmm_names[xmmreg]; 2044 } 2045 2046 static HChar* nameMMXGran ( Int gran ) 2047 { 2048 switch (gran) { 2049 case 0: return "b"; 2050 case 1: return "w"; 2051 case 2: return "d"; 2052 case 3: return "q"; 2053 default: vpanic("nameMMXGran(amd64,guest)"); 2054 } 2055 } 2056 2057 static HChar nameISize ( Int size ) 2058 { 2059 switch (size) { 2060 case 8: return 'q'; 2061 case 4: return 'l'; 2062 case 2: return 'w'; 2063 case 1: return 'b'; 2064 default: vpanic("nameISize(amd64)"); 2065 } 2066 } 2067 2068 2069 /*------------------------------------------------------------*/ 2070 /*--- JMP helpers ---*/ 2071 /*------------------------------------------------------------*/ 2072 2073 static void jmp_lit( IRJumpKind kind, Addr64 d64 ) 2074 { 2075 irsb->next = mkU64(d64); 2076 irsb->jumpkind = kind; 2077 } 2078 2079 static void jmp_treg( IRJumpKind kind, IRTemp t ) 2080 { 2081 irsb->next = mkexpr(t); 2082 irsb->jumpkind = kind; 2083 } 2084 2085 static 2086 void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) 2087 { 2088 Bool invert; 2089 AMD64Condcode condPos; 2090 condPos = positiveIse_AMD64Condcode ( cond, &invert ); 2091 if (invert) { 2092 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2093 Ijk_Boring, 2094 IRConst_U64(d64_false) ) ); 2095 irsb->next = mkU64(d64_true); 2096 irsb->jumpkind = Ijk_Boring; 2097 } else { 2098 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2099 Ijk_Boring, 2100 IRConst_U64(d64_true) ) ); 2101 irsb->next = mkU64(d64_false); 2102 irsb->jumpkind = Ijk_Boring; 2103 } 2104 } 2105 2106 /* Let new_rsp be the %rsp value after a call/return. Let nia be the 2107 guest address of the next instruction to be executed. 2108 2109 This function generates an AbiHint to say that -128(%rsp) 2110 .. -1(%rsp) should now be regarded as uninitialised. 2111 */ 2112 static 2113 void make_redzone_AbiHint ( VexAbiInfo* vbi, 2114 IRTemp new_rsp, IRTemp nia, HChar* who ) 2115 { 2116 Int szB = vbi->guest_stack_redzone_size; 2117 vassert(szB >= 0); 2118 2119 /* A bit of a kludge. Currently the only AbI we've guested AMD64 2120 for is ELF. So just check it's the expected 128 value 2121 (paranoia). 
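(For background, an illustrative note: the AbiHint emitted below is advisory; a tool such as Memcheck may use it to treat the 128 bytes below new_rsp as undefined again after a call or return, while other tools are free to ignore it.)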
*/ 2122 vassert(szB == 128); 2123 2124 if (0) vex_printf("AbiHint: %s\n", who); 2125 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64); 2126 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); 2127 if (szB > 0) 2128 stmt( IRStmt_AbiHint( 2129 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 2130 szB, 2131 mkexpr(nia) 2132 )); 2133 } 2134 2135 2136 /*------------------------------------------------------------*/ 2137 /*--- Disassembling addressing modes ---*/ 2138 /*------------------------------------------------------------*/ 2139 2140 static 2141 HChar* segRegTxt ( Prefix pfx ) 2142 { 2143 if (pfx & PFX_CS) return "%cs:"; 2144 if (pfx & PFX_DS) return "%ds:"; 2145 if (pfx & PFX_ES) return "%es:"; 2146 if (pfx & PFX_FS) return "%fs:"; 2147 if (pfx & PFX_GS) return "%gs:"; 2148 if (pfx & PFX_SS) return "%ss:"; 2149 return ""; /* no override */ 2150 } 2151 2152 2153 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 2154 linear address by adding any required segment override as indicated 2155 by sorb, and also dealing with any address size override 2156 present. */ 2157 static 2158 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi, 2159 Prefix pfx, IRExpr* virtual ) 2160 { 2161 /* --- segment overrides --- */ 2162 if (pfx & PFX_FS) { 2163 if (vbi->guest_amd64_assume_fs_is_zero) { 2164 /* Note that this is a linux-kernel specific hack that relies 2165 on the assumption that %fs is always zero. */ 2166 /* return virtual + guest_FS_ZERO. */ 2167 virtual = binop(Iop_Add64, virtual, 2168 IRExpr_Get(OFFB_FS_ZERO, Ity_I64)); 2169 } else { 2170 unimplemented("amd64 %fs segment override"); 2171 } 2172 } 2173 2174 if (pfx & PFX_GS) { 2175 if (vbi->guest_amd64_assume_gs_is_0x60) { 2176 /* Note that this is a darwin-kernel specific hack that relies 2177 on the assumption that %gs is always 0x60. */ 2178 /* return virtual + guest_GS_0x60. */ 2179 virtual = binop(Iop_Add64, virtual, 2180 IRExpr_Get(OFFB_GS_0x60, Ity_I64)); 2181 } else { 2182 unimplemented("amd64 %gs segment override"); 2183 } 2184 } 2185 2186 /* cs, ds, es and ss are simply ignored in 64-bit mode. */ 2187 2188 /* --- address size override --- */ 2189 if (haveASO(pfx)) 2190 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual)); 2191 2192 return virtual; 2193 } 2194 2195 //.. { 2196 //.. Int sreg; 2197 //.. IRType hWordTy; 2198 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 2199 //.. 2200 //.. if (sorb == 0) 2201 //.. /* the common case - no override */ 2202 //.. return virtual; 2203 //.. 2204 //.. switch (sorb) { 2205 //.. case 0x3E: sreg = R_DS; break; 2206 //.. case 0x26: sreg = R_ES; break; 2207 //.. case 0x64: sreg = R_FS; break; 2208 //.. case 0x65: sreg = R_GS; break; 2209 //.. default: vpanic("handleAddrOverrides(x86,guest)"); 2210 //.. } 2211 //.. 2212 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 2213 //.. 2214 //.. seg_selector = newTemp(Ity_I32); 2215 //.. ldt_ptr = newTemp(hWordTy); 2216 //.. gdt_ptr = newTemp(hWordTy); 2217 //.. r64 = newTemp(Ity_I64); 2218 //.. 2219 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 2220 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 2221 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 2222 //.. 2223 //.. /* 2224 //.. Call this to do the translation and limit checks: 2225 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2226 //.. UInt seg_selector, UInt virtual_addr ) 2227 //.. */ 2228 //.. assign( 2229 //.. r64, 2230 //.. mkIRExprCCall( 2231 //.. Ity_I64, 2232 //.. 0/*regparms*/, 2233 //.. "x86g_use_seg_selector", 2234 //.. 
&x86g_use_seg_selector, 2235 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 2236 //.. mkexpr(seg_selector), virtual) 2237 //.. ) 2238 //.. ); 2239 //.. 2240 //.. /* If the high 32 of the result are non-zero, there was a 2241 //.. failure in address translation. In which case, make a 2242 //.. quick exit. 2243 //.. */ 2244 //.. stmt( 2245 //.. IRStmt_Exit( 2246 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), 2247 //.. Ijk_MapFail, 2248 //.. IRConst_U32( guest_eip_curr_instr ) 2249 //.. ) 2250 //.. ); 2251 //.. 2252 //.. /* otherwise, here's the translated result. */ 2253 //.. return unop(Iop_64to32, mkexpr(r64)); 2254 //.. } 2255 2256 2257 /* Generate IR to calculate an address indicated by a ModRM and 2258 following SIB bytes. The expression, and the number of bytes in 2259 the address mode, are returned (the latter in *len). Note that 2260 this fn should not be called if the R/M part of the address denotes 2261 a register instead of memory. If print_codegen is true, text of 2262 the addressing mode is placed in buf. 2263 2264 The computed address is stored in a new tempreg, and the 2265 identity of the tempreg is returned. 2266 2267 extra_bytes holds the number of bytes after the amode, as supplied 2268 by the caller. This is needed to make sense of %rip-relative 2269 addresses. Note that the value that *len is set to is only the 2270 length of the amode itself and does not include the value supplied 2271 in extra_bytes. 2272 */ 2273 2274 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 ) 2275 { 2276 IRTemp tmp = newTemp(Ity_I64); 2277 assign( tmp, addr64 ); 2278 return tmp; 2279 } 2280 2281 static 2282 IRTemp disAMode ( /*OUT*/Int* len, 2283 VexAbiInfo* vbi, Prefix pfx, Long delta, 2284 /*OUT*/HChar* buf, Int extra_bytes ) 2285 { 2286 UChar mod_reg_rm = getUChar(delta); 2287 delta++; 2288 2289 buf[0] = (UChar)0; 2290 vassert(extra_bytes >= 0 && extra_bytes < 10); 2291 2292 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2293 jump table seems a bit excessive. 2294 */ 2295 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2296 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2297 /* is now XX0XXYYY */ 2298 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2299 switch (mod_reg_rm) { 2300 2301 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2302 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2303 */ 2304 case 0x00: case 0x01: case 0x02: case 0x03: 2305 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2306 { UChar rm = toUChar(mod_reg_rm & 7); 2307 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2308 *len = 1; 2309 return disAMode_copy2tmp( 2310 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm))); 2311 } 2312 2313 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2314 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2315 */ 2316 case 0x08: case 0x09: case 0x0A: case 0x0B: 2317 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2318 { UChar rm = toUChar(mod_reg_rm & 7); 2319 Long d = getSDisp8(delta); 2320 if (d == 0) { 2321 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2322 } else { 2323 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2324 } 2325 *len = 2; 2326 return disAMode_copy2tmp( 2327 handleAddrOverrides(vbi, pfx, 2328 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2329 } 2330 2331 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2332 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2333 */ 2334 case 0x10: case 0x11: case 0x12: case 0x13: 2335 /* ! 
14 */ case 0x15: case 0x16: case 0x17: 2336 { UChar rm = toUChar(mod_reg_rm & 7); 2337 Long d = getSDisp32(delta); 2338 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2339 *len = 5; 2340 return disAMode_copy2tmp( 2341 handleAddrOverrides(vbi, pfx, 2342 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2343 } 2344 2345 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2346 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ 2347 case 0x18: case 0x19: case 0x1A: case 0x1B: 2348 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2349 vpanic("disAMode(amd64): not an addr!"); 2350 2351 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set 2352 correctly at the start of handling each instruction. */ 2353 case 0x05: 2354 { Long d = getSDisp32(delta); 2355 *len = 5; 2356 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); 2357 /* We need to know the next instruction's start address. 2358 Try and figure out what it is, record the guess, and ask 2359 the top-level driver logic (bbToIR_AMD64) to check we 2360 guessed right, after the instruction is completely 2361 decoded. */ 2362 guest_RIP_next_mustcheck = True; 2363 guest_RIP_next_assumed = guest_RIP_bbstart 2364 + delta+4 + extra_bytes; 2365 return disAMode_copy2tmp( 2366 handleAddrOverrides(vbi, pfx, 2367 binop(Iop_Add64, mkU64(guest_RIP_next_assumed), 2368 mkU64(d)))); 2369 } 2370 2371 case 0x04: { 2372 /* SIB, with no displacement. Special cases: 2373 -- %rsp cannot act as an index value. 2374 If index_r indicates %rsp, zero is used for the index. 2375 -- when mod is zero and base indicates RBP or R13, base is 2376 instead a 32-bit sign-extended literal. 2377 It's all madness, I tell you. Extract %index, %base and 2378 scale from the SIB byte. The value denoted is then: 2379 | %index == %RSP && (%base == %RBP || %base == %R13) 2380 = d32 following SIB byte 2381 | %index == %RSP && !(%base == %RBP || %base == %R13) 2382 = %base 2383 | %index != %RSP && (%base == %RBP || %base == %R13) 2384 = d32 following SIB byte + (%index << scale) 2385 | %index != %RSP && !(%base == %RBP || %base == %R13) 2386 = %base + (%index << scale) 2387 */ 2388 UChar sib = getUChar(delta); 2389 UChar scale = toUChar((sib >> 6) & 3); 2390 UChar index_r = toUChar((sib >> 3) & 7); 2391 UChar base_r = toUChar(sib & 7); 2392 /* correct since #(R13) == 8 + #(RBP) */ 2393 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2394 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx)); 2395 delta++; 2396 2397 if ((!index_is_SP) && (!base_is_BPor13)) { 2398 if (scale == 0) { 2399 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2400 nameIRegRexB(8,pfx,base_r), 2401 nameIReg64rexX(pfx,index_r)); 2402 } else { 2403 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2404 nameIRegRexB(8,pfx,base_r), 2405 nameIReg64rexX(pfx,index_r), 1<<scale); 2406 } 2407 *len = 2; 2408 return 2409 disAMode_copy2tmp( 2410 handleAddrOverrides(vbi, pfx, 2411 binop(Iop_Add64, 2412 getIRegRexB(8,pfx,base_r), 2413 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2414 mkU8(scale))))); 2415 } 2416 2417 if ((!index_is_SP) && base_is_BPor13) { 2418 Long d = getSDisp32(delta); 2419 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, 2420 nameIReg64rexX(pfx,index_r), 1<<scale); 2421 *len = 6; 2422 return 2423 disAMode_copy2tmp( 2424 handleAddrOverrides(vbi, pfx, 2425 binop(Iop_Add64, 2426 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2427 mkU8(scale)), 2428 mkU64(d)))); 2429 } 2430 2431 if (index_is_SP && (!base_is_BPor13)) { 2432 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r)); 
2433 *len = 2; 2434 return disAMode_copy2tmp( 2435 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r))); 2436 } 2437 2438 if (index_is_SP && base_is_BPor13) { 2439 Long d = getSDisp32(delta); 2440 DIS(buf, "%s%lld", segRegTxt(pfx), d); 2441 *len = 6; 2442 return disAMode_copy2tmp( 2443 handleAddrOverrides(vbi, pfx, mkU64(d))); 2444 } 2445 2446 vassert(0); 2447 } 2448 2449 /* SIB, with 8-bit displacement. Special cases: 2450 -- %esp cannot act as an index value. 2451 If index_r indicates %esp, zero is used for the index. 2452 Denoted value is: 2453 | %index == %ESP 2454 = d8 + %base 2455 | %index != %ESP 2456 = d8 + %base + (%index << scale) 2457 */ 2458 case 0x0C: { 2459 UChar sib = getUChar(delta); 2460 UChar scale = toUChar((sib >> 6) & 3); 2461 UChar index_r = toUChar((sib >> 3) & 7); 2462 UChar base_r = toUChar(sib & 7); 2463 Long d = getSDisp8(delta+1); 2464 2465 if (index_r == R_RSP && 0==getRexX(pfx)) { 2466 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2467 d, nameIRegRexB(8,pfx,base_r)); 2468 *len = 3; 2469 return disAMode_copy2tmp( 2470 handleAddrOverrides(vbi, pfx, 2471 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2472 } else { 2473 if (scale == 0) { 2474 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2475 nameIRegRexB(8,pfx,base_r), 2476 nameIReg64rexX(pfx,index_r)); 2477 } else { 2478 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2479 nameIRegRexB(8,pfx,base_r), 2480 nameIReg64rexX(pfx,index_r), 1<<scale); 2481 } 2482 *len = 3; 2483 return 2484 disAMode_copy2tmp( 2485 handleAddrOverrides(vbi, pfx, 2486 binop(Iop_Add64, 2487 binop(Iop_Add64, 2488 getIRegRexB(8,pfx,base_r), 2489 binop(Iop_Shl64, 2490 getIReg64rexX(pfx,index_r), mkU8(scale))), 2491 mkU64(d)))); 2492 } 2493 vassert(0); /*NOTREACHED*/ 2494 } 2495 2496 /* SIB, with 32-bit displacement. Special cases: 2497 -- %rsp cannot act as an index value. 2498 If index_r indicates %rsp, zero is used for the index. 2499 Denoted value is: 2500 | %index == %RSP 2501 = d32 + %base 2502 | %index != %RSP 2503 = d32 + %base + (%index << scale) 2504 */ 2505 case 0x14: { 2506 UChar sib = getUChar(delta); 2507 UChar scale = toUChar((sib >> 6) & 3); 2508 UChar index_r = toUChar((sib >> 3) & 7); 2509 UChar base_r = toUChar(sib & 7); 2510 Long d = getSDisp32(delta+1); 2511 2512 if (index_r == R_RSP && 0==getRexX(pfx)) { 2513 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2514 d, nameIRegRexB(8,pfx,base_r)); 2515 *len = 6; 2516 return disAMode_copy2tmp( 2517 handleAddrOverrides(vbi, pfx, 2518 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2519 } else { 2520 if (scale == 0) { 2521 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2522 nameIRegRexB(8,pfx,base_r), 2523 nameIReg64rexX(pfx,index_r)); 2524 } else { 2525 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2526 nameIRegRexB(8,pfx,base_r), 2527 nameIReg64rexX(pfx,index_r), 1<<scale); 2528 } 2529 *len = 6; 2530 return 2531 disAMode_copy2tmp( 2532 handleAddrOverrides(vbi, pfx, 2533 binop(Iop_Add64, 2534 binop(Iop_Add64, 2535 getIRegRexB(8,pfx,base_r), 2536 binop(Iop_Shl64, 2537 getIReg64rexX(pfx,index_r), mkU8(scale))), 2538 mkU64(d)))); 2539 } 2540 vassert(0); /*NOTREACHED*/ 2541 } 2542 2543 default: 2544 vpanic("disAMode(amd64)"); 2545 return 0; /*notreached*/ 2546 } 2547 } 2548 2549 2550 /* Figure out the number of (insn-stream) bytes constituting the amode 2551 beginning at delta. Is useful for getting hold of literals beyond 2552 the end of the amode before it has been disassembled. 
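For instance (illustrative use only), a caller decoding an 'op $imm, E' form can compute 'delta + lengthAMode(pfx, delta)' to find where the immediate starts, and can then pass the immediate's size to disAMode as extra_bytes so that %rip-relative modes come out right.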
*/ 2553 2554 static UInt lengthAMode ( Prefix pfx, Long delta ) 2555 { 2556 UChar mod_reg_rm = getUChar(delta); 2557 delta++; 2558 2559 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2560 jump table seems a bit excessive. 2561 */ 2562 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2563 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2564 /* is now XX0XXYYY */ 2565 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2566 switch (mod_reg_rm) { 2567 2568 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2569 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2570 */ 2571 case 0x00: case 0x01: case 0x02: case 0x03: 2572 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2573 return 1; 2574 2575 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2576 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2577 */ 2578 case 0x08: case 0x09: case 0x0A: case 0x0B: 2579 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2580 return 2; 2581 2582 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2583 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2584 */ 2585 case 0x10: case 0x11: case 0x12: case 0x13: 2586 /* ! 14 */ case 0x15: case 0x16: case 0x17: 2587 return 5; 2588 2589 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2590 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */ 2591 /* Not an address, but still handled. */ 2592 case 0x18: case 0x19: case 0x1A: case 0x1B: 2593 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2594 return 1; 2595 2596 /* RIP + disp32. */ 2597 case 0x05: 2598 return 5; 2599 2600 case 0x04: { 2601 /* SIB, with no displacement. */ 2602 UChar sib = getUChar(delta); 2603 UChar base_r = toUChar(sib & 7); 2604 /* correct since #(R13) == 8 + #(RBP) */ 2605 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2606 2607 if (base_is_BPor13) { 2608 return 6; 2609 } else { 2610 return 2; 2611 } 2612 } 2613 2614 /* SIB, with 8-bit displacement. */ 2615 case 0x0C: 2616 return 3; 2617 2618 /* SIB, with 32-bit displacement. */ 2619 case 0x14: 2620 return 6; 2621 2622 default: 2623 vpanic("lengthAMode(amd64)"); 2624 return 0; /*notreached*/ 2625 } 2626 } 2627 2628 2629 /*------------------------------------------------------------*/ 2630 /*--- Disassembling common idioms ---*/ 2631 /*------------------------------------------------------------*/ 2632 2633 /* Handle binary integer instructions of the form 2634 op E, G meaning 2635 op reg-or-mem, reg 2636 Is passed a ptr to the modRM byte, the actual operation, and the 2637 data size. Returns the address advanced completely over this 2638 instruction. 2639 2640 E(src) is reg-or-mem 2641 G(dst) is reg.
2642 2643 If E is reg, --> GET %G, tmp 2644 OP %E, tmp 2645 PUT tmp, %G 2646 2647 If E is mem and OP is not reversible, 2648 --> (getAddr E) -> tmpa 2649 LD (tmpa), tmpa 2650 GET %G, tmp2 2651 OP tmpa, tmp2 2652 PUT tmp2, %G 2653 2654 If E is mem and OP is reversible 2655 --> (getAddr E) -> tmpa 2656 LD (tmpa), tmpa 2657 OP %G, tmpa 2658 PUT tmpa, %G 2659 */ 2660 static 2661 ULong dis_op2_E_G ( VexAbiInfo* vbi, 2662 Prefix pfx, 2663 Bool addSubCarry, 2664 IROp op8, 2665 Bool keep, 2666 Int size, 2667 Long delta0, 2668 HChar* t_amd64opc ) 2669 { 2670 HChar dis_buf[50]; 2671 Int len; 2672 IRType ty = szToITy(size); 2673 IRTemp dst1 = newTemp(ty); 2674 IRTemp src = newTemp(ty); 2675 IRTemp dst0 = newTemp(ty); 2676 UChar rm = getUChar(delta0); 2677 IRTemp addr = IRTemp_INVALID; 2678 2679 /* addSubCarry == True indicates the intended operation is 2680 add-with-carry or subtract-with-borrow. */ 2681 if (addSubCarry) { 2682 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2683 vassert(keep); 2684 } 2685 2686 if (epartIsReg(rm)) { 2687 /* Specially handle XOR reg,reg, because that doesn't really 2688 depend on reg, and doing the obvious thing potentially 2689 generates a spurious value check failure due to the bogus 2690 dependency. */ 2691 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2692 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2693 if (False && op8 == Iop_Sub8) 2694 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n"); 2695 putIRegG(size,pfx,rm, mkU(ty,0)); 2696 } 2697 2698 assign( dst0, getIRegG(size,pfx,rm) ); 2699 assign( src, getIRegE(size,pfx,rm) ); 2700 2701 if (addSubCarry && op8 == Iop_Add8) { 2702 helper_ADC( size, dst1, dst0, src, 2703 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2704 putIRegG(size, pfx, rm, mkexpr(dst1)); 2705 } else 2706 if (addSubCarry && op8 == Iop_Sub8) { 2707 helper_SBB( size, dst1, dst0, src, 2708 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2709 putIRegG(size, pfx, rm, mkexpr(dst1)); 2710 } else { 2711 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2712 if (isAddSub(op8)) 2713 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2714 else 2715 setFlags_DEP1(op8, dst1, ty); 2716 if (keep) 2717 putIRegG(size, pfx, rm, mkexpr(dst1)); 2718 } 2719 2720 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2721 nameIRegE(size,pfx,rm), 2722 nameIRegG(size,pfx,rm)); 2723 return 1+delta0; 2724 } else { 2725 /* E refers to memory */ 2726 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2727 assign( dst0, getIRegG(size,pfx,rm) ); 2728 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 2729 2730 if (addSubCarry && op8 == Iop_Add8) { 2731 helper_ADC( size, dst1, dst0, src, 2732 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2733 putIRegG(size, pfx, rm, mkexpr(dst1)); 2734 } else 2735 if (addSubCarry && op8 == Iop_Sub8) { 2736 helper_SBB( size, dst1, dst0, src, 2737 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2738 putIRegG(size, pfx, rm, mkexpr(dst1)); 2739 } else { 2740 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2741 if (isAddSub(op8)) 2742 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2743 else 2744 setFlags_DEP1(op8, dst1, ty); 2745 if (keep) 2746 putIRegG(size, pfx, rm, mkexpr(dst1)); 2747 } 2748 2749 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2750 dis_buf, nameIRegG(size, pfx, rm)); 2751 return len+delta0; 2752 } 2753 } 2754 2755 2756 2757 /* Handle binary integer instructions of the form 2758 op G, E meaning 2759 op reg, reg-or-mem 2760 Is passed the a ptr to the modRM byte, the 
actual operation, and the 2761 data size. Returns the address advanced completely over this 2762 instruction. 2763 2764 G(src) is reg. 2765 E(dst) is reg-or-mem 2766 2767 If E is reg, --> GET %E, tmp 2768 OP %G, tmp 2769 PUT tmp, %E 2770 2771 If E is mem, --> (getAddr E) -> tmpa 2772 LD (tmpa), tmpv 2773 OP %G, tmpv 2774 ST tmpv, (tmpa) 2775 */ 2776 static 2777 ULong dis_op2_G_E ( VexAbiInfo* vbi, 2778 Prefix pfx, 2779 Bool addSubCarry, 2780 IROp op8, 2781 Bool keep, 2782 Int size, 2783 Long delta0, 2784 HChar* t_amd64opc ) 2785 { 2786 HChar dis_buf[50]; 2787 Int len; 2788 IRType ty = szToITy(size); 2789 IRTemp dst1 = newTemp(ty); 2790 IRTemp src = newTemp(ty); 2791 IRTemp dst0 = newTemp(ty); 2792 UChar rm = getUChar(delta0); 2793 IRTemp addr = IRTemp_INVALID; 2794 2795 /* addSubCarry == True indicates the intended operation is 2796 add-with-carry or subtract-with-borrow. */ 2797 if (addSubCarry) { 2798 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2799 vassert(keep); 2800 } 2801 2802 if (epartIsReg(rm)) { 2803 /* Specially handle XOR reg,reg, because that doesn't really 2804 depend on reg, and doing the obvious thing potentially 2805 generates a spurious value check failure due to the bogus 2806 dependency. Ditto SBB reg,reg. */ 2807 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2808 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2809 putIRegE(size,pfx,rm, mkU(ty,0)); 2810 } 2811 2812 assign(dst0, getIRegE(size,pfx,rm)); 2813 assign(src, getIRegG(size,pfx,rm)); 2814 2815 if (addSubCarry && op8 == Iop_Add8) { 2816 helper_ADC( size, dst1, dst0, src, 2817 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2818 putIRegE(size, pfx, rm, mkexpr(dst1)); 2819 } else 2820 if (addSubCarry && op8 == Iop_Sub8) { 2821 helper_SBB( size, dst1, dst0, src, 2822 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2823 putIRegE(size, pfx, rm, mkexpr(dst1)); 2824 } else { 2825 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2826 if (isAddSub(op8)) 2827 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2828 else 2829 setFlags_DEP1(op8, dst1, ty); 2830 if (keep) 2831 putIRegE(size, pfx, rm, mkexpr(dst1)); 2832 } 2833 2834 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2835 nameIRegG(size,pfx,rm), 2836 nameIRegE(size,pfx,rm)); 2837 return 1+delta0; 2838 } 2839 2840 /* E refers to memory */ 2841 { 2842 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2843 assign(dst0, loadLE(ty,mkexpr(addr))); 2844 assign(src, getIRegG(size,pfx,rm)); 2845 2846 if (addSubCarry && op8 == Iop_Add8) { 2847 if (pfx & PFX_LOCK) { 2848 /* cas-style store */ 2849 helper_ADC( size, dst1, dst0, src, 2850 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 2851 } else { 2852 /* normal store */ 2853 helper_ADC( size, dst1, dst0, src, 2854 /*store*/addr, IRTemp_INVALID, 0 ); 2855 } 2856 } else 2857 if (addSubCarry && op8 == Iop_Sub8) { 2858 if (pfx & PFX_LOCK) { 2859 /* cas-style store */ 2860 helper_SBB( size, dst1, dst0, src, 2861 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 2862 } else { 2863 /* normal store */ 2864 helper_SBB( size, dst1, dst0, src, 2865 /*store*/addr, IRTemp_INVALID, 0 ); 2866 } 2867 } else { 2868 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2869 if (keep) { 2870 if (pfx & PFX_LOCK) { 2871 if (0) vex_printf("locked case\n" ); 2872 casLE( mkexpr(addr), 2873 mkexpr(dst0)/*expval*/, 2874 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 2875 } else { 2876 if (0) vex_printf("nonlocked case\n"); 2877 storeLE(mkexpr(addr), mkexpr(dst1)); 2878 } 2879 } 2880 if 
(isAddSub(op8)) 2881 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2882 else 2883 setFlags_DEP1(op8, dst1, ty); 2884 } 2885 2886 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2887 nameIRegG(size,pfx,rm), dis_buf); 2888 return len+delta0; 2889 } 2890 } 2891 2892 2893 /* Handle move instructions of the form 2894 mov E, G meaning 2895 mov reg-or-mem, reg 2896 Is passed the a ptr to the modRM byte, and the data size. Returns 2897 the address advanced completely over this instruction. 2898 2899 E(src) is reg-or-mem 2900 G(dst) is reg. 2901 2902 If E is reg, --> GET %E, tmpv 2903 PUT tmpv, %G 2904 2905 If E is mem --> (getAddr E) -> tmpa 2906 LD (tmpa), tmpb 2907 PUT tmpb, %G 2908 */ 2909 static 2910 ULong dis_mov_E_G ( VexAbiInfo* vbi, 2911 Prefix pfx, 2912 Int size, 2913 Long delta0 ) 2914 { 2915 Int len; 2916 UChar rm = getUChar(delta0); 2917 HChar dis_buf[50]; 2918 2919 if (epartIsReg(rm)) { 2920 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 2921 DIP("mov%c %s,%s\n", nameISize(size), 2922 nameIRegE(size,pfx,rm), 2923 nameIRegG(size,pfx,rm)); 2924 return 1+delta0; 2925 } 2926 2927 /* E refers to memory */ 2928 { 2929 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2930 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 2931 DIP("mov%c %s,%s\n", nameISize(size), 2932 dis_buf, 2933 nameIRegG(size,pfx,rm)); 2934 return delta0+len; 2935 } 2936 } 2937 2938 2939 /* Handle move instructions of the form 2940 mov G, E meaning 2941 mov reg, reg-or-mem 2942 Is passed the a ptr to the modRM byte, and the data size. Returns 2943 the address advanced completely over this instruction. 2944 2945 G(src) is reg. 2946 E(dst) is reg-or-mem 2947 2948 If E is reg, --> GET %G, tmp 2949 PUT tmp, %E 2950 2951 If E is mem, --> (getAddr E) -> tmpa 2952 GET %G, tmpv 2953 ST tmpv, (tmpa) 2954 */ 2955 static 2956 ULong dis_mov_G_E ( VexAbiInfo* vbi, 2957 Prefix pfx, 2958 Int size, 2959 Long delta0 ) 2960 { 2961 Int len; 2962 UChar rm = getUChar(delta0); 2963 HChar dis_buf[50]; 2964 2965 if (epartIsReg(rm)) { 2966 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm)); 2967 DIP("mov%c %s,%s\n", nameISize(size), 2968 nameIRegG(size,pfx,rm), 2969 nameIRegE(size,pfx,rm)); 2970 return 1+delta0; 2971 } 2972 2973 /* E refers to memory */ 2974 { 2975 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2976 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) ); 2977 DIP("mov%c %s,%s\n", nameISize(size), 2978 nameIRegG(size,pfx,rm), 2979 dis_buf); 2980 return len+delta0; 2981 } 2982 } 2983 2984 2985 /* op $immediate, AL/AX/EAX/RAX. 
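For example (illustration only): 'add $42, %rax' (0x48 0x05 followed by imm32) reaches this with size == 8, op8 == Iop_Add8, carrying == False and keep == True, whereas the corresponding CMP form uses keep == False so the flags are set but RAX is left untouched; note the literal is at most 4 bytes (size4) and is sign-extended to the operand size.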
*/ 2986 static 2987 ULong dis_op_imm_A ( Int size, 2988 Bool carrying, 2989 IROp op8, 2990 Bool keep, 2991 Long delta, 2992 HChar* t_amd64opc ) 2993 { 2994 Int size4 = imin(size,4); 2995 IRType ty = szToITy(size); 2996 IRTemp dst0 = newTemp(ty); 2997 IRTemp src = newTemp(ty); 2998 IRTemp dst1 = newTemp(ty); 2999 Long lit = getSDisp(size4,delta); 3000 assign(dst0, getIRegRAX(size)); 3001 assign(src, mkU(ty,lit & mkSizeMask(size))); 3002 3003 if (isAddSub(op8) && !carrying) { 3004 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3005 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3006 } 3007 else 3008 if (isLogic(op8)) { 3009 vassert(!carrying); 3010 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3011 setFlags_DEP1(op8, dst1, ty); 3012 } 3013 else 3014 if (op8 == Iop_Add8 && carrying) { 3015 helper_ADC( size, dst1, dst0, src, 3016 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3017 } 3018 else 3019 if (op8 == Iop_Sub8 && carrying) { 3020 helper_SBB( size, dst1, dst0, src, 3021 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3022 } 3023 else 3024 vpanic("dis_op_imm_A(amd64,guest)"); 3025 3026 if (keep) 3027 putIRegRAX(size, mkexpr(dst1)); 3028 3029 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size), 3030 lit, nameIRegRAX(size)); 3031 return delta+size4; 3032 } 3033 3034 3035 /* Sign- and Zero-extending moves. */ 3036 static 3037 ULong dis_movx_E_G ( VexAbiInfo* vbi, 3038 Prefix pfx, 3039 Long delta, Int szs, Int szd, Bool sign_extend ) 3040 { 3041 UChar rm = getUChar(delta); 3042 if (epartIsReg(rm)) { 3043 putIRegG(szd, pfx, rm, 3044 doScalarWidening( 3045 szs,szd,sign_extend, 3046 getIRegE(szs,pfx,rm))); 3047 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3048 nameISize(szs), 3049 nameISize(szd), 3050 nameIRegE(szs,pfx,rm), 3051 nameIRegG(szd,pfx,rm)); 3052 return 1+delta; 3053 } 3054 3055 /* E refers to memory */ 3056 { 3057 Int len; 3058 HChar dis_buf[50]; 3059 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 3060 putIRegG(szd, pfx, rm, 3061 doScalarWidening( 3062 szs,szd,sign_extend, 3063 loadLE(szToITy(szs),mkexpr(addr)))); 3064 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3065 nameISize(szs), 3066 nameISize(szd), 3067 dis_buf, 3068 nameIRegG(szd,pfx,rm)); 3069 return len+delta; 3070 } 3071 } 3072 3073 3074 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by 3075 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */ 3076 static 3077 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 3078 { 3079 /* special-case the 64-bit case */ 3080 if (sz == 8) { 3081 IROp op = signed_divide ? Iop_DivModS128to64 3082 : Iop_DivModU128to64; 3083 IRTemp src128 = newTemp(Ity_I128); 3084 IRTemp dst128 = newTemp(Ity_I128); 3085 assign( src128, binop(Iop_64HLto128, 3086 getIReg64(R_RDX), 3087 getIReg64(R_RAX)) ); 3088 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) ); 3089 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) ); 3090 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); 3091 } else { 3092 IROp op = signed_divide ? Iop_DivModS64to32 3093 : Iop_DivModU64to32; 3094 IRTemp src64 = newTemp(Ity_I64); 3095 IRTemp dst64 = newTemp(Ity_I64); 3096 switch (sz) { 3097 case 4: 3098 assign( src64, 3099 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3100 assign( dst64, 3101 binop(op, mkexpr(src64), mkexpr(t)) ); 3102 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3103 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3104 break; 3105 case 2: { 3106 IROp widen3264 = signed_divide ? 
Iop_32Sto64 : Iop_32Uto64; 3107 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3108 assign( src64, unop(widen3264, 3109 binop(Iop_16HLto32, 3110 getIRegRDX(2), 3111 getIRegRAX(2))) ); 3112 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3113 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3114 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3115 break; 3116 } 3117 case 1: { 3118 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3119 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3120 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 3121 assign( src64, unop(widen3264, 3122 unop(widen1632, getIRegRAX(2))) ); 3123 assign( dst64, 3124 binop(op, mkexpr(src64), 3125 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3126 putIRegRAX( 1, unop(Iop_16to8, 3127 unop(Iop_32to16, 3128 unop(Iop_64to32,mkexpr(dst64)))) ); 3129 putIRegAH( unop(Iop_16to8, 3130 unop(Iop_32to16, 3131 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3132 break; 3133 } 3134 default: 3135 vpanic("codegen_div(amd64)"); 3136 } 3137 } 3138 } 3139 3140 static 3141 ULong dis_Grp1 ( VexAbiInfo* vbi, 3142 Prefix pfx, 3143 Long delta, UChar modrm, 3144 Int am_sz, Int d_sz, Int sz, Long d64 ) 3145 { 3146 Int len; 3147 HChar dis_buf[50]; 3148 IRType ty = szToITy(sz); 3149 IRTemp dst1 = newTemp(ty); 3150 IRTemp src = newTemp(ty); 3151 IRTemp dst0 = newTemp(ty); 3152 IRTemp addr = IRTemp_INVALID; 3153 IROp op8 = Iop_INVALID; 3154 ULong mask = mkSizeMask(sz); 3155 3156 switch (gregLO3ofRM(modrm)) { 3157 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3158 case 2: break; // ADC 3159 case 3: break; // SBB 3160 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3161 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3162 /*NOTREACHED*/ 3163 default: vpanic("dis_Grp1(amd64): unhandled case"); 3164 } 3165 3166 if (epartIsReg(modrm)) { 3167 vassert(am_sz == 1); 3168 3169 assign(dst0, getIRegE(sz,pfx,modrm)); 3170 assign(src, mkU(ty,d64 & mask)); 3171 3172 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3173 helper_ADC( sz, dst1, dst0, src, 3174 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3175 } else 3176 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3177 helper_SBB( sz, dst1, dst0, src, 3178 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3179 } else { 3180 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3181 if (isAddSub(op8)) 3182 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3183 else 3184 setFlags_DEP1(op8, dst1, ty); 3185 } 3186 3187 if (gregLO3ofRM(modrm) < 7) 3188 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3189 3190 delta += (am_sz + d_sz); 3191 DIP("%s%c $%lld, %s\n", 3192 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3193 nameIRegE(sz,pfx,modrm)); 3194 } else { 3195 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3196 3197 assign(dst0, loadLE(ty,mkexpr(addr))); 3198 assign(src, mkU(ty,d64 & mask)); 3199 3200 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3201 if (pfx & PFX_LOCK) { 3202 /* cas-style store */ 3203 helper_ADC( sz, dst1, dst0, src, 3204 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3205 } else { 3206 /* normal store */ 3207 helper_ADC( sz, dst1, dst0, src, 3208 /*store*/addr, IRTemp_INVALID, 0 ); 3209 } 3210 } else 3211 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3212 if (pfx & PFX_LOCK) { 3213 /* cas-style store */ 3214 helper_SBB( sz, dst1, dst0, src, 3215 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3216 } else { 3217 /* normal store */ 3218 helper_SBB( sz, dst1, 
dst0, src, 3219 /*store*/addr, IRTemp_INVALID, 0 ); 3220 } 3221 } else { 3222 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3223 if (gregLO3ofRM(modrm) < 7) { 3224 if (pfx & PFX_LOCK) { 3225 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3226 mkexpr(dst1)/*newVal*/, 3227 guest_RIP_curr_instr ); 3228 } else { 3229 storeLE(mkexpr(addr), mkexpr(dst1)); 3230 } 3231 } 3232 if (isAddSub(op8)) 3233 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3234 else 3235 setFlags_DEP1(op8, dst1, ty); 3236 } 3237 3238 delta += (len+d_sz); 3239 DIP("%s%c $%lld, %s\n", 3240 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 3241 d64, dis_buf); 3242 } 3243 return delta; 3244 } 3245 3246 3247 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3248 expression. */ 3249 3250 static 3251 ULong dis_Grp2 ( VexAbiInfo* vbi, 3252 Prefix pfx, 3253 Long delta, UChar modrm, 3254 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3255 HChar* shift_expr_txt, Bool* decode_OK ) 3256 { 3257 /* delta on entry points at the modrm byte. */ 3258 HChar dis_buf[50]; 3259 Int len; 3260 Bool isShift, isRotate, isRotateC; 3261 IRType ty = szToITy(sz); 3262 IRTemp dst0 = newTemp(ty); 3263 IRTemp dst1 = newTemp(ty); 3264 IRTemp addr = IRTemp_INVALID; 3265 3266 *decode_OK = True; 3267 3268 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3269 3270 /* Put value to shift/rotate in dst0. */ 3271 if (epartIsReg(modrm)) { 3272 assign(dst0, getIRegE(sz, pfx, modrm)); 3273 delta += (am_sz + d_sz); 3274 } else { 3275 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3276 assign(dst0, loadLE(ty,mkexpr(addr))); 3277 delta += len + d_sz; 3278 } 3279 3280 isShift = False; 3281 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3282 3283 isRotate = False; 3284 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3285 3286 isRotateC = False; 3287 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3288 3289 if (!isShift && !isRotate && !isRotateC) { 3290 /*NOTREACHED*/ 3291 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3292 } 3293 3294 if (isRotateC) { 3295 /* Call a helper; this insn is so ridiculous it does not deserve 3296 better. One problem is, the helper has to calculate both the 3297 new value and the new flags. This is more than 64 bits, and 3298 there is no way to return more than 64 bits from the helper. 3299 Hence the crude and obvious solution is to call it twice, 3300 using the sign of the sz field to indicate whether it is the 3301 value or rflags result we want. 3302 */ 3303 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3304 IRExpr** argsVALUE; 3305 IRExpr** argsRFLAGS; 3306 3307 IRTemp new_value = newTemp(Ity_I64); 3308 IRTemp new_rflags = newTemp(Ity_I64); 3309 IRTemp old_rflags = newTemp(Ity_I64); 3310 3311 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3312 3313 argsVALUE 3314 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3315 widenUto64(shift_expr), /* rotate amount */ 3316 mkexpr(old_rflags), 3317 mkU64(sz) ); 3318 assign( new_value, 3319 mkIRExprCCall( 3320 Ity_I64, 3321 0/*regparm*/, 3322 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3323 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3324 argsVALUE 3325 ) 3326 ); 3327 3328 argsRFLAGS 3329 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3330 widenUto64(shift_expr), /* rotate amount */ 3331 mkexpr(old_rflags), 3332 mkU64(-sz) ); 3333 assign( new_rflags, 3334 mkIRExprCCall( 3335 Ity_I64, 3336 0/*regparm*/, 3337 left ? 
"amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3338 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3339 argsRFLAGS 3340 ) 3341 ); 3342 3343 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3344 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3345 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3346 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3347 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3348 } 3349 3350 else 3351 if (isShift) { 3352 3353 IRTemp pre64 = newTemp(Ity_I64); 3354 IRTemp res64 = newTemp(Ity_I64); 3355 IRTemp res64ss = newTemp(Ity_I64); 3356 IRTemp shift_amt = newTemp(Ity_I8); 3357 UChar mask = toUChar(sz==8 ? 63 : 31); 3358 IROp op64; 3359 3360 switch (gregLO3ofRM(modrm)) { 3361 case 4: op64 = Iop_Shl64; break; 3362 case 5: op64 = Iop_Shr64; break; 3363 case 6: op64 = Iop_Shl64; break; 3364 case 7: op64 = Iop_Sar64; break; 3365 /*NOTREACHED*/ 3366 default: vpanic("dis_Grp2:shift"); break; 3367 } 3368 3369 /* Widen the value to be shifted to 64 bits, do the shift, and 3370 narrow back down. This seems surprisingly long-winded, but 3371 unfortunately the AMD semantics requires that 8/16/32-bit 3372 shifts give defined results for shift values all the way up 3373 to 32, and this seems the simplest way to do it. It has the 3374 advantage that the only IR level shifts generated are of 64 3375 bit values, and the shift amount is guaranteed to be in the 3376 range 0 .. 63, thereby observing the IR semantics requiring 3377 all shift values to be in the range 0 .. 2^word_size-1. 3378 3379 Therefore the shift amount is masked with 63 for 64-bit shifts 3380 and 31 for all others. 3381 */ 3382 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3383 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3384 3385 /* suitably widen the value to be shifted to 64 bits. */ 3386 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3387 : widenUto64(mkexpr(dst0)) ); 3388 3389 /* res64 = pre64 `shift` shift_amt */ 3390 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3391 3392 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3393 assign( res64ss, 3394 binop(op64, 3395 mkexpr(pre64), 3396 binop(Iop_And8, 3397 binop(Iop_Sub8, 3398 mkexpr(shift_amt), mkU8(1)), 3399 mkU8(mask))) ); 3400 3401 /* Build the flags thunk. */ 3402 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3403 3404 /* Narrow the result back down. */ 3405 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3406 3407 } /* if (isShift) */ 3408 3409 else 3410 if (isRotate) { 3411 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3412 : (ty==Ity_I32 ? 2 : 3)); 3413 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3414 IRTemp rot_amt = newTemp(Ity_I8); 3415 IRTemp rot_amt64 = newTemp(Ity_I8); 3416 IRTemp oldFlags = newTemp(Ity_I64); 3417 UChar mask = toUChar(sz==8 ? 63 : 31); 3418 3419 /* rot_amt = shift_expr & mask */ 3420 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3421 expressions never shift beyond the word size and thus remain 3422 well defined. 
*/ 3423 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3424 3425 if (ty == Ity_I64) 3426 assign(rot_amt, mkexpr(rot_amt64)); 3427 else 3428 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3429 3430 if (left) { 3431 3432 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3433 assign(dst1, 3434 binop( mkSizedOp(ty,Iop_Or8), 3435 binop( mkSizedOp(ty,Iop_Shl8), 3436 mkexpr(dst0), 3437 mkexpr(rot_amt) 3438 ), 3439 binop( mkSizedOp(ty,Iop_Shr8), 3440 mkexpr(dst0), 3441 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3442 ) 3443 ) 3444 ); 3445 ccOp += AMD64G_CC_OP_ROLB; 3446 3447 } else { /* right */ 3448 3449 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3450 assign(dst1, 3451 binop( mkSizedOp(ty,Iop_Or8), 3452 binop( mkSizedOp(ty,Iop_Shr8), 3453 mkexpr(dst0), 3454 mkexpr(rot_amt) 3455 ), 3456 binop( mkSizedOp(ty,Iop_Shl8), 3457 mkexpr(dst0), 3458 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3459 ) 3460 ) 3461 ); 3462 ccOp += AMD64G_CC_OP_RORB; 3463 3464 } 3465 3466 /* dst1 now holds the rotated value. Build flag thunk. We 3467 need the resulting value for this, and the previous flags. 3468 Except don't set it if the rotate count is zero. */ 3469 3470 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3471 3472 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 3473 stmt( IRStmt_Put( OFFB_CC_OP, 3474 IRExpr_Mux0X( mkexpr(rot_amt64), 3475 IRExpr_Get(OFFB_CC_OP,Ity_I64), 3476 mkU64(ccOp))) ); 3477 stmt( IRStmt_Put( OFFB_CC_DEP1, 3478 IRExpr_Mux0X( mkexpr(rot_amt64), 3479 IRExpr_Get(OFFB_CC_DEP1,Ity_I64), 3480 widenUto64(mkexpr(dst1)))) ); 3481 stmt( IRStmt_Put( OFFB_CC_DEP2, 3482 IRExpr_Mux0X( mkexpr(rot_amt64), 3483 IRExpr_Get(OFFB_CC_DEP2,Ity_I64), 3484 mkU64(0))) ); 3485 stmt( IRStmt_Put( OFFB_CC_NDEP, 3486 IRExpr_Mux0X( mkexpr(rot_amt64), 3487 IRExpr_Get(OFFB_CC_NDEP,Ity_I64), 3488 mkexpr(oldFlags))) ); 3489 } /* if (isRotate) */ 3490 3491 /* Save result, and finish up. */ 3492 if (epartIsReg(modrm)) { 3493 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3494 if (vex_traceflags & VEX_TRACE_FE) { 3495 vex_printf("%s%c ", 3496 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3497 if (shift_expr_txt) 3498 vex_printf("%s", shift_expr_txt); 3499 else 3500 ppIRExpr(shift_expr); 3501 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3502 } 3503 } else { 3504 storeLE(mkexpr(addr), mkexpr(dst1)); 3505 if (vex_traceflags & VEX_TRACE_FE) { 3506 vex_printf("%s%c ", 3507 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3508 if (shift_expr_txt) 3509 vex_printf("%s", shift_expr_txt); 3510 else 3511 ppIRExpr(shift_expr); 3512 vex_printf(", %s\n", dis_buf); 3513 } 3514 } 3515 return delta; 3516 } 3517 3518 3519 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3520 static 3521 ULong dis_Grp8_Imm ( VexAbiInfo* vbi, 3522 Prefix pfx, 3523 Long delta, UChar modrm, 3524 Int am_sz, Int sz, ULong src_val, 3525 Bool* decode_OK ) 3526 { 3527 /* src_val denotes a d8. 3528 And delta on entry points at the modrm byte. */ 3529 3530 IRType ty = szToITy(sz); 3531 IRTemp t2 = newTemp(Ity_I64); 3532 IRTemp t2m = newTemp(Ity_I64); 3533 IRTemp t_addr = IRTemp_INVALID; 3534 HChar dis_buf[50]; 3535 ULong mask; 3536 3537 /* we're optimists :-) */ 3538 *decode_OK = True; 3539 3540 /* Limit src_val -- the bit offset -- to something within a word. 3541 The Intel docs say that literal offsets larger than a word are 3542 masked in this way. 
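E.g. (illustration only) 'btl $35, %eax' behaves like 'btl $3, %eax': with sz == 4 the switch below reduces src_val from 35 to 35 & 31 == 3, and bit 3 of the operand ends up in the carry flag.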
*/ 3543 switch (sz) { 3544 case 2: src_val &= 15; break; 3545 case 4: src_val &= 31; break; 3546 case 8: src_val &= 63; break; 3547 default: *decode_OK = False; return delta; 3548 } 3549 3550 /* Invent a mask suitable for the operation. */ 3551 switch (gregLO3ofRM(modrm)) { 3552 case 4: /* BT */ mask = 0; break; 3553 case 5: /* BTS */ mask = 1ULL << src_val; break; 3554 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3555 case 7: /* BTC */ mask = 1ULL << src_val; break; 3556 /* If this needs to be extended, probably simplest to make a 3557 new function to handle the other cases (0 .. 3). The 3558 Intel docs do however not indicate any use for 0 .. 3, so 3559 we don't expect this to happen. */ 3560 default: *decode_OK = False; return delta; 3561 } 3562 3563 /* Fetch the value to be tested and modified into t2, which is 3564 64-bits wide regardless of sz. */ 3565 if (epartIsReg(modrm)) { 3566 vassert(am_sz == 1); 3567 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3568 delta += (am_sz + 1); 3569 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3570 nameISize(sz), 3571 src_val, nameIRegE(sz,pfx,modrm)); 3572 } else { 3573 Int len; 3574 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3575 delta += (len+1); 3576 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3577 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3578 nameISize(sz), 3579 src_val, dis_buf); 3580 } 3581 3582 /* Compute the new value into t2m, if non-BT. */ 3583 switch (gregLO3ofRM(modrm)) { 3584 case 4: /* BT */ 3585 break; 3586 case 5: /* BTS */ 3587 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3588 break; 3589 case 6: /* BTR */ 3590 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3591 break; 3592 case 7: /* BTC */ 3593 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3594 break; 3595 default: 3596 /*NOTREACHED*/ /*the previous switch guards this*/ 3597 vassert(0); 3598 } 3599 3600 /* Write the result back, if non-BT. */ 3601 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3602 if (epartIsReg(modrm)) { 3603 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3604 } else { 3605 if (pfx & PFX_LOCK) { 3606 casLE( mkexpr(t_addr), 3607 narrowTo(ty, mkexpr(t2))/*expd*/, 3608 narrowTo(ty, mkexpr(t2m))/*new*/, 3609 guest_RIP_curr_instr ); 3610 } else { 3611 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3612 } 3613 } 3614 } 3615 3616 /* Copy relevant bit from t2 into the carry flag. */ 3617 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3618 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3619 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3620 stmt( IRStmt_Put( 3621 OFFB_CC_DEP1, 3622 binop(Iop_And64, 3623 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3624 mkU64(1)) 3625 )); 3626 /* Set NDEP even though it isn't used. This makes redundant-PUT 3627 elimination of previous stores to this field work better. */ 3628 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3629 3630 return delta; 3631 } 3632 3633 3634 /* Signed/unsigned widening multiply. Generate IR to multiply the 3635 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3636 RDX:RAX/EDX:EAX/DX:AX/AX. 
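For instance (illustrative): a 32-bit 'mull %ebx' multiplies EAX by EBX with Iop_MullU32 to form a 64-bit product, putting the high half in EDX and the low half in EAX; the 8-bit forms are the odd one out in that the whole 16-bit product is simply written back to AX.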
3637 */ 3638 static void codegen_mulL_A_D ( Int sz, Bool syned, 3639 IRTemp tmp, HChar* tmp_txt ) 3640 { 3641 IRType ty = szToITy(sz); 3642 IRTemp t1 = newTemp(ty); 3643 3644 assign( t1, getIRegRAX(sz) ); 3645 3646 switch (ty) { 3647 case Ity_I64: { 3648 IRTemp res128 = newTemp(Ity_I128); 3649 IRTemp resHi = newTemp(Ity_I64); 3650 IRTemp resLo = newTemp(Ity_I64); 3651 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3652 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3653 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3654 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3655 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3656 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3657 putIReg64(R_RDX, mkexpr(resHi)); 3658 putIReg64(R_RAX, mkexpr(resLo)); 3659 break; 3660 } 3661 case Ity_I32: { 3662 IRTemp res64 = newTemp(Ity_I64); 3663 IRTemp resHi = newTemp(Ity_I32); 3664 IRTemp resLo = newTemp(Ity_I32); 3665 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3666 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3667 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3668 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3669 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3670 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3671 putIRegRDX(4, mkexpr(resHi)); 3672 putIRegRAX(4, mkexpr(resLo)); 3673 break; 3674 } 3675 case Ity_I16: { 3676 IRTemp res32 = newTemp(Ity_I32); 3677 IRTemp resHi = newTemp(Ity_I16); 3678 IRTemp resLo = newTemp(Ity_I16); 3679 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3680 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3681 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3682 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3683 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3684 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3685 putIRegRDX(2, mkexpr(resHi)); 3686 putIRegRAX(2, mkexpr(resLo)); 3687 break; 3688 } 3689 case Ity_I8: { 3690 IRTemp res16 = newTemp(Ity_I16); 3691 IRTemp resHi = newTemp(Ity_I8); 3692 IRTemp resLo = newTemp(Ity_I8); 3693 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3694 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3695 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3696 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3697 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3698 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3699 putIRegRAX(2, mkexpr(res16)); 3700 break; 3701 } 3702 default: 3703 ppIRType(ty); 3704 vpanic("codegen_mulL_A_D(amd64)"); 3705 } 3706 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3707 } 3708 3709 3710 /* Group 3 extended opcodes. 
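   The reg field of the modRM byte selects the operation, as decoded
   below: /0 = TEST Ib/Iz, /1 = invalid, /2 = NOT, /3 = NEG, /4 = MUL,
   /5 = IMUL, /6 = DIV, /7 = IDIV.  Only TEST carries an immediate;
   the remaining forms take just the r/m operand.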
*/ 3711 static 3712 ULong dis_Grp3 ( VexAbiInfo* vbi, 3713 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3714 { 3715 Long d64; 3716 UChar modrm; 3717 HChar dis_buf[50]; 3718 Int len; 3719 IRTemp addr; 3720 IRType ty = szToITy(sz); 3721 IRTemp t1 = newTemp(ty); 3722 IRTemp dst1, src, dst0; 3723 *decode_OK = True; 3724 modrm = getUChar(delta); 3725 if (epartIsReg(modrm)) { 3726 switch (gregLO3ofRM(modrm)) { 3727 case 0: { /* TEST */ 3728 delta++; 3729 d64 = getSDisp(imin(4,sz), delta); 3730 delta += imin(4,sz); 3731 dst1 = newTemp(ty); 3732 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3733 getIRegE(sz,pfx,modrm), 3734 mkU(ty, d64 & mkSizeMask(sz)))); 3735 setFlags_DEP1( Iop_And8, dst1, ty ); 3736 DIP("test%c $%lld, %s\n", 3737 nameISize(sz), d64, 3738 nameIRegE(sz, pfx, modrm)); 3739 break; 3740 } 3741 case 1: 3742 *decode_OK = False; 3743 return delta; 3744 case 2: /* NOT */ 3745 delta++; 3746 putIRegE(sz, pfx, modrm, 3747 unop(mkSizedOp(ty,Iop_Not8), 3748 getIRegE(sz, pfx, modrm))); 3749 DIP("not%c %s\n", nameISize(sz), 3750 nameIRegE(sz, pfx, modrm)); 3751 break; 3752 case 3: /* NEG */ 3753 delta++; 3754 dst0 = newTemp(ty); 3755 src = newTemp(ty); 3756 dst1 = newTemp(ty); 3757 assign(dst0, mkU(ty,0)); 3758 assign(src, getIRegE(sz, pfx, modrm)); 3759 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3760 mkexpr(src))); 3761 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3762 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3763 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 3764 break; 3765 case 4: /* MUL (unsigned widening) */ 3766 delta++; 3767 src = newTemp(ty); 3768 assign(src, getIRegE(sz,pfx,modrm)); 3769 codegen_mulL_A_D ( sz, False, src, 3770 nameIRegE(sz,pfx,modrm) ); 3771 break; 3772 case 5: /* IMUL (signed widening) */ 3773 delta++; 3774 src = newTemp(ty); 3775 assign(src, getIRegE(sz,pfx,modrm)); 3776 codegen_mulL_A_D ( sz, True, src, 3777 nameIRegE(sz,pfx,modrm) ); 3778 break; 3779 case 6: /* DIV */ 3780 delta++; 3781 assign( t1, getIRegE(sz, pfx, modrm) ); 3782 codegen_div ( sz, t1, False ); 3783 DIP("div%c %s\n", nameISize(sz), 3784 nameIRegE(sz, pfx, modrm)); 3785 break; 3786 case 7: /* IDIV */ 3787 delta++; 3788 assign( t1, getIRegE(sz, pfx, modrm) ); 3789 codegen_div ( sz, t1, True ); 3790 DIP("idiv%c %s\n", nameISize(sz), 3791 nameIRegE(sz, pfx, modrm)); 3792 break; 3793 default: 3794 /*NOTREACHED*/ 3795 vpanic("Grp3(amd64,R)"); 3796 } 3797 } else { 3798 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 3799 /* we have to inform disAMode of any immediate 3800 bytes used */ 3801 gregLO3ofRM(modrm)==0/*TEST*/ 3802 ? 
imin(4,sz) 3803 : 0 3804 ); 3805 t1 = newTemp(ty); 3806 delta += len; 3807 assign(t1, loadLE(ty,mkexpr(addr))); 3808 switch (gregLO3ofRM(modrm)) { 3809 case 0: { /* TEST */ 3810 d64 = getSDisp(imin(4,sz), delta); 3811 delta += imin(4,sz); 3812 dst1 = newTemp(ty); 3813 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3814 mkexpr(t1), 3815 mkU(ty, d64 & mkSizeMask(sz)))); 3816 setFlags_DEP1( Iop_And8, dst1, ty ); 3817 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 3818 break; 3819 } 3820 case 1: 3821 *decode_OK = False; 3822 return delta; 3823 case 2: /* NOT */ 3824 dst1 = newTemp(ty); 3825 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 3826 if (pfx & PFX_LOCK) { 3827 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3828 guest_RIP_curr_instr ); 3829 } else { 3830 storeLE( mkexpr(addr), mkexpr(dst1) ); 3831 } 3832 DIP("not%c %s\n", nameISize(sz), dis_buf); 3833 break; 3834 case 3: /* NEG */ 3835 dst0 = newTemp(ty); 3836 src = newTemp(ty); 3837 dst1 = newTemp(ty); 3838 assign(dst0, mkU(ty,0)); 3839 assign(src, mkexpr(t1)); 3840 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3841 mkexpr(src))); 3842 if (pfx & PFX_LOCK) { 3843 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3844 guest_RIP_curr_instr ); 3845 } else { 3846 storeLE( mkexpr(addr), mkexpr(dst1) ); 3847 } 3848 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3849 DIP("neg%c %s\n", nameISize(sz), dis_buf); 3850 break; 3851 case 4: /* MUL (unsigned widening) */ 3852 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 3853 break; 3854 case 5: /* IMUL */ 3855 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 3856 break; 3857 case 6: /* DIV */ 3858 codegen_div ( sz, t1, False ); 3859 DIP("div%c %s\n", nameISize(sz), dis_buf); 3860 break; 3861 case 7: /* IDIV */ 3862 codegen_div ( sz, t1, True ); 3863 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 3864 break; 3865 default: 3866 /*NOTREACHED*/ 3867 vpanic("Grp3(amd64,M)"); 3868 } 3869 } 3870 return delta; 3871 } 3872 3873 3874 /* Group 4 extended opcodes. 
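   Only the byte-sized forms are decoded here: /0 = INC r/m8 and
   /1 = DEC r/m8; any other reg-field value sets *decode_OK to False.
   A LOCK prefix on the memory forms is honoured by routing the update
   through casLE rather than a plain store.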
*/ 3875 static 3876 ULong dis_Grp4 ( VexAbiInfo* vbi, 3877 Prefix pfx, Long delta, Bool* decode_OK ) 3878 { 3879 Int alen; 3880 UChar modrm; 3881 HChar dis_buf[50]; 3882 IRType ty = Ity_I8; 3883 IRTemp t1 = newTemp(ty); 3884 IRTemp t2 = newTemp(ty); 3885 3886 *decode_OK = True; 3887 3888 modrm = getUChar(delta); 3889 if (epartIsReg(modrm)) { 3890 assign(t1, getIRegE(1, pfx, modrm)); 3891 switch (gregLO3ofRM(modrm)) { 3892 case 0: /* INC */ 3893 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3894 putIRegE(1, pfx, modrm, mkexpr(t2)); 3895 setFlags_INC_DEC( True, t2, ty ); 3896 break; 3897 case 1: /* DEC */ 3898 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3899 putIRegE(1, pfx, modrm, mkexpr(t2)); 3900 setFlags_INC_DEC( False, t2, ty ); 3901 break; 3902 default: 3903 *decode_OK = False; 3904 return delta; 3905 } 3906 delta++; 3907 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 3908 nameIRegE(1, pfx, modrm)); 3909 } else { 3910 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 3911 assign( t1, loadLE(ty, mkexpr(addr)) ); 3912 switch (gregLO3ofRM(modrm)) { 3913 case 0: /* INC */ 3914 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3915 if (pfx & PFX_LOCK) { 3916 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3917 guest_RIP_curr_instr ); 3918 } else { 3919 storeLE( mkexpr(addr), mkexpr(t2) ); 3920 } 3921 setFlags_INC_DEC( True, t2, ty ); 3922 break; 3923 case 1: /* DEC */ 3924 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3925 if (pfx & PFX_LOCK) { 3926 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3927 guest_RIP_curr_instr ); 3928 } else { 3929 storeLE( mkexpr(addr), mkexpr(t2) ); 3930 } 3931 setFlags_INC_DEC( False, t2, ty ); 3932 break; 3933 default: 3934 *decode_OK = False; 3935 return delta; 3936 } 3937 delta += alen; 3938 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 3939 } 3940 return delta; 3941 } 3942 3943 3944 /* Group 5 extended opcodes. */ 3945 static 3946 ULong dis_Grp5 ( VexAbiInfo* vbi, 3947 Prefix pfx, Int sz, Long delta, 3948 DisResult* dres, Bool* decode_OK ) 3949 { 3950 Int len; 3951 UChar modrm; 3952 HChar dis_buf[50]; 3953 IRTemp addr = IRTemp_INVALID; 3954 IRType ty = szToITy(sz); 3955 IRTemp t1 = newTemp(ty); 3956 IRTemp t2 = IRTemp_INVALID; 3957 IRTemp t3 = IRTemp_INVALID; 3958 Bool showSz = True; 3959 3960 *decode_OK = True; 3961 3962 modrm = getUChar(delta); 3963 if (epartIsReg(modrm)) { 3964 assign(t1, getIRegE(sz,pfx,modrm)); 3965 switch (gregLO3ofRM(modrm)) { 3966 case 0: /* INC */ 3967 t2 = newTemp(ty); 3968 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3969 mkexpr(t1), mkU(ty,1))); 3970 setFlags_INC_DEC( True, t2, ty ); 3971 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3972 break; 3973 case 1: /* DEC */ 3974 t2 = newTemp(ty); 3975 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3976 mkexpr(t1), mkU(ty,1))); 3977 setFlags_INC_DEC( False, t2, ty ); 3978 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3979 break; 3980 case 2: /* call Ev */ 3981 /* Ignore any sz value and operate as if sz==8. 
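                  The sequence generated below is: fetch the target
                  from the E register, decrement RSP by 8, store the
                  return address (the RIP of the following insn) at the
                  new RSP, emit an ABI hint covering the red zone, and
                  jump to the target with Ijk_Call.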
*/ 3982 if (!(sz == 4 || sz == 8)) goto unhandled; 3983 sz = 8; 3984 t3 = newTemp(Ity_I64); 3985 assign(t3, getIRegE(sz,pfx,modrm)); 3986 t2 = newTemp(Ity_I64); 3987 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 3988 putIReg64(R_RSP, mkexpr(t2)); 3989 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 3990 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 3991 jmp_treg(Ijk_Call,t3); 3992 dres->whatNext = Dis_StopHere; 3993 showSz = False; 3994 break; 3995 case 4: /* jmp Ev */ 3996 /* Ignore any sz value and operate as if sz==8. */ 3997 if (!(sz == 4 || sz == 8)) goto unhandled; 3998 sz = 8; 3999 t3 = newTemp(Ity_I64); 4000 assign(t3, getIRegE(sz,pfx,modrm)); 4001 jmp_treg(Ijk_Boring,t3); 4002 dres->whatNext = Dis_StopHere; 4003 showSz = False; 4004 break; 4005 default: 4006 *decode_OK = False; 4007 return delta; 4008 } 4009 delta++; 4010 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4011 showSz ? nameISize(sz) : ' ', 4012 nameIRegE(sz, pfx, modrm)); 4013 } else { 4014 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4015 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4016 && gregLO3ofRM(modrm) != 6) { 4017 assign(t1, loadLE(ty,mkexpr(addr))); 4018 } 4019 switch (gregLO3ofRM(modrm)) { 4020 case 0: /* INC */ 4021 t2 = newTemp(ty); 4022 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4023 mkexpr(t1), mkU(ty,1))); 4024 if (pfx & PFX_LOCK) { 4025 casLE( mkexpr(addr), 4026 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4027 } else { 4028 storeLE(mkexpr(addr),mkexpr(t2)); 4029 } 4030 setFlags_INC_DEC( True, t2, ty ); 4031 break; 4032 case 1: /* DEC */ 4033 t2 = newTemp(ty); 4034 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4035 mkexpr(t1), mkU(ty,1))); 4036 if (pfx & PFX_LOCK) { 4037 casLE( mkexpr(addr), 4038 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4039 } else { 4040 storeLE(mkexpr(addr),mkexpr(t2)); 4041 } 4042 setFlags_INC_DEC( False, t2, ty ); 4043 break; 4044 case 2: /* call Ev */ 4045 /* Ignore any sz value and operate as if sz==8. */ 4046 if (!(sz == 4 || sz == 8)) goto unhandled; 4047 sz = 8; 4048 t3 = newTemp(Ity_I64); 4049 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4050 t2 = newTemp(Ity_I64); 4051 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4052 putIReg64(R_RSP, mkexpr(t2)); 4053 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4054 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4055 jmp_treg(Ijk_Call,t3); 4056 dres->whatNext = Dis_StopHere; 4057 showSz = False; 4058 break; 4059 case 4: /* JMP Ev */ 4060 /* Ignore any sz value and operate as if sz==8. */ 4061 if (!(sz == 4 || sz == 8)) goto unhandled; 4062 sz = 8; 4063 t3 = newTemp(Ity_I64); 4064 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4065 jmp_treg(Ijk_Boring,t3); 4066 dres->whatNext = Dis_StopHere; 4067 showSz = False; 4068 break; 4069 case 6: /* PUSH Ev */ 4070 /* There is no encoding for 32-bit operand size; hence ... */ 4071 if (sz == 4) sz = 8; 4072 if (!(sz == 8 || sz == 2)) goto unhandled; 4073 if (sz == 8) { 4074 t3 = newTemp(Ity_I64); 4075 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4076 t2 = newTemp(Ity_I64); 4077 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4078 putIReg64(R_RSP, mkexpr(t2) ); 4079 storeLE( mkexpr(t2), mkexpr(t3) ); 4080 break; 4081 } else { 4082 goto unhandled; /* awaiting test case */ 4083 } 4084 default: 4085 unhandled: 4086 *decode_OK = False; 4087 return delta; 4088 } 4089 delta += len; 4090 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4091 showSz ? 
nameISize(sz) : ' ', 4092 dis_buf); 4093 } 4094 return delta; 4095 } 4096 4097 4098 /*------------------------------------------------------------*/ 4099 /*--- Disassembling string ops (including REP prefixes) ---*/ 4100 /*------------------------------------------------------------*/ 4101 4102 /* Code shared by all the string ops */ 4103 static 4104 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4105 { 4106 UChar logSz; 4107 if (sz == 8 || sz == 4 || sz == 2) { 4108 logSz = 1; 4109 if (sz == 4) logSz = 2; 4110 if (sz == 8) logSz = 3; 4111 assign( t_inc, 4112 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4113 mkU8(logSz) ) ); 4114 } else { 4115 assign( t_inc, 4116 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4117 } 4118 } 4119 4120 static 4121 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ), 4122 Int sz, HChar* name, Prefix pfx ) 4123 { 4124 IRTemp t_inc = newTemp(Ity_I64); 4125 /* Really we ought to inspect the override prefixes, but we don't. 4126 The following assertion catches any resulting sillyness. */ 4127 vassert(pfx == clearSegBits(pfx)); 4128 dis_string_op_increment(sz, t_inc); 4129 dis_OP( sz, t_inc, pfx ); 4130 DIP("%s%c\n", name, nameISize(sz)); 4131 } 4132 4133 static 4134 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx ) 4135 { 4136 IRType ty = szToITy(sz); 4137 IRTemp td = newTemp(Ity_I64); /* RDI */ 4138 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4139 IRExpr *incd, *incs; 4140 4141 if (haveASO(pfx)) { 4142 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4143 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4144 } else { 4145 assign( td, getIReg64(R_RDI) ); 4146 assign( ts, getIReg64(R_RSI) ); 4147 } 4148 4149 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4150 4151 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4152 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4153 if (haveASO(pfx)) { 4154 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4155 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4156 } 4157 putIReg64( R_RDI, incd ); 4158 putIReg64( R_RSI, incs ); 4159 } 4160 4161 static 4162 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx ) 4163 { 4164 IRType ty = szToITy(sz); 4165 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4166 IRExpr *incs; 4167 4168 if (haveASO(pfx)) 4169 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4170 else 4171 assign( ts, getIReg64(R_RSI) ); 4172 4173 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4174 4175 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4176 if (haveASO(pfx)) 4177 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4178 putIReg64( R_RSI, incs ); 4179 } 4180 4181 static 4182 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx ) 4183 { 4184 IRType ty = szToITy(sz); 4185 IRTemp ta = newTemp(ty); /* rAX */ 4186 IRTemp td = newTemp(Ity_I64); /* RDI */ 4187 IRExpr *incd; 4188 4189 assign( ta, getIRegRAX(sz) ); 4190 4191 if (haveASO(pfx)) 4192 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4193 else 4194 assign( td, getIReg64(R_RDI) ); 4195 4196 storeLE( mkexpr(td), mkexpr(ta) ); 4197 4198 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4199 if (haveASO(pfx)) 4200 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4201 putIReg64( R_RDI, incd ); 4202 } 4203 4204 static 4205 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx ) 4206 { 4207 IRType ty = szToITy(sz); 4208 IRTemp tdv = newTemp(ty); /* (RDI) */ 4209 IRTemp tsv = newTemp(ty); /* (RSI) */ 4210 IRTemp td = newTemp(Ity_I64); /* RDI */ 4211 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4212 IRExpr *incd, *incs; 4213 4214 if (haveASO(pfx)) { 4215 assign( td, 
unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4216 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4217 } else { 4218 assign( td, getIReg64(R_RDI) ); 4219 assign( ts, getIReg64(R_RSI) ); 4220 } 4221 4222 assign( tdv, loadLE(ty,mkexpr(td)) ); 4223 4224 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4225 4226 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4227 4228 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4229 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4230 if (haveASO(pfx)) { 4231 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4232 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4233 } 4234 putIReg64( R_RDI, incd ); 4235 putIReg64( R_RSI, incs ); 4236 } 4237 4238 static 4239 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) 4240 { 4241 IRType ty = szToITy(sz); 4242 IRTemp ta = newTemp(ty); /* rAX */ 4243 IRTemp td = newTemp(Ity_I64); /* RDI */ 4244 IRTemp tdv = newTemp(ty); /* (RDI) */ 4245 IRExpr *incd; 4246 4247 assign( ta, getIRegRAX(sz) ); 4248 4249 if (haveASO(pfx)) 4250 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4251 else 4252 assign( td, getIReg64(R_RDI) ); 4253 4254 assign( tdv, loadLE(ty,mkexpr(td)) ); 4255 4256 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4257 4258 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4259 if (haveASO(pfx)) 4260 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4261 putIReg64( R_RDI, incd ); 4262 } 4263 4264 4265 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume 4266 the insn is the last one in the basic block, and so emit a jump to 4267 the next insn, rather than just falling through. */ 4268 static 4269 void dis_REP_op ( AMD64Condcode cond, 4270 void (*dis_OP)(Int, IRTemp, Prefix), 4271 Int sz, Addr64 rip, Addr64 rip_next, HChar* name, 4272 Prefix pfx ) 4273 { 4274 IRTemp t_inc = newTemp(Ity_I64); 4275 IRTemp tc; 4276 IRExpr* cmp; 4277 4278 /* Really we ought to inspect the override prefixes, but we don't. 4279 The following assertion catches any resulting sillyness. */ 4280 vassert(pfx == clearSegBits(pfx)); 4281 4282 if (haveASO(pfx)) { 4283 tc = newTemp(Ity_I32); /* ECX */ 4284 assign( tc, getIReg32(R_RCX) ); 4285 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0)); 4286 } else { 4287 tc = newTemp(Ity_I64); /* RCX */ 4288 assign( tc, getIReg64(R_RCX) ); 4289 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); 4290 } 4291 4292 stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) ); 4293 4294 if (haveASO(pfx)) 4295 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 4296 else 4297 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4298 4299 dis_string_op_increment(sz, t_inc); 4300 dis_OP (sz, t_inc, pfx); 4301 4302 if (cond == AMD64CondAlways) { 4303 jmp_lit(Ijk_Boring,rip); 4304 } else { 4305 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4306 Ijk_Boring, 4307 IRConst_U64(rip) ) ); 4308 jmp_lit(Ijk_Boring,rip_next); 4309 } 4310 DIP("%s%c\n", name, nameISize(sz)); 4311 } 4312 4313 4314 /*------------------------------------------------------------*/ 4315 /*--- Arithmetic, etc. ---*/ 4316 /*------------------------------------------------------------*/ 4317 4318 /* IMUL E, G. Supplied eip points to the modR/M byte. 
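   This is the two-operand form: G = G * E, truncated to the operand
   size, with the flag thunk set via setFlags_MUL.  For example
   "imull %ecx, %ebx" (E = ECX, G = EBX) leaves the low 32 bits of the
   product in EBX.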
*/ 4319 static 4320 ULong dis_mul_E_G ( VexAbiInfo* vbi, 4321 Prefix pfx, 4322 Int size, 4323 Long delta0 ) 4324 { 4325 Int alen; 4326 HChar dis_buf[50]; 4327 UChar rm = getUChar(delta0); 4328 IRType ty = szToITy(size); 4329 IRTemp te = newTemp(ty); 4330 IRTemp tg = newTemp(ty); 4331 IRTemp resLo = newTemp(ty); 4332 4333 assign( tg, getIRegG(size, pfx, rm) ); 4334 if (epartIsReg(rm)) { 4335 assign( te, getIRegE(size, pfx, rm) ); 4336 } else { 4337 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4338 assign( te, loadLE(ty,mkexpr(addr)) ); 4339 } 4340 4341 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4342 4343 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4344 4345 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4346 4347 if (epartIsReg(rm)) { 4348 DIP("imul%c %s, %s\n", nameISize(size), 4349 nameIRegE(size,pfx,rm), 4350 nameIRegG(size,pfx,rm)); 4351 return 1+delta0; 4352 } else { 4353 DIP("imul%c %s, %s\n", nameISize(size), 4354 dis_buf, 4355 nameIRegG(size,pfx,rm)); 4356 return alen+delta0; 4357 } 4358 } 4359 4360 4361 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */ 4362 static 4363 ULong dis_imul_I_E_G ( VexAbiInfo* vbi, 4364 Prefix pfx, 4365 Int size, 4366 Long delta, 4367 Int litsize ) 4368 { 4369 Long d64; 4370 Int alen; 4371 HChar dis_buf[50]; 4372 UChar rm = getUChar(delta); 4373 IRType ty = szToITy(size); 4374 IRTemp te = newTemp(ty); 4375 IRTemp tl = newTemp(ty); 4376 IRTemp resLo = newTemp(ty); 4377 4378 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4379 4380 if (epartIsReg(rm)) { 4381 assign(te, getIRegE(size, pfx, rm)); 4382 delta++; 4383 } else { 4384 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4385 imin(4,litsize) ); 4386 assign(te, loadLE(ty, mkexpr(addr))); 4387 delta += alen; 4388 } 4389 d64 = getSDisp(imin(4,litsize),delta); 4390 delta += imin(4,litsize); 4391 4392 d64 &= mkSizeMask(size); 4393 assign(tl, mkU(ty,d64)); 4394 4395 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4396 4397 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4398 4399 putIRegG(size, pfx, rm, mkexpr(resLo)); 4400 4401 DIP("imul%c $%lld, %s, %s\n", 4402 nameISize(size), d64, 4403 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4404 nameIRegG(size,pfx,rm) ); 4405 return delta; 4406 } 4407 4408 4409 /* Generate an IR sequence to do a popcount operation on the supplied 4410 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4411 Ity_I16, Ity_I32 or Ity_I64 only. 
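   The scheme is the usual parallel ("SWAR") bit-count: at step i the
   word is viewed as fields of 2^(i+1) bits and each field is replaced
   by the sum of its two halves.  For the 16-bit case this amounts to
      x = (x & 0x5555) + ((x >> 1) & 0x5555);
      x = (x & 0x3333) + ((x >> 2) & 0x3333);
      x = (x & 0x0F0F) + ((x >> 4) & 0x0F0F);
      x = (x & 0x00FF) + ((x >> 8) & 0x00FF);
   which is exactly what the mask/shift tables below express.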
*/ 4412 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4413 { 4414 Int i; 4415 if (ty == Ity_I16) { 4416 IRTemp old = IRTemp_INVALID; 4417 IRTemp nyu = IRTemp_INVALID; 4418 IRTemp mask[4], shift[4]; 4419 for (i = 0; i < 4; i++) { 4420 mask[i] = newTemp(ty); 4421 shift[i] = 1 << i; 4422 } 4423 assign(mask[0], mkU16(0x5555)); 4424 assign(mask[1], mkU16(0x3333)); 4425 assign(mask[2], mkU16(0x0F0F)); 4426 assign(mask[3], mkU16(0x00FF)); 4427 old = src; 4428 for (i = 0; i < 4; i++) { 4429 nyu = newTemp(ty); 4430 assign(nyu, 4431 binop(Iop_Add16, 4432 binop(Iop_And16, 4433 mkexpr(old), 4434 mkexpr(mask[i])), 4435 binop(Iop_And16, 4436 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4437 mkexpr(mask[i])))); 4438 old = nyu; 4439 } 4440 return nyu; 4441 } 4442 if (ty == Ity_I32) { 4443 IRTemp old = IRTemp_INVALID; 4444 IRTemp nyu = IRTemp_INVALID; 4445 IRTemp mask[5], shift[5]; 4446 for (i = 0; i < 5; i++) { 4447 mask[i] = newTemp(ty); 4448 shift[i] = 1 << i; 4449 } 4450 assign(mask[0], mkU32(0x55555555)); 4451 assign(mask[1], mkU32(0x33333333)); 4452 assign(mask[2], mkU32(0x0F0F0F0F)); 4453 assign(mask[3], mkU32(0x00FF00FF)); 4454 assign(mask[4], mkU32(0x0000FFFF)); 4455 old = src; 4456 for (i = 0; i < 5; i++) { 4457 nyu = newTemp(ty); 4458 assign(nyu, 4459 binop(Iop_Add32, 4460 binop(Iop_And32, 4461 mkexpr(old), 4462 mkexpr(mask[i])), 4463 binop(Iop_And32, 4464 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4465 mkexpr(mask[i])))); 4466 old = nyu; 4467 } 4468 return nyu; 4469 } 4470 if (ty == Ity_I64) { 4471 IRTemp old = IRTemp_INVALID; 4472 IRTemp nyu = IRTemp_INVALID; 4473 IRTemp mask[6], shift[6]; 4474 for (i = 0; i < 6; i++) { 4475 mask[i] = newTemp(ty); 4476 shift[i] = 1 << i; 4477 } 4478 assign(mask[0], mkU64(0x5555555555555555ULL)); 4479 assign(mask[1], mkU64(0x3333333333333333ULL)); 4480 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4481 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4482 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4483 assign(mask[5], mkU64(0x00000000FFFFFFFFULL)); 4484 old = src; 4485 for (i = 0; i < 6; i++) { 4486 nyu = newTemp(ty); 4487 assign(nyu, 4488 binop(Iop_Add64, 4489 binop(Iop_And64, 4490 mkexpr(old), 4491 mkexpr(mask[i])), 4492 binop(Iop_And64, 4493 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4494 mkexpr(mask[i])))); 4495 old = nyu; 4496 } 4497 return nyu; 4498 } 4499 /*NOTREACHED*/ 4500 vassert(0); 4501 } 4502 4503 4504 /* Generate an IR sequence to do a count-leading-zeroes operation on 4505 the supplied IRTemp, and return a new IRTemp holding the result. 4506 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4507 the argument is zero, return the number of bits in the word (the 4508 natural semantics). */ 4509 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4510 { 4511 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4512 4513 IRTemp src64 = newTemp(Ity_I64); 4514 assign(src64, widenUto64( mkexpr(src) )); 4515 4516 IRTemp src64x = newTemp(Ity_I64); 4517 assign(src64x, 4518 binop(Iop_Shl64, mkexpr(src64), 4519 mkU8(64 - 8 * sizeofIRType(ty)))); 4520 4521 // Clz64 has undefined semantics when its input is zero, so 4522 // special-case around that. 
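   // In effect: res = (src == 0) ? 8*sizeofIRType(ty)
   //                             : Clz64(src << (64 - 8*sizeofIRType(ty)));
   // e.g. a 16-bit source of 0x0001 gives src64x = 0x0001 << 48, for
   // which Clz64 returns 15, as required.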
4523 IRTemp res64 = newTemp(Ity_I64); 4524 assign(res64, 4525 IRExpr_Mux0X( 4526 unop(Iop_1Uto8, 4527 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))), 4528 unop(Iop_Clz64, mkexpr(src64x)), 4529 mkU64(8 * sizeofIRType(ty)) 4530 )); 4531 4532 IRTemp res = newTemp(ty); 4533 assign(res, narrowTo(ty, mkexpr(res64))); 4534 return res; 4535 } 4536 4537 4538 /*------------------------------------------------------------*/ 4539 /*--- ---*/ 4540 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4541 /*--- ---*/ 4542 /*------------------------------------------------------------*/ 4543 4544 /* --- Helper functions for dealing with the register stack. --- */ 4545 4546 /* --- Set the emulation-warning pseudo-register. --- */ 4547 4548 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4549 { 4550 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4551 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 4552 } 4553 4554 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4555 4556 static IRExpr* mkQNaN64 ( void ) 4557 { 4558 /* QNaN is 0 2047 1 0(51times) 4559 == 0b 11111111111b 1 0(51times) 4560 == 0x7FF8 0000 0000 0000 4561 */ 4562 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4563 } 4564 4565 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4566 4567 static IRExpr* get_ftop ( void ) 4568 { 4569 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4570 } 4571 4572 static void put_ftop ( IRExpr* e ) 4573 { 4574 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4575 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4576 } 4577 4578 /* --------- Get/put the C3210 bits. --------- */ 4579 4580 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4581 { 4582 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4583 } 4584 4585 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4586 { 4587 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4588 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4589 } 4590 4591 /* --------- Get/put the FPU rounding mode. --------- */ 4592 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4593 { 4594 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4595 } 4596 4597 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4598 { 4599 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4600 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4601 } 4602 4603 4604 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4605 /* Produces a value in 0 .. 3, which is encoded as per the type 4606 IRRoundingMode. Since the guest_FPROUND value is also encoded as 4607 per IRRoundingMode, we merely need to get it and mask it for 4608 safety. 4609 */ 4610 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4611 { 4612 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4613 } 4614 4615 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4616 { 4617 return mkU32(Irrm_NEAREST); 4618 } 4619 4620 4621 /* --------- Get/set FP register tag bytes. --------- */ 4622 4623 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 4624 4625 static void put_ST_TAG ( Int i, IRExpr* value ) 4626 { 4627 IRRegArray* descr; 4628 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 4629 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4630 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 4631 } 4632 4633 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 4634 zero to indicate "Empty" and nonzero to indicate "NonEmpty". 
*/ 4635 4636 static IRExpr* get_ST_TAG ( Int i ) 4637 { 4638 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4639 return IRExpr_GetI( descr, get_ftop(), i ); 4640 } 4641 4642 4643 /* --------- Get/set FP registers. --------- */ 4644 4645 /* Given i, and some expression e, emit 'ST(i) = e' and set the 4646 register's tag to indicate the register is full. The previous 4647 state of the register is not checked. */ 4648 4649 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 4650 { 4651 IRRegArray* descr; 4652 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 4653 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4654 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 4655 /* Mark the register as in-use. */ 4656 put_ST_TAG(i, mkU8(1)); 4657 } 4658 4659 /* Given i, and some expression e, emit 4660 ST(i) = is_full(i) ? NaN : e 4661 and set the tag accordingly. 4662 */ 4663 4664 static void put_ST ( Int i, IRExpr* value ) 4665 { 4666 put_ST_UNCHECKED( i, 4667 IRExpr_Mux0X( get_ST_TAG(i), 4668 /* 0 means empty */ 4669 value, 4670 /* non-0 means full */ 4671 mkQNaN64() 4672 ) 4673 ); 4674 } 4675 4676 4677 /* Given i, generate an expression yielding 'ST(i)'. */ 4678 4679 static IRExpr* get_ST_UNCHECKED ( Int i ) 4680 { 4681 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4682 return IRExpr_GetI( descr, get_ftop(), i ); 4683 } 4684 4685 4686 /* Given i, generate an expression yielding 4687 is_full(i) ? ST(i) : NaN 4688 */ 4689 4690 static IRExpr* get_ST ( Int i ) 4691 { 4692 return 4693 IRExpr_Mux0X( get_ST_TAG(i), 4694 /* 0 means empty */ 4695 mkQNaN64(), 4696 /* non-0 means full */ 4697 get_ST_UNCHECKED(i)); 4698 } 4699 4700 4701 /* Adjust FTOP downwards by one register. */ 4702 4703 static void fp_push ( void ) 4704 { 4705 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 4706 } 4707 4708 /* Adjust FTOP upwards by one register, and mark the vacated register 4709 as empty. */ 4710 4711 static void fp_pop ( void ) 4712 { 4713 put_ST_TAG(0, mkU8(0)); 4714 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4715 } 4716 4717 /* Clear the C2 bit of the FPU status register, for 4718 sin/cos/tan/sincos. */ 4719 4720 static void clear_C2 ( void ) 4721 { 4722 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); 4723 } 4724 4725 /* Invent a plausible-looking FPU status word value: 4726 ((ftop & 7) << 11) | (c3210 & 0x4700) 4727 */ 4728 static IRExpr* get_FPU_sw ( void ) 4729 { 4730 return 4731 unop(Iop_32to16, 4732 binop(Iop_Or32, 4733 binop(Iop_Shl32, 4734 binop(Iop_And32, get_ftop(), mkU32(7)), 4735 mkU8(11)), 4736 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 4737 mkU32(0x4700)) 4738 )); 4739 } 4740 4741 4742 /* ------------------------------------------------------- */ 4743 /* Given all that stack-mangling junk, we can now go ahead 4744 and describe FP instructions. 4745 */ 4746 4747 /* ST(0) = ST(0) `op` mem64/32(addr) 4748 Need to check ST(0)'s tag on read, but not on write. 
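   'dbl' selects the memory operand width: True means the 64-bit value
   is used directly, False means a 32-bit value is loaded and widened
   with Iop_F32toF64 first.  The callers below use this for the 0xD8
   (single-real) and 0xDC (double-real) memory forms of FADD/FMUL/
   FSUB/FDIV; the reversed-operand FSUBR/FDIVR forms go through the
   companion fp_do_oprev_mem_ST_0 below.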
4749 */ 4750 static 4751 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4752 IROp op, Bool dbl ) 4753 { 4754 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4755 if (dbl) { 4756 put_ST_UNCHECKED(0, 4757 triop( op, 4758 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4759 get_ST(0), 4760 loadLE(Ity_F64,mkexpr(addr)) 4761 )); 4762 } else { 4763 put_ST_UNCHECKED(0, 4764 triop( op, 4765 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4766 get_ST(0), 4767 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 4768 )); 4769 } 4770 } 4771 4772 4773 /* ST(0) = mem64/32(addr) `op` ST(0) 4774 Need to check ST(0)'s tag on read, but not on write. 4775 */ 4776 static 4777 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4778 IROp op, Bool dbl ) 4779 { 4780 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4781 if (dbl) { 4782 put_ST_UNCHECKED(0, 4783 triop( op, 4784 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4785 loadLE(Ity_F64,mkexpr(addr)), 4786 get_ST(0) 4787 )); 4788 } else { 4789 put_ST_UNCHECKED(0, 4790 triop( op, 4791 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4792 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 4793 get_ST(0) 4794 )); 4795 } 4796 } 4797 4798 4799 /* ST(dst) = ST(dst) `op` ST(src). 4800 Check dst and src tags when reading but not on write. 4801 */ 4802 static 4803 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4804 Bool pop_after ) 4805 { 4806 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4807 put_ST_UNCHECKED( 4808 st_dst, 4809 triop( op, 4810 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4811 get_ST(st_dst), 4812 get_ST(st_src) ) 4813 ); 4814 if (pop_after) 4815 fp_pop(); 4816 } 4817 4818 /* ST(dst) = ST(src) `op` ST(dst). 4819 Check dst and src tags when reading but not on write. 4820 */ 4821 static 4822 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4823 Bool pop_after ) 4824 { 4825 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4826 put_ST_UNCHECKED( 4827 st_dst, 4828 triop( op, 4829 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4830 get_ST(st_src), 4831 get_ST(st_dst) ) 4832 ); 4833 if (pop_after) 4834 fp_pop(); 4835 } 4836 4837 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 4838 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 4839 { 4840 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 4841 /* This is a bit of a hack (and isn't really right). It sets 4842 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 4843 documentation implies A and S are unchanged. 4844 */ 4845 /* It's also fishy in that it is used both for COMIP and 4846 UCOMIP, and they aren't the same (although similar). 
*/ 4847 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 4848 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 4849 stmt( IRStmt_Put( 4850 OFFB_CC_DEP1, 4851 binop( Iop_And64, 4852 unop( Iop_32Uto64, 4853 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 4854 mkU64(0x45) 4855 ))); 4856 if (pop_after) 4857 fp_pop(); 4858 } 4859 4860 4861 /* returns 4862 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 4863 */ 4864 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 4865 { 4866 IRTemp t32 = newTemp(Ity_I32); 4867 assign( t32, e32 ); 4868 return 4869 IRExpr_Mux0X( 4870 unop(Iop_1Uto8, 4871 binop(Iop_CmpLT64U, 4872 unop(Iop_32Uto64, 4873 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 4874 mkU64(65536))), 4875 mkU16( 0x8000 ), 4876 unop(Iop_32to16, mkexpr(t32))); 4877 } 4878 4879 4880 static 4881 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 4882 VexAbiInfo* vbi, Prefix pfx, Long delta ) 4883 { 4884 Int len; 4885 UInt r_src, r_dst; 4886 HChar dis_buf[50]; 4887 IRTemp t1, t2; 4888 4889 /* On entry, delta points at the second byte of the insn (the modrm 4890 byte).*/ 4891 UChar first_opcode = getUChar(delta-1); 4892 UChar modrm = getUChar(delta+0); 4893 4894 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 4895 4896 if (first_opcode == 0xD8) { 4897 if (modrm < 0xC0) { 4898 4899 /* bits 5,4,3 are an opcode extension, and the modRM also 4900 specifies an address. */ 4901 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 4902 delta += len; 4903 4904 switch (gregLO3ofRM(modrm)) { 4905 4906 case 0: /* FADD single-real */ 4907 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 4908 break; 4909 4910 case 1: /* FMUL single-real */ 4911 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 4912 break; 4913 4914 //.. case 2: /* FCOM single-real */ 4915 //.. DIP("fcoms %s\n", dis_buf); 4916 //.. /* This forces C1 to zero, which isn't right. */ 4917 //.. put_C3210( 4918 //.. binop( Iop_And32, 4919 //.. binop(Iop_Shl32, 4920 //.. binop(Iop_CmpF64, 4921 //.. get_ST(0), 4922 //.. unop(Iop_F32toF64, 4923 //.. loadLE(Ity_F32,mkexpr(addr)))), 4924 //.. mkU8(8)), 4925 //.. mkU32(0x4500) 4926 //.. )); 4927 //.. break; 4928 //.. 4929 //.. case 3: /* FCOMP single-real */ 4930 //.. DIP("fcomps %s\n", dis_buf); 4931 //.. /* This forces C1 to zero, which isn't right. */ 4932 //.. put_C3210( 4933 //.. binop( Iop_And32, 4934 //.. binop(Iop_Shl32, 4935 //.. binop(Iop_CmpF64, 4936 //.. get_ST(0), 4937 //.. unop(Iop_F32toF64, 4938 //.. loadLE(Ity_F32,mkexpr(addr)))), 4939 //.. mkU8(8)), 4940 //.. mkU32(0x4500) 4941 //.. )); 4942 //.. fp_pop(); 4943 //.. break; 4944 4945 case 4: /* FSUB single-real */ 4946 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 4947 break; 4948 4949 case 5: /* FSUBR single-real */ 4950 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 4951 break; 4952 4953 case 6: /* FDIV single-real */ 4954 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 4955 break; 4956 4957 case 7: /* FDIVR single-real */ 4958 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 4959 break; 4960 4961 default: 4962 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 4963 vex_printf("first_opcode == 0xD8\n"); 4964 goto decode_fail; 4965 } 4966 } else { 4967 delta++; 4968 switch (modrm) { 4969 4970 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 4971 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 4972 break; 4973 4974 case 0xC8 ... 
0xCF: /* FMUL %st(?),%st(0) */ 4975 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 4976 break; 4977 4978 /* Dunno if this is right */ 4979 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 4980 r_dst = (UInt)modrm - 0xD0; 4981 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 4982 /* This forces C1 to zero, which isn't right. */ 4983 put_C3210( 4984 unop(Iop_32Uto64, 4985 binop( Iop_And32, 4986 binop(Iop_Shl32, 4987 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 4988 mkU8(8)), 4989 mkU32(0x4500) 4990 ))); 4991 break; 4992 4993 /* Dunno if this is right */ 4994 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 4995 r_dst = (UInt)modrm - 0xD8; 4996 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 4997 /* This forces C1 to zero, which isn't right. */ 4998 put_C3210( 4999 unop(Iop_32Uto64, 5000 binop( Iop_And32, 5001 binop(Iop_Shl32, 5002 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5003 mkU8(8)), 5004 mkU32(0x4500) 5005 ))); 5006 fp_pop(); 5007 break; 5008 5009 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5010 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5011 break; 5012 5013 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5014 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5015 break; 5016 5017 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5018 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5019 break; 5020 5021 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5022 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5023 break; 5024 5025 default: 5026 goto decode_fail; 5027 } 5028 } 5029 } 5030 5031 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5032 else 5033 if (first_opcode == 0xD9) { 5034 if (modrm < 0xC0) { 5035 5036 /* bits 5,4,3 are an opcode extension, and the modRM also 5037 specifies an address. 
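            The D9 memory forms decoded here are: /0 FLD m32fp,
            /2 FST m32fp, /3 FSTP m32fp, /4 FLDENV m28, /5 FLDCW m16,
            /6 FNSTENV m28 and /7 FNSTCW m16; /1 is not handled.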
*/ 5038 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5039 delta += len; 5040 5041 switch (gregLO3ofRM(modrm)) { 5042 5043 case 0: /* FLD single-real */ 5044 DIP("flds %s\n", dis_buf); 5045 fp_push(); 5046 put_ST(0, unop(Iop_F32toF64, 5047 loadLE(Ity_F32, mkexpr(addr)))); 5048 break; 5049 5050 case 2: /* FST single-real */ 5051 DIP("fsts %s\n", dis_buf); 5052 storeLE(mkexpr(addr), 5053 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5054 break; 5055 5056 case 3: /* FSTP single-real */ 5057 DIP("fstps %s\n", dis_buf); 5058 storeLE(mkexpr(addr), 5059 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5060 fp_pop(); 5061 break; 5062 5063 case 4: { /* FLDENV m28 */ 5064 /* Uses dirty helper: 5065 VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5066 IRTemp ew = newTemp(Ity_I32); 5067 IRTemp w64 = newTemp(Ity_I64); 5068 IRDirty* d = unsafeIRDirty_0_N ( 5069 0/*regparms*/, 5070 "amd64g_dirtyhelper_FLDENV", 5071 &amd64g_dirtyhelper_FLDENV, 5072 mkIRExprVec_1( mkexpr(addr) ) 5073 ); 5074 d->needsBBP = True; 5075 d->tmp = w64; 5076 /* declare we're reading memory */ 5077 d->mFx = Ifx_Read; 5078 d->mAddr = mkexpr(addr); 5079 d->mSize = 28; 5080 5081 /* declare we're writing guest state */ 5082 d->nFxState = 4; 5083 5084 d->fxState[0].fx = Ifx_Write; 5085 d->fxState[0].offset = OFFB_FTOP; 5086 d->fxState[0].size = sizeof(UInt); 5087 5088 d->fxState[1].fx = Ifx_Write; 5089 d->fxState[1].offset = OFFB_FPTAGS; 5090 d->fxState[1].size = 8 * sizeof(UChar); 5091 5092 d->fxState[2].fx = Ifx_Write; 5093 d->fxState[2].offset = OFFB_FPROUND; 5094 d->fxState[2].size = sizeof(ULong); 5095 5096 d->fxState[3].fx = Ifx_Write; 5097 d->fxState[3].offset = OFFB_FC3210; 5098 d->fxState[3].size = sizeof(ULong); 5099 5100 stmt( IRStmt_Dirty(d) ); 5101 5102 /* ew contains any emulation warning we may need to 5103 issue. If needed, side-exit to the next insn, 5104 reporting the warning, so that Valgrind's dispatcher 5105 sees the warning. */ 5106 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5107 put_emwarn( mkexpr(ew) ); 5108 stmt( 5109 IRStmt_Exit( 5110 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5111 Ijk_EmWarn, 5112 IRConst_U64( guest_RIP_bbstart+delta ) 5113 ) 5114 ); 5115 5116 DIP("fldenv %s\n", dis_buf); 5117 break; 5118 } 5119 5120 case 5: {/* FLDCW */ 5121 /* The only thing we observe in the control word is the 5122 rounding mode. Therefore, pass the 16-bit value 5123 (x87 native-format control word) to a clean helper, 5124 getting back a 64-bit value, the lower half of which 5125 is the FPROUND value to store, and the upper half of 5126 which is the emulation-warning token which may be 5127 generated. 5128 */ 5129 /* ULong amd64h_check_fldcw ( ULong ); */ 5130 IRTemp t64 = newTemp(Ity_I64); 5131 IRTemp ew = newTemp(Ity_I32); 5132 DIP("fldcw %s\n", dis_buf); 5133 assign( t64, mkIRExprCCall( 5134 Ity_I64, 0/*regparms*/, 5135 "amd64g_check_fldcw", 5136 &amd64g_check_fldcw, 5137 mkIRExprVec_1( 5138 unop( Iop_16Uto64, 5139 loadLE(Ity_I16, mkexpr(addr))) 5140 ) 5141 ) 5142 ); 5143 5144 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5145 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5146 put_emwarn( mkexpr(ew) ); 5147 /* Finally, if an emulation warning was reported, 5148 side-exit to the next insn, reporting the warning, 5149 so that Valgrind's dispatcher sees the warning. 
*/ 5150 stmt( 5151 IRStmt_Exit( 5152 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5153 Ijk_EmWarn, 5154 IRConst_U64( guest_RIP_bbstart+delta ) 5155 ) 5156 ); 5157 break; 5158 } 5159 5160 case 6: { /* FNSTENV m28 */ 5161 /* Uses dirty helper: 5162 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5163 IRDirty* d = unsafeIRDirty_0_N ( 5164 0/*regparms*/, 5165 "amd64g_dirtyhelper_FSTENV", 5166 &amd64g_dirtyhelper_FSTENV, 5167 mkIRExprVec_1( mkexpr(addr) ) 5168 ); 5169 d->needsBBP = True; 5170 /* declare we're writing memory */ 5171 d->mFx = Ifx_Write; 5172 d->mAddr = mkexpr(addr); 5173 d->mSize = 28; 5174 5175 /* declare we're reading guest state */ 5176 d->nFxState = 4; 5177 5178 d->fxState[0].fx = Ifx_Read; 5179 d->fxState[0].offset = OFFB_FTOP; 5180 d->fxState[0].size = sizeof(UInt); 5181 5182 d->fxState[1].fx = Ifx_Read; 5183 d->fxState[1].offset = OFFB_FPTAGS; 5184 d->fxState[1].size = 8 * sizeof(UChar); 5185 5186 d->fxState[2].fx = Ifx_Read; 5187 d->fxState[2].offset = OFFB_FPROUND; 5188 d->fxState[2].size = sizeof(ULong); 5189 5190 d->fxState[3].fx = Ifx_Read; 5191 d->fxState[3].offset = OFFB_FC3210; 5192 d->fxState[3].size = sizeof(ULong); 5193 5194 stmt( IRStmt_Dirty(d) ); 5195 5196 DIP("fnstenv %s\n", dis_buf); 5197 break; 5198 } 5199 5200 case 7: /* FNSTCW */ 5201 /* Fake up a native x87 FPU control word. The only 5202 thing it depends on is FPROUND[1:0], so call a clean 5203 helper to cook it up. */ 5204 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5205 DIP("fnstcw %s\n", dis_buf); 5206 storeLE( 5207 mkexpr(addr), 5208 unop( Iop_64to16, 5209 mkIRExprCCall( 5210 Ity_I64, 0/*regp*/, 5211 "amd64g_create_fpucw", &amd64g_create_fpucw, 5212 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5213 ) 5214 ) 5215 ); 5216 break; 5217 5218 default: 5219 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5220 vex_printf("first_opcode == 0xD9\n"); 5221 goto decode_fail; 5222 } 5223 5224 } else { 5225 delta++; 5226 switch (modrm) { 5227 5228 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5229 r_src = (UInt)modrm - 0xC0; 5230 DIP("fld %%st(%u)\n", r_src); 5231 t1 = newTemp(Ity_F64); 5232 assign(t1, get_ST(r_src)); 5233 fp_push(); 5234 put_ST(0, mkexpr(t1)); 5235 break; 5236 5237 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5238 r_src = (UInt)modrm - 0xC8; 5239 DIP("fxch %%st(%u)\n", r_src); 5240 t1 = newTemp(Ity_F64); 5241 t2 = newTemp(Ity_F64); 5242 assign(t1, get_ST(0)); 5243 assign(t2, get_ST(r_src)); 5244 put_ST_UNCHECKED(0, mkexpr(t2)); 5245 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5246 break; 5247 5248 case 0xE0: /* FCHS */ 5249 DIP("fchs\n"); 5250 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5251 break; 5252 5253 case 0xE1: /* FABS */ 5254 DIP("fabs\n"); 5255 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5256 break; 5257 5258 case 0xE5: { /* FXAM */ 5259 /* This is an interesting one. It examines %st(0), 5260 regardless of whether the tag says it's empty or not. 5261 Here, just pass both the tag (in our format) and the 5262 value (as a double, actually a ULong) to a helper 5263 function. 
*/ 5264 IRExpr** args 5265 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5266 unop(Iop_ReinterpF64asI64, 5267 get_ST_UNCHECKED(0)) ); 5268 put_C3210(mkIRExprCCall( 5269 Ity_I64, 5270 0/*regparm*/, 5271 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5272 args 5273 )); 5274 DIP("fxam\n"); 5275 break; 5276 } 5277 5278 case 0xE8: /* FLD1 */ 5279 DIP("fld1\n"); 5280 fp_push(); 5281 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5282 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5283 break; 5284 5285 case 0xE9: /* FLDL2T */ 5286 DIP("fldl2t\n"); 5287 fp_push(); 5288 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5289 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5290 break; 5291 5292 case 0xEA: /* FLDL2E */ 5293 DIP("fldl2e\n"); 5294 fp_push(); 5295 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5296 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5297 break; 5298 5299 case 0xEB: /* FLDPI */ 5300 DIP("fldpi\n"); 5301 fp_push(); 5302 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5303 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5304 break; 5305 5306 case 0xEC: /* FLDLG2 */ 5307 DIP("fldlg2\n"); 5308 fp_push(); 5309 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5310 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5311 break; 5312 5313 case 0xED: /* FLDLN2 */ 5314 DIP("fldln2\n"); 5315 fp_push(); 5316 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5317 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5318 break; 5319 5320 case 0xEE: /* FLDZ */ 5321 DIP("fldz\n"); 5322 fp_push(); 5323 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5324 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5325 break; 5326 5327 case 0xF0: /* F2XM1 */ 5328 DIP("f2xm1\n"); 5329 put_ST_UNCHECKED(0, 5330 binop(Iop_2xm1F64, 5331 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5332 get_ST(0))); 5333 break; 5334 5335 case 0xF1: /* FYL2X */ 5336 DIP("fyl2x\n"); 5337 put_ST_UNCHECKED(1, 5338 triop(Iop_Yl2xF64, 5339 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5340 get_ST(1), 5341 get_ST(0))); 5342 fp_pop(); 5343 break; 5344 5345 case 0xF2: /* FPTAN */ 5346 DIP("ftan\n"); 5347 put_ST_UNCHECKED(0, 5348 binop(Iop_TanF64, 5349 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5350 get_ST(0))); 5351 fp_push(); 5352 put_ST(0, IRExpr_Const(IRConst_F64(1.0))); 5353 clear_C2(); /* HACK */ 5354 break; 5355 5356 case 0xF3: /* FPATAN */ 5357 DIP("fpatan\n"); 5358 put_ST_UNCHECKED(1, 5359 triop(Iop_AtanF64, 5360 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5361 get_ST(1), 5362 get_ST(0))); 5363 fp_pop(); 5364 break; 5365 5366 case 0xF4: { /* FXTRACT */ 5367 IRTemp argF = newTemp(Ity_F64); 5368 IRTemp sigF = newTemp(Ity_F64); 5369 IRTemp expF = newTemp(Ity_F64); 5370 IRTemp argI = newTemp(Ity_I64); 5371 IRTemp sigI = newTemp(Ity_I64); 5372 IRTemp expI = newTemp(Ity_I64); 5373 DIP("fxtract\n"); 5374 assign( argF, get_ST(0) ); 5375 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5376 assign( sigI, 5377 mkIRExprCCall( 5378 Ity_I64, 0/*regparms*/, 5379 "x86amd64g_calculate_FXTRACT", 5380 &x86amd64g_calculate_FXTRACT, 5381 mkIRExprVec_2( mkexpr(argI), 5382 mkIRExpr_HWord(0)/*sig*/ )) 5383 ); 5384 assign( expI, 5385 mkIRExprCCall( 5386 Ity_I64, 0/*regparms*/, 5387 "x86amd64g_calculate_FXTRACT", 5388 &x86amd64g_calculate_FXTRACT, 5389 mkIRExprVec_2( mkexpr(argI), 5390 mkIRExpr_HWord(1)/*exp*/ )) 5391 ); 5392 assign( sigF, 
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5393 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5394 /* exponent */ 5395 put_ST_UNCHECKED(0, mkexpr(expF) ); 5396 fp_push(); 5397 /* significand */ 5398 put_ST(0, mkexpr(sigF) ); 5399 break; 5400 } 5401 5402 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5403 IRTemp a1 = newTemp(Ity_F64); 5404 IRTemp a2 = newTemp(Ity_F64); 5405 DIP("fprem1\n"); 5406 /* Do FPREM1 twice, once to get the remainder, and once 5407 to get the C3210 flag values. */ 5408 assign( a1, get_ST(0) ); 5409 assign( a2, get_ST(1) ); 5410 put_ST_UNCHECKED(0, 5411 triop(Iop_PRem1F64, 5412 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5413 mkexpr(a1), 5414 mkexpr(a2))); 5415 put_C3210( 5416 unop(Iop_32Uto64, 5417 triop(Iop_PRem1C3210F64, 5418 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5419 mkexpr(a1), 5420 mkexpr(a2)) )); 5421 break; 5422 } 5423 5424 case 0xF7: /* FINCSTP */ 5425 DIP("fincstp\n"); 5426 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5427 break; 5428 5429 case 0xF8: { /* FPREM -- not IEEE compliant */ 5430 IRTemp a1 = newTemp(Ity_F64); 5431 IRTemp a2 = newTemp(Ity_F64); 5432 DIP("fprem\n"); 5433 /* Do FPREM twice, once to get the remainder, and once 5434 to get the C3210 flag values. */ 5435 assign( a1, get_ST(0) ); 5436 assign( a2, get_ST(1) ); 5437 put_ST_UNCHECKED(0, 5438 triop(Iop_PRemF64, 5439 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5440 mkexpr(a1), 5441 mkexpr(a2))); 5442 put_C3210( 5443 unop(Iop_32Uto64, 5444 triop(Iop_PRemC3210F64, 5445 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5446 mkexpr(a1), 5447 mkexpr(a2)) )); 5448 break; 5449 } 5450 5451 case 0xF9: /* FYL2XP1 */ 5452 DIP("fyl2xp1\n"); 5453 put_ST_UNCHECKED(1, 5454 triop(Iop_Yl2xp1F64, 5455 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5456 get_ST(1), 5457 get_ST(0))); 5458 fp_pop(); 5459 break; 5460 5461 case 0xFA: /* FSQRT */ 5462 DIP("fsqrt\n"); 5463 put_ST_UNCHECKED(0, 5464 binop(Iop_SqrtF64, 5465 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5466 get_ST(0))); 5467 break; 5468 5469 case 0xFB: { /* FSINCOS */ 5470 IRTemp a1 = newTemp(Ity_F64); 5471 assign( a1, get_ST(0) ); 5472 DIP("fsincos\n"); 5473 put_ST_UNCHECKED(0, 5474 binop(Iop_SinF64, 5475 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5476 mkexpr(a1))); 5477 fp_push(); 5478 put_ST(0, 5479 binop(Iop_CosF64, 5480 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5481 mkexpr(a1))); 5482 clear_C2(); /* HACK */ 5483 break; 5484 } 5485 5486 case 0xFC: /* FRNDINT */ 5487 DIP("frndint\n"); 5488 put_ST_UNCHECKED(0, 5489 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5490 break; 5491 5492 case 0xFD: /* FSCALE */ 5493 DIP("fscale\n"); 5494 put_ST_UNCHECKED(0, 5495 triop(Iop_ScaleF64, 5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5497 get_ST(0), 5498 get_ST(1))); 5499 break; 5500 5501 case 0xFE: /* FSIN */ 5502 DIP("fsin\n"); 5503 put_ST_UNCHECKED(0, 5504 binop(Iop_SinF64, 5505 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5506 get_ST(0))); 5507 clear_C2(); /* HACK */ 5508 break; 5509 5510 case 0xFF: /* FCOS */ 5511 DIP("fcos\n"); 5512 put_ST_UNCHECKED(0, 5513 binop(Iop_CosF64, 5514 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5515 get_ST(0))); 5516 clear_C2(); /* HACK */ 5517 break; 5518 5519 default: 5520 goto decode_fail; 5521 } 5522 } 5523 } 5524 5525 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 5526 else 5527 if (first_opcode == 0xDA) { 5528 5529 if (modrm < 0xC0) { 5530 5531 /* bits 5,4,3 are an opcode extension, and the modRM also 5532 specifies an address. 
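            The DA memory forms decoded here all combine ST(0) with a
            signed 32-bit integer operand: /0 FIADD, /1 FIMUL,
            /4 FISUB, /5 FISUBR, /6 FIDIV and /7 FIDIVR.  The /2 and
            /3 slots (FICOM/FICOMP) are not handled.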
         */
         IROp   fop;
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
         delta += len;
         switch (gregLO3ofRM(modrm)) {

            case 0: /* FIADD m32int */ /* ST(0) += m32int */
               DIP("fiaddl %s\n", dis_buf);
               fop = Iop_AddF64;
               goto do_fop_m32;

            case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
               DIP("fimull %s\n", dis_buf);
               fop = Iop_MulF64;
               goto do_fop_m32;

            case 4: /* FISUB m32int */ /* ST(0) -= m32int */
               DIP("fisubl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_fop_m32;

            case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
               DIP("fisubrl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_foprev_m32;

            case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
               DIP("fidivl %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_fop_m32;

            case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
               DIP("fidivrl %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_foprev_m32;

            do_fop_m32:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr)))));
               break;

            do_foprev_m32:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr))),
                        get_ST(0)));
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDA\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondB)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondZ)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondBE)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondP)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xE9: /* FUCOMPP %st(0),%st(1) */
               DIP("fucompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   unop(Iop_32Uto64,
                        binop( Iop_And32,
                               binop(Iop_Shl32,
                                     binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                                     mkU8(8)),
                               mkU32(0x4500)
                        )));
               fp_pop();
               fp_pop();
               break;

            default:
               goto decode_fail;
         }

      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDB) {
      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address.
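            The DB memory forms decoded here are: /0 FILD m32int,
            /1 FISTTP m32int (SSE3), /2 FIST m32int, /3 FISTP m32int,
            /5 FLD m80fp and /7 FSTP m80fp.  The 80-bit forms go via
            dirty helpers, since the IR carries FP values as F64 and
            has no 80-bit type.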
*/ 5666 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5667 delta += len; 5668 5669 switch (gregLO3ofRM(modrm)) { 5670 5671 case 0: /* FILD m32int */ 5672 DIP("fildl %s\n", dis_buf); 5673 fp_push(); 5674 put_ST(0, unop(Iop_I32StoF64, 5675 loadLE(Ity_I32, mkexpr(addr)))); 5676 break; 5677 5678 case 1: /* FISTTPL m32 (SSE3) */ 5679 DIP("fisttpl %s\n", dis_buf); 5680 storeLE( mkexpr(addr), 5681 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 5682 fp_pop(); 5683 break; 5684 5685 case 2: /* FIST m32 */ 5686 DIP("fistl %s\n", dis_buf); 5687 storeLE( mkexpr(addr), 5688 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 5689 break; 5690 5691 case 3: /* FISTP m32 */ 5692 DIP("fistpl %s\n", dis_buf); 5693 storeLE( mkexpr(addr), 5694 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 5695 fp_pop(); 5696 break; 5697 5698 case 5: { /* FLD extended-real */ 5699 /* Uses dirty helper: 5700 ULong amd64g_loadF80le ( ULong ) 5701 addr holds the address. First, do a dirty call to 5702 get hold of the data. */ 5703 IRTemp val = newTemp(Ity_I64); 5704 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 5705 5706 IRDirty* d = unsafeIRDirty_1_N ( 5707 val, 5708 0/*regparms*/, 5709 "amd64g_dirtyhelper_loadF80le", 5710 &amd64g_dirtyhelper_loadF80le, 5711 args 5712 ); 5713 /* declare that we're reading memory */ 5714 d->mFx = Ifx_Read; 5715 d->mAddr = mkexpr(addr); 5716 d->mSize = 10; 5717 5718 /* execute the dirty call, dumping the result in val. */ 5719 stmt( IRStmt_Dirty(d) ); 5720 fp_push(); 5721 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 5722 5723 DIP("fldt %s\n", dis_buf); 5724 break; 5725 } 5726 5727 case 7: { /* FSTP extended-real */ 5728 /* Uses dirty helper: 5729 void amd64g_storeF80le ( ULong addr, ULong data ) 5730 */ 5731 IRExpr** args 5732 = mkIRExprVec_2( mkexpr(addr), 5733 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 5734 5735 IRDirty* d = unsafeIRDirty_0_N ( 5736 0/*regparms*/, 5737 "amd64g_dirtyhelper_storeF80le", 5738 &amd64g_dirtyhelper_storeF80le, 5739 args 5740 ); 5741 /* declare we're writing memory */ 5742 d->mFx = Ifx_Write; 5743 d->mAddr = mkexpr(addr); 5744 d->mSize = 10; 5745 5746 /* execute the dirty call. */ 5747 stmt( IRStmt_Dirty(d) ); 5748 fp_pop(); 5749 5750 DIP("fstpt\n %s", dis_buf); 5751 break; 5752 } 5753 5754 default: 5755 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5756 vex_printf("first_opcode == 0xDB\n"); 5757 goto decode_fail; 5758 } 5759 5760 } else { 5761 5762 delta++; 5763 switch (modrm) { 5764 5765 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 5766 r_src = (UInt)modrm - 0xC0; 5767 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 5768 put_ST_UNCHECKED(0, 5769 IRExpr_Mux0X( 5770 unop(Iop_1Uto8, 5771 mk_amd64g_calculate_condition(AMD64CondNB)), 5772 get_ST(0), get_ST(r_src)) ); 5773 break; 5774 5775 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 5776 r_src = (UInt)modrm - 0xC8; 5777 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 5778 put_ST_UNCHECKED( 5779 0, 5780 IRExpr_Mux0X( 5781 unop(Iop_1Uto8, 5782 mk_amd64g_calculate_condition(AMD64CondNZ)), 5783 get_ST(0), 5784 get_ST(r_src) 5785 ) 5786 ); 5787 break; 5788 5789 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 5790 r_src = (UInt)modrm - 0xD0; 5791 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 5792 put_ST_UNCHECKED( 5793 0, 5794 IRExpr_Mux0X( 5795 unop(Iop_1Uto8, 5796 mk_amd64g_calculate_condition(AMD64CondNBE)), 5797 get_ST(0), 5798 get_ST(r_src) 5799 ) 5800 ); 5801 break; 5802 5803 case 0xD8 ... 
0xDF: /* FCMOVNU ST(i), ST(0) */ 5804 r_src = (UInt)modrm - 0xD8; 5805 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 5806 put_ST_UNCHECKED( 5807 0, 5808 IRExpr_Mux0X( 5809 unop(Iop_1Uto8, 5810 mk_amd64g_calculate_condition(AMD64CondNP)), 5811 get_ST(0), 5812 get_ST(r_src) 5813 ) 5814 ); 5815 break; 5816 5817 case 0xE2: 5818 DIP("fnclex\n"); 5819 break; 5820 5821 case 0xE3: { 5822 /* Uses dirty helper: 5823 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 5824 IRDirty* d = unsafeIRDirty_0_N ( 5825 0/*regparms*/, 5826 "amd64g_dirtyhelper_FINIT", 5827 &amd64g_dirtyhelper_FINIT, 5828 mkIRExprVec_0() 5829 ); 5830 d->needsBBP = True; 5831 5832 /* declare we're writing guest state */ 5833 d->nFxState = 5; 5834 5835 d->fxState[0].fx = Ifx_Write; 5836 d->fxState[0].offset = OFFB_FTOP; 5837 d->fxState[0].size = sizeof(UInt); 5838 5839 d->fxState[1].fx = Ifx_Write; 5840 d->fxState[1].offset = OFFB_FPREGS; 5841 d->fxState[1].size = 8 * sizeof(ULong); 5842 5843 d->fxState[2].fx = Ifx_Write; 5844 d->fxState[2].offset = OFFB_FPTAGS; 5845 d->fxState[2].size = 8 * sizeof(UChar); 5846 5847 d->fxState[3].fx = Ifx_Write; 5848 d->fxState[3].offset = OFFB_FPROUND; 5849 d->fxState[3].size = sizeof(ULong); 5850 5851 d->fxState[4].fx = Ifx_Write; 5852 d->fxState[4].offset = OFFB_FC3210; 5853 d->fxState[4].size = sizeof(ULong); 5854 5855 stmt( IRStmt_Dirty(d) ); 5856 5857 DIP("fninit\n"); 5858 break; 5859 } 5860 5861 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 5862 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 5863 break; 5864 5865 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 5866 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 5867 break; 5868 5869 default: 5870 goto decode_fail; 5871 } 5872 } 5873 } 5874 5875 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 5876 else 5877 if (first_opcode == 0xDC) { 5878 if (modrm < 0xC0) { 5879 5880 /* bits 5,4,3 are an opcode extension, and the modRM also 5881 specifies an address. */ 5882 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5883 delta += len; 5884 5885 switch (gregLO3ofRM(modrm)) { 5886 5887 case 0: /* FADD double-real */ 5888 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 5889 break; 5890 5891 case 1: /* FMUL double-real */ 5892 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 5893 break; 5894 5895 //.. case 2: /* FCOM double-real */ 5896 //.. DIP("fcoml %s\n", dis_buf); 5897 //.. /* This forces C1 to zero, which isn't right. */ 5898 //.. put_C3210( 5899 //.. binop( Iop_And32, 5900 //.. binop(Iop_Shl32, 5901 //.. binop(Iop_CmpF64, 5902 //.. get_ST(0), 5903 //.. loadLE(Ity_F64,mkexpr(addr))), 5904 //.. mkU8(8)), 5905 //.. mkU32(0x4500) 5906 //.. )); 5907 //.. break; 5908 5909 case 3: /* FCOMP double-real */ 5910 DIP("fcompl %s\n", dis_buf); 5911 /* This forces C1 to zero, which isn't right. 
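The shift-and-mask below works because Iop_CmpF64 returns 0x00 for GT, 0x01 for LT, 0x40 for EQ and 0x45 for unordered; shifting left by 8 lands those bits on C0 (bit 8), C2 (bit 10) and C3 (bit 14) of the status word, and the 0x4500 mask clears everything else, C1 included.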
*/ 5912 put_C3210( 5913 unop(Iop_32Uto64, 5914 binop( Iop_And32, 5915 binop(Iop_Shl32, 5916 binop(Iop_CmpF64, 5917 get_ST(0), 5918 loadLE(Ity_F64,mkexpr(addr))), 5919 mkU8(8)), 5920 mkU32(0x4500) 5921 ))); 5922 fp_pop(); 5923 break; 5924 5925 case 4: /* FSUB double-real */ 5926 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 5927 break; 5928 5929 case 5: /* FSUBR double-real */ 5930 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 5931 break; 5932 5933 case 6: /* FDIV double-real */ 5934 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 5935 break; 5936 5937 case 7: /* FDIVR double-real */ 5938 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 5939 break; 5940 5941 default: 5942 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5943 vex_printf("first_opcode == 0xDC\n"); 5944 goto decode_fail; 5945 } 5946 5947 } else { 5948 5949 delta++; 5950 switch (modrm) { 5951 5952 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 5953 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 5954 break; 5955 5956 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 5957 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 5958 break; 5959 5960 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 5961 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 5962 break; 5963 5964 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 5965 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 5966 break; 5967 5968 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 5969 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 5970 break; 5971 5972 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 5973 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 5974 break; 5975 5976 default: 5977 goto decode_fail; 5978 } 5979 5980 } 5981 } 5982 5983 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 5984 else 5985 if (first_opcode == 0xDD) { 5986 5987 if (modrm < 0xC0) { 5988 5989 /* bits 5,4,3 are an opcode extension, and the modRM also 5990 specifies an address. */ 5991 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5992 delta += len; 5993 5994 switch (gregLO3ofRM(modrm)) { 5995 5996 case 0: /* FLD double-real */ 5997 DIP("fldl %s\n", dis_buf); 5998 fp_push(); 5999 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6000 break; 6001 6002 case 1: /* FISTTPQ m64 (SSE3) */ 6003 DIP("fistppll %s\n", dis_buf); 6004 storeLE( mkexpr(addr), 6005 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6006 fp_pop(); 6007 break; 6008 6009 case 2: /* FST double-real */ 6010 DIP("fstl %s\n", dis_buf); 6011 storeLE(mkexpr(addr), get_ST(0)); 6012 break; 6013 6014 case 3: /* FSTP double-real */ 6015 DIP("fstpl %s\n", dis_buf); 6016 storeLE(mkexpr(addr), get_ST(0)); 6017 fp_pop(); 6018 break; 6019 6020 //.. case 4: { /* FRSTOR m108 */ 6021 //.. /* Uses dirty helper: 6022 //.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */ 6023 //.. IRTemp ew = newTemp(Ity_I32); 6024 //.. IRDirty* d = unsafeIRDirty_0_N ( 6025 //.. 0/*regparms*/, 6026 //.. "x86g_dirtyhelper_FRSTOR", 6027 //.. &x86g_dirtyhelper_FRSTOR, 6028 //.. mkIRExprVec_1( mkexpr(addr) ) 6029 //.. ); 6030 //.. d->needsBBP = True; 6031 //.. d->tmp = ew; 6032 //.. /* declare we're reading memory */ 6033 //.. d->mFx = Ifx_Read; 6034 //.. d->mAddr = mkexpr(addr); 6035 //.. d->mSize = 108; 6036 //.. 6037 //.. /* declare we're writing guest state */ 6038 //.. d->nFxState = 5; 6039 //.. 6040 //.. d->fxState[0].fx = Ifx_Write; 6041 //.. d->fxState[0].offset = OFFB_FTOP; 6042 //.. 
d->fxState[0].size = sizeof(UInt); 6043 //.. 6044 //.. d->fxState[1].fx = Ifx_Write; 6045 //.. d->fxState[1].offset = OFFB_FPREGS; 6046 //.. d->fxState[1].size = 8 * sizeof(ULong); 6047 //.. 6048 //.. d->fxState[2].fx = Ifx_Write; 6049 //.. d->fxState[2].offset = OFFB_FPTAGS; 6050 //.. d->fxState[2].size = 8 * sizeof(UChar); 6051 //.. 6052 //.. d->fxState[3].fx = Ifx_Write; 6053 //.. d->fxState[3].offset = OFFB_FPROUND; 6054 //.. d->fxState[3].size = sizeof(UInt); 6055 //.. 6056 //.. d->fxState[4].fx = Ifx_Write; 6057 //.. d->fxState[4].offset = OFFB_FC3210; 6058 //.. d->fxState[4].size = sizeof(UInt); 6059 //.. 6060 //.. stmt( IRStmt_Dirty(d) ); 6061 //.. 6062 //.. /* ew contains any emulation warning we may need to 6063 //.. issue. If needed, side-exit to the next insn, 6064 //.. reporting the warning, so that Valgrind's dispatcher 6065 //.. sees the warning. */ 6066 //.. put_emwarn( mkexpr(ew) ); 6067 //.. stmt( 6068 //.. IRStmt_Exit( 6069 //.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6070 //.. Ijk_EmWarn, 6071 //.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta) 6072 //.. ) 6073 //.. ); 6074 //.. 6075 //.. DIP("frstor %s\n", dis_buf); 6076 //.. break; 6077 //.. } 6078 //.. 6079 //.. case 6: { /* FNSAVE m108 */ 6080 //.. /* Uses dirty helper: 6081 //.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */ 6082 //.. IRDirty* d = unsafeIRDirty_0_N ( 6083 //.. 0/*regparms*/, 6084 //.. "x86g_dirtyhelper_FSAVE", 6085 //.. &x86g_dirtyhelper_FSAVE, 6086 //.. mkIRExprVec_1( mkexpr(addr) ) 6087 //.. ); 6088 //.. d->needsBBP = True; 6089 //.. /* declare we're writing memory */ 6090 //.. d->mFx = Ifx_Write; 6091 //.. d->mAddr = mkexpr(addr); 6092 //.. d->mSize = 108; 6093 //.. 6094 //.. /* declare we're reading guest state */ 6095 //.. d->nFxState = 5; 6096 //.. 6097 //.. d->fxState[0].fx = Ifx_Read; 6098 //.. d->fxState[0].offset = OFFB_FTOP; 6099 //.. d->fxState[0].size = sizeof(UInt); 6100 //.. 6101 //.. d->fxState[1].fx = Ifx_Read; 6102 //.. d->fxState[1].offset = OFFB_FPREGS; 6103 //.. d->fxState[1].size = 8 * sizeof(ULong); 6104 //.. 6105 //.. d->fxState[2].fx = Ifx_Read; 6106 //.. d->fxState[2].offset = OFFB_FPTAGS; 6107 //.. d->fxState[2].size = 8 * sizeof(UChar); 6108 //.. 6109 //.. d->fxState[3].fx = Ifx_Read; 6110 //.. d->fxState[3].offset = OFFB_FPROUND; 6111 //.. d->fxState[3].size = sizeof(UInt); 6112 //.. 6113 //.. d->fxState[4].fx = Ifx_Read; 6114 //.. d->fxState[4].offset = OFFB_FC3210; 6115 //.. d->fxState[4].size = sizeof(UInt); 6116 //.. 6117 //.. stmt( IRStmt_Dirty(d) ); 6118 //.. 6119 //.. DIP("fnsave %s\n", dis_buf); 6120 //.. break; 6121 //.. } 6122 6123 case 7: { /* FNSTSW m16 */ 6124 IRExpr* sw = get_FPU_sw(); 6125 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6126 storeLE( mkexpr(addr), sw ); 6127 DIP("fnstsw %s\n", dis_buf); 6128 break; 6129 } 6130 6131 default: 6132 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6133 vex_printf("first_opcode == 0xDD\n"); 6134 goto decode_fail; 6135 } 6136 } else { 6137 delta++; 6138 switch (modrm) { 6139 6140 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6141 r_dst = (UInt)modrm - 0xC0; 6142 DIP("ffree %%st(%u)\n", r_dst); 6143 put_ST_TAG ( r_dst, mkU8(0) ); 6144 break; 6145 6146 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6147 r_dst = (UInt)modrm - 0xD0; 6148 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6149 /* P4 manual says: "If the destination operand is a 6150 non-empty register, the invalid-operation exception 6151 is not generated. Hence put_ST_UNCHECKED. 
*/ 6152 put_ST_UNCHECKED(r_dst, get_ST(0)); 6153 break; 6154 6155 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6156 r_dst = (UInt)modrm - 0xD8; 6157 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6158 /* P4 manual says: "If the destination operand is a 6159 non-empty register, the invalid-operation exception 6160 is not generated. Hence put_ST_UNCHECKED. */ 6161 put_ST_UNCHECKED(r_dst, get_ST(0)); 6162 fp_pop(); 6163 break; 6164 6165 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 6166 r_dst = (UInt)modrm - 0xE0; 6167 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6168 /* This forces C1 to zero, which isn't right. */ 6169 put_C3210( 6170 unop(Iop_32Uto64, 6171 binop( Iop_And32, 6172 binop(Iop_Shl32, 6173 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6174 mkU8(8)), 6175 mkU32(0x4500) 6176 ))); 6177 break; 6178 6179 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 6180 r_dst = (UInt)modrm - 0xE8; 6181 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6182 /* This forces C1 to zero, which isn't right. */ 6183 put_C3210( 6184 unop(Iop_32Uto64, 6185 binop( Iop_And32, 6186 binop(Iop_Shl32, 6187 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6188 mkU8(8)), 6189 mkU32(0x4500) 6190 ))); 6191 fp_pop(); 6192 break; 6193 6194 default: 6195 goto decode_fail; 6196 } 6197 } 6198 } 6199 6200 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6201 else 6202 if (first_opcode == 0xDE) { 6203 6204 if (modrm < 0xC0) { 6205 6206 /* bits 5,4,3 are an opcode extension, and the modRM also 6207 specifies an address. */ 6208 IROp fop; 6209 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6210 delta += len; 6211 6212 switch (gregLO3ofRM(modrm)) { 6213 6214 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6215 DIP("fiaddw %s\n", dis_buf); 6216 fop = Iop_AddF64; 6217 goto do_fop_m16; 6218 6219 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6220 DIP("fimulw %s\n", dis_buf); 6221 fop = Iop_MulF64; 6222 goto do_fop_m16; 6223 6224 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6225 DIP("fisubw %s\n", dis_buf); 6226 fop = Iop_SubF64; 6227 goto do_fop_m16; 6228 6229 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6230 DIP("fisubrw %s\n", dis_buf); 6231 fop = Iop_SubF64; 6232 goto do_foprev_m16; 6233 6234 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6235 DIP("fisubw %s\n", dis_buf); 6236 fop = Iop_DivF64; 6237 goto do_fop_m16; 6238 6239 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6240 DIP("fidivrw %s\n", dis_buf); 6241 fop = Iop_DivF64; 6242 goto do_foprev_m16; 6243 6244 do_fop_m16: 6245 put_ST_UNCHECKED(0, 6246 triop(fop, 6247 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6248 get_ST(0), 6249 unop(Iop_I32StoF64, 6250 unop(Iop_16Sto32, 6251 loadLE(Ity_I16, mkexpr(addr)))))); 6252 break; 6253 6254 do_foprev_m16: 6255 put_ST_UNCHECKED(0, 6256 triop(fop, 6257 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6258 unop(Iop_I32StoF64, 6259 unop(Iop_16Sto32, 6260 loadLE(Ity_I16, mkexpr(addr)))), 6261 get_ST(0))); 6262 break; 6263 6264 default: 6265 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6266 vex_printf("first_opcode == 0xDE\n"); 6267 goto decode_fail; 6268 } 6269 6270 } else { 6271 6272 delta++; 6273 switch (modrm) { 6274 6275 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6276 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6277 break; 6278 6279 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) 
*/ 6280 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6281 break; 6282 6283 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6284 DIP("fcompp %%st(0),%%st(1)\n"); 6285 /* This forces C1 to zero, which isn't right. */ 6286 put_C3210( 6287 unop(Iop_32Uto64, 6288 binop( Iop_And32, 6289 binop(Iop_Shl32, 6290 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6291 mkU8(8)), 6292 mkU32(0x4500) 6293 ))); 6294 fp_pop(); 6295 fp_pop(); 6296 break; 6297 6298 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6299 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6300 break; 6301 6302 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6303 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6304 break; 6305 6306 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6307 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6308 break; 6309 6310 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6311 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6312 break; 6313 6314 default: 6315 goto decode_fail; 6316 } 6317 6318 } 6319 } 6320 6321 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6322 else 6323 if (first_opcode == 0xDF) { 6324 6325 if (modrm < 0xC0) { 6326 6327 /* bits 5,4,3 are an opcode extension, and the modRM also 6328 specifies an address. */ 6329 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6330 delta += len; 6331 6332 switch (gregLO3ofRM(modrm)) { 6333 6334 case 0: /* FILD m16int */ 6335 DIP("fildw %s\n", dis_buf); 6336 fp_push(); 6337 put_ST(0, unop(Iop_I32StoF64, 6338 unop(Iop_16Sto32, 6339 loadLE(Ity_I16, mkexpr(addr))))); 6340 break; 6341 6342 case 1: /* FISTTPS m16 (SSE3) */ 6343 DIP("fisttps %s\n", dis_buf); 6344 storeLE( mkexpr(addr), 6345 x87ishly_qnarrow_32_to_16( 6346 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6347 fp_pop(); 6348 break; 6349 6350 case 2: /* FIST m16 */ 6351 DIP("fists %s\n", dis_buf); 6352 storeLE( mkexpr(addr), 6353 x87ishly_qnarrow_32_to_16( 6354 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6355 break; 6356 6357 case 3: /* FISTP m16 */ 6358 DIP("fistps %s\n", dis_buf); 6359 storeLE( mkexpr(addr), 6360 x87ishly_qnarrow_32_to_16( 6361 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6362 fp_pop(); 6363 break; 6364 6365 case 5: /* FILD m64 */ 6366 DIP("fildll %s\n", dis_buf); 6367 fp_push(); 6368 put_ST(0, binop(Iop_I64StoF64, 6369 get_roundingmode(), 6370 loadLE(Ity_I64, mkexpr(addr)))); 6371 break; 6372 6373 case 7: /* FISTP m64 */ 6374 DIP("fistpll %s\n", dis_buf); 6375 storeLE( mkexpr(addr), 6376 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6377 fp_pop(); 6378 break; 6379 6380 default: 6381 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6382 vex_printf("first_opcode == 0xDF\n"); 6383 goto decode_fail; 6384 } 6385 6386 } else { 6387 6388 delta++; 6389 switch (modrm) { 6390 6391 case 0xC0: /* FFREEP %st(0) */ 6392 DIP("ffreep %%st(%d)\n", 0); 6393 put_ST_TAG ( 0, mkU8(0) ); 6394 fp_pop(); 6395 break; 6396 6397 case 0xE0: /* FNSTSW %ax */ 6398 DIP("fnstsw %%ax\n"); 6399 /* Invent a plausible-looking FPU status word value and 6400 dump it in %AX: 6401 ((ftop & 7) << 11) | (c3210 & 0x4700) 6402 */ 6403 putIRegRAX( 6404 2, 6405 unop(Iop_32to16, 6406 binop(Iop_Or32, 6407 binop(Iop_Shl32, 6408 binop(Iop_And32, get_ftop(), mkU32(7)), 6409 mkU8(11)), 6410 binop(Iop_And32, 6411 unop(Iop_64to32, get_C3210()), 6412 mkU32(0x4700)) 6413 ))); 6414 break; 6415 6416 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) 
*/ 6417 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 6418 break; 6419 6420 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */ 6421 /* not really right since COMIP != UCOMIP */ 6422 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 6423 break; 6424 6425 default: 6426 goto decode_fail; 6427 } 6428 } 6429 6430 } 6431 6432 else 6433 goto decode_fail; 6434 6435 *decode_ok = True; 6436 return delta; 6437 6438 decode_fail: 6439 *decode_ok = False; 6440 return delta; 6441 } 6442 6443 6444 /*------------------------------------------------------------*/ 6445 /*--- ---*/ 6446 /*--- MMX INSTRUCTIONS ---*/ 6447 /*--- ---*/ 6448 /*------------------------------------------------------------*/ 6449 6450 /* Effect of MMX insns on x87 FPU state (table 11-2 of 6451 IA32 arch manual, volume 3): 6452 6453 Read from, or write to MMX register (viz, any insn except EMMS): 6454 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 6455 * FP stack pointer set to zero 6456 6457 EMMS: 6458 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 6459 * FP stack pointer set to zero 6460 */ 6461 6462 static void do_MMX_preamble ( void ) 6463 { 6464 Int i; 6465 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 6466 IRExpr* zero = mkU32(0); 6467 IRExpr* tag1 = mkU8(1); 6468 put_ftop(zero); 6469 for (i = 0; i < 8; i++) 6470 stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); 6471 } 6472 6473 static void do_EMMS_preamble ( void ) 6474 { 6475 Int i; 6476 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 6477 IRExpr* zero = mkU32(0); 6478 IRExpr* tag0 = mkU8(0); 6479 put_ftop(zero); 6480 for (i = 0; i < 8; i++) 6481 stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); 6482 } 6483 6484 6485 static IRExpr* getMMXReg ( UInt archreg ) 6486 { 6487 vassert(archreg < 8); 6488 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 6489 } 6490 6491 6492 static void putMMXReg ( UInt archreg, IRExpr* e ) 6493 { 6494 vassert(archreg < 8); 6495 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 6496 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 6497 } 6498 6499 6500 /* Helper for non-shift MMX insns. Note this is incomplete in the 6501 sense that it does not first call do_MMX_preamble() -- that is the 6502 responsibility of its caller. 
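Each opcode maps either to a single IR binop (op) or, for pmaddwd and psadbw, to a clean helper call (hAddr/hName); invG complements the G operand first (for pandn), and eLeft swaps the operand order so that E ends up on the left, as the pack and interleave cases require.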
*/ 6503 6504 static 6505 ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi, 6506 Prefix pfx, 6507 Long delta, 6508 UChar opc, 6509 HChar* name, 6510 Bool show_granularity ) 6511 { 6512 HChar dis_buf[50]; 6513 UChar modrm = getUChar(delta); 6514 Bool isReg = epartIsReg(modrm); 6515 IRExpr* argL = NULL; 6516 IRExpr* argR = NULL; 6517 IRExpr* argG = NULL; 6518 IRExpr* argE = NULL; 6519 IRTemp res = newTemp(Ity_I64); 6520 6521 Bool invG = False; 6522 IROp op = Iop_INVALID; 6523 void* hAddr = NULL; 6524 HChar* hName = NULL; 6525 Bool eLeft = False; 6526 6527 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 6528 6529 switch (opc) { 6530 /* Original MMX ones */ 6531 case 0xFC: op = Iop_Add8x8; break; 6532 case 0xFD: op = Iop_Add16x4; break; 6533 case 0xFE: op = Iop_Add32x2; break; 6534 6535 case 0xEC: op = Iop_QAdd8Sx8; break; 6536 case 0xED: op = Iop_QAdd16Sx4; break; 6537 6538 case 0xDC: op = Iop_QAdd8Ux8; break; 6539 case 0xDD: op = Iop_QAdd16Ux4; break; 6540 6541 case 0xF8: op = Iop_Sub8x8; break; 6542 case 0xF9: op = Iop_Sub16x4; break; 6543 case 0xFA: op = Iop_Sub32x2; break; 6544 6545 case 0xE8: op = Iop_QSub8Sx8; break; 6546 case 0xE9: op = Iop_QSub16Sx4; break; 6547 6548 case 0xD8: op = Iop_QSub8Ux8; break; 6549 case 0xD9: op = Iop_QSub16Ux4; break; 6550 6551 case 0xE5: op = Iop_MulHi16Sx4; break; 6552 case 0xD5: op = Iop_Mul16x4; break; 6553 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break; 6554 6555 case 0x74: op = Iop_CmpEQ8x8; break; 6556 case 0x75: op = Iop_CmpEQ16x4; break; 6557 case 0x76: op = Iop_CmpEQ32x2; break; 6558 6559 case 0x64: op = Iop_CmpGT8Sx8; break; 6560 case 0x65: op = Iop_CmpGT16Sx4; break; 6561 case 0x66: op = Iop_CmpGT32Sx2; break; 6562 6563 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 6564 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 6565 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 6566 6567 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 6568 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 6569 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 6570 6571 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 6572 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 6573 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 6574 6575 case 0xDB: op = Iop_And64; break; 6576 case 0xDF: op = Iop_And64; invG = True; break; 6577 case 0xEB: op = Iop_Or64; break; 6578 case 0xEF: /* Possibly do better here if argL and argR are the 6579 same reg */ 6580 op = Iop_Xor64; break; 6581 6582 /* Introduced in SSE1 */ 6583 case 0xE0: op = Iop_Avg8Ux8; break; 6584 case 0xE3: op = Iop_Avg16Ux4; break; 6585 case 0xEE: op = Iop_Max16Sx4; break; 6586 case 0xDE: op = Iop_Max8Ux8; break; 6587 case 0xEA: op = Iop_Min16Sx4; break; 6588 case 0xDA: op = Iop_Min8Ux8; break; 6589 case 0xE4: op = Iop_MulHi16Ux4; break; 6590 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break; 6591 6592 /* Introduced in SSE2 */ 6593 case 0xD4: op = Iop_Add64; break; 6594 case 0xFB: op = Iop_Sub64; break; 6595 6596 default: 6597 vex_printf("\n0x%x\n", (Int)opc); 6598 vpanic("dis_MMXop_regmem_to_reg"); 6599 } 6600 6601 # undef XXX 6602 6603 argG = getMMXReg(gregLO3ofRM(modrm)); 6604 if (invG) 6605 argG = unop(Iop_Not64, argG); 6606 6607 if (isReg) { 6608 delta++; 6609 argE = getMMXReg(eregLO3ofRM(modrm)); 6610 } else { 6611 Int len; 6612 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6613 delta += len; 6614 argE = loadLE(Ity_I64, mkexpr(addr)); 6615 } 6616 6617 if (eLeft) { 6618 argL = 
argE; 6619 argR = argG; 6620 } else { 6621 argL = argG; 6622 argR = argE; 6623 } 6624 6625 if (op != Iop_INVALID) { 6626 vassert(hName == NULL); 6627 vassert(hAddr == NULL); 6628 assign(res, binop(op, argL, argR)); 6629 } else { 6630 vassert(hName != NULL); 6631 vassert(hAddr != NULL); 6632 assign( res, 6633 mkIRExprCCall( 6634 Ity_I64, 6635 0/*regparms*/, hName, hAddr, 6636 mkIRExprVec_2( argL, argR ) 6637 ) 6638 ); 6639 } 6640 6641 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 6642 6643 DIP("%s%s %s, %s\n", 6644 name, show_granularity ? nameMMXGran(opc & 3) : "", 6645 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 6646 nameMMXReg(gregLO3ofRM(modrm)) ); 6647 6648 return delta; 6649 } 6650 6651 6652 /* Vector by scalar shift of G by the amount specified at the bottom 6653 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 6654 6655 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi, 6656 Prefix pfx, Long delta, 6657 HChar* opname, IROp op ) 6658 { 6659 HChar dis_buf[50]; 6660 Int alen, size; 6661 IRTemp addr; 6662 Bool shl, shr, sar; 6663 UChar rm = getUChar(delta); 6664 IRTemp g0 = newTemp(Ity_I64); 6665 IRTemp g1 = newTemp(Ity_I64); 6666 IRTemp amt = newTemp(Ity_I64); 6667 IRTemp amt8 = newTemp(Ity_I8); 6668 6669 if (epartIsReg(rm)) { 6670 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 6671 DIP("%s %s,%s\n", opname, 6672 nameMMXReg(eregLO3ofRM(rm)), 6673 nameMMXReg(gregLO3ofRM(rm)) ); 6674 delta++; 6675 } else { 6676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 6677 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 6678 DIP("%s %s,%s\n", opname, 6679 dis_buf, 6680 nameMMXReg(gregLO3ofRM(rm)) ); 6681 delta += alen; 6682 } 6683 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 6684 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 6685 6686 shl = shr = sar = False; 6687 size = 0; 6688 switch (op) { 6689 case Iop_ShlN16x4: shl = True; size = 32; break; 6690 case Iop_ShlN32x2: shl = True; size = 32; break; 6691 case Iop_Shl64: shl = True; size = 64; break; 6692 case Iop_ShrN16x4: shr = True; size = 16; break; 6693 case Iop_ShrN32x2: shr = True; size = 32; break; 6694 case Iop_Shr64: shr = True; size = 64; break; 6695 case Iop_SarN16x4: sar = True; size = 16; break; 6696 case Iop_SarN32x2: sar = True; size = 32; break; 6697 default: vassert(0); 6698 } 6699 6700 if (shl || shr) { 6701 assign( 6702 g1, 6703 IRExpr_Mux0X( 6704 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))), 6705 mkU64(0), 6706 binop(op, mkexpr(g0), mkexpr(amt8)) 6707 ) 6708 ); 6709 } else 6710 if (sar) { 6711 assign( 6712 g1, 6713 IRExpr_Mux0X( 6714 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))), 6715 binop(op, mkexpr(g0), mkU8(size-1)), 6716 binop(op, mkexpr(g0), mkexpr(amt8)) 6717 ) 6718 ); 6719 } else { 6720 vassert(0); 6721 } 6722 6723 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) ); 6724 return delta; 6725 } 6726 6727 6728 /* Vector by scalar shift of E by an immediate byte. This is a 6729 straight copy of dis_SSE_shiftE_imm. 
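Since the shift count is an immediate it can be handled at decode time: a count of lane-size or more gives an all-zero result for the logical shifts and acts as a shift by (lane size - 1) for the arithmetic ones, so e.g. psraw $20 behaves like psraw $15.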
*/ 6730 6731 static 6732 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op ) 6733 { 6734 Bool shl, shr, sar; 6735 UChar rm = getUChar(delta); 6736 IRTemp e0 = newTemp(Ity_I64); 6737 IRTemp e1 = newTemp(Ity_I64); 6738 UChar amt, size; 6739 vassert(epartIsReg(rm)); 6740 vassert(gregLO3ofRM(rm) == 2 6741 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 6742 amt = getUChar(delta+1); 6743 delta += 2; 6744 DIP("%s $%d,%s\n", opname, 6745 (Int)amt, 6746 nameMMXReg(eregLO3ofRM(rm)) ); 6747 6748 assign( e0, getMMXReg(eregLO3ofRM(rm)) ); 6749 6750 shl = shr = sar = False; 6751 size = 0; 6752 switch (op) { 6753 case Iop_ShlN16x4: shl = True; size = 16; break; 6754 case Iop_ShlN32x2: shl = True; size = 32; break; 6755 case Iop_Shl64: shl = True; size = 64; break; 6756 case Iop_SarN16x4: sar = True; size = 16; break; 6757 case Iop_SarN32x2: sar = True; size = 32; break; 6758 case Iop_ShrN16x4: shr = True; size = 16; break; 6759 case Iop_ShrN32x2: shr = True; size = 32; break; 6760 case Iop_Shr64: shr = True; size = 64; break; 6761 default: vassert(0); 6762 } 6763 6764 if (shl || shr) { 6765 assign( e1, amt >= size 6766 ? mkU64(0) 6767 : binop(op, mkexpr(e0), mkU8(amt)) 6768 ); 6769 } else 6770 if (sar) { 6771 assign( e1, amt >= size 6772 ? binop(op, mkexpr(e0), mkU8(size-1)) 6773 : binop(op, mkexpr(e0), mkU8(amt)) 6774 ); 6775 } else { 6776 vassert(0); 6777 } 6778 6779 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) ); 6780 return delta; 6781 } 6782 6783 6784 /* Completely handle all MMX instructions except emms. */ 6785 6786 static 6787 ULong dis_MMX ( Bool* decode_ok, 6788 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta ) 6789 { 6790 Int len; 6791 UChar modrm; 6792 HChar dis_buf[50]; 6793 UChar opc = getUChar(delta); 6794 delta++; 6795 6796 /* dis_MMX handles all insns except emms. 
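Hence every insn decoded here is preceded by do_MMX_preamble(), which applies the state change described above: all eight FP tags set to valid and FTOP zeroed.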
*/ 6797 do_MMX_preamble(); 6798 6799 switch (opc) { 6800 6801 case 0x6E: 6802 if (sz == 4) { 6803 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/ 6804 modrm = getUChar(delta); 6805 if (epartIsReg(modrm)) { 6806 delta++; 6807 putMMXReg( 6808 gregLO3ofRM(modrm), 6809 binop( Iop_32HLto64, 6810 mkU32(0), 6811 getIReg32(eregOfRexRM(pfx,modrm)) ) ); 6812 DIP("movd %s, %s\n", 6813 nameIReg32(eregOfRexRM(pfx,modrm)), 6814 nameMMXReg(gregLO3ofRM(modrm))); 6815 } else { 6816 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6817 delta += len; 6818 putMMXReg( 6819 gregLO3ofRM(modrm), 6820 binop( Iop_32HLto64, 6821 mkU32(0), 6822 loadLE(Ity_I32, mkexpr(addr)) ) ); 6823 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6824 } 6825 } 6826 else 6827 if (sz == 8) { 6828 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/ 6829 modrm = getUChar(delta); 6830 if (epartIsReg(modrm)) { 6831 delta++; 6832 putMMXReg( gregLO3ofRM(modrm), 6833 getIReg64(eregOfRexRM(pfx,modrm)) ); 6834 DIP("movd %s, %s\n", 6835 nameIReg64(eregOfRexRM(pfx,modrm)), 6836 nameMMXReg(gregLO3ofRM(modrm))); 6837 } else { 6838 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6839 delta += len; 6840 putMMXReg( gregLO3ofRM(modrm), 6841 loadLE(Ity_I64, mkexpr(addr)) ); 6842 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6843 } 6844 } 6845 else { 6846 goto mmx_decode_failure; 6847 } 6848 break; 6849 6850 case 0x7E: 6851 if (sz == 4) { 6852 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 6853 modrm = getUChar(delta); 6854 if (epartIsReg(modrm)) { 6855 delta++; 6856 putIReg32( eregOfRexRM(pfx,modrm), 6857 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 6858 DIP("movd %s, %s\n", 6859 nameMMXReg(gregLO3ofRM(modrm)), 6860 nameIReg32(eregOfRexRM(pfx,modrm))); 6861 } else { 6862 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6863 delta += len; 6864 storeLE( mkexpr(addr), 6865 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 6866 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6867 } 6868 } 6869 else 6870 if (sz == 8) { 6871 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 6872 modrm = getUChar(delta); 6873 if (epartIsReg(modrm)) { 6874 delta++; 6875 putIReg64( eregOfRexRM(pfx,modrm), 6876 getMMXReg(gregLO3ofRM(modrm)) ); 6877 DIP("movd %s, %s\n", 6878 nameMMXReg(gregLO3ofRM(modrm)), 6879 nameIReg64(eregOfRexRM(pfx,modrm))); 6880 } else { 6881 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6882 delta += len; 6883 storeLE( mkexpr(addr), 6884 getMMXReg(gregLO3ofRM(modrm)) ); 6885 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6886 } 6887 } else { 6888 goto mmx_decode_failure; 6889 } 6890 break; 6891 6892 case 0x6F: 6893 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 6894 if (sz != 4 6895 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6896 goto mmx_decode_failure; 6897 modrm = getUChar(delta); 6898 if (epartIsReg(modrm)) { 6899 delta++; 6900 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 6901 DIP("movq %s, %s\n", 6902 nameMMXReg(eregLO3ofRM(modrm)), 6903 nameMMXReg(gregLO3ofRM(modrm))); 6904 } else { 6905 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6906 delta += len; 6907 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 6908 DIP("movq %s, %s\n", 6909 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6910 } 6911 break; 6912 6913 case 0x7F: 6914 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 6915 if (sz != 4 6916 && /*ignore redundant REX.W*/!(sz==8 && 
haveNo66noF2noF3(pfx))) 6917 goto mmx_decode_failure; 6918 modrm = getUChar(delta); 6919 if (epartIsReg(modrm)) { 6920 /* Fall through. The assembler doesn't appear to generate 6921 these. */ 6922 goto mmx_decode_failure; 6923 } else { 6924 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6925 delta += len; 6926 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 6927 DIP("mov(nt)q %s, %s\n", 6928 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6929 } 6930 break; 6931 6932 case 0xFC: 6933 case 0xFD: 6934 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 6935 if (sz != 4) 6936 goto mmx_decode_failure; 6937 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 6938 break; 6939 6940 case 0xEC: 6941 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6942 if (sz != 4 6943 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6944 goto mmx_decode_failure; 6945 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 6946 break; 6947 6948 case 0xDC: 6949 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6950 if (sz != 4) 6951 goto mmx_decode_failure; 6952 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 6953 break; 6954 6955 case 0xF8: 6956 case 0xF9: 6957 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 6958 if (sz != 4) 6959 goto mmx_decode_failure; 6960 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 6961 break; 6962 6963 case 0xE8: 6964 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6965 if (sz != 4) 6966 goto mmx_decode_failure; 6967 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 6968 break; 6969 6970 case 0xD8: 6971 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6972 if (sz != 4) 6973 goto mmx_decode_failure; 6974 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 6975 break; 6976 6977 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 6978 if (sz != 4) 6979 goto mmx_decode_failure; 6980 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 6981 break; 6982 6983 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 6984 if (sz != 4) 6985 goto mmx_decode_failure; 6986 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 6987 break; 6988 6989 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 6990 vassert(sz == 4); 6991 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 6992 break; 6993 6994 case 0x74: 6995 case 0x75: 6996 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 6997 if (sz != 4) 6998 goto mmx_decode_failure; 6999 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 7000 break; 7001 7002 case 0x64: 7003 case 0x65: 7004 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 7005 if (sz != 4) 7006 goto mmx_decode_failure; 7007 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 7008 break; 7009 7010 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 7011 if (sz != 4) 7012 goto mmx_decode_failure; 7013 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 7014 break; 7015 7016 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7017 if (sz != 4) 7018 goto mmx_decode_failure; 7019 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 7020 break; 7021 7022 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7023 if (sz != 4) 7024 goto mmx_decode_failure; 7025 delta = dis_MMXop_regmem_to_reg ( 
vbi, pfx, delta, opc, "packuswb", False ); 7026 break; 7027 7028 case 0x68: 7029 case 0x69: 7030 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 7031 if (sz != 4 7032 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7033 goto mmx_decode_failure; 7034 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 7035 break; 7036 7037 case 0x60: 7038 case 0x61: 7039 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7040 if (sz != 4 7041 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7042 goto mmx_decode_failure; 7043 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 7044 break; 7045 7046 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 7047 if (sz != 4) 7048 goto mmx_decode_failure; 7049 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 7050 break; 7051 7052 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 7053 if (sz != 4) 7054 goto mmx_decode_failure; 7055 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7056 break; 7057 7058 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7059 if (sz != 4) 7060 goto mmx_decode_failure; 7061 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7062 break; 7063 7064 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7065 if (sz != 4) 7066 goto mmx_decode_failure; 7067 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7068 break; 7069 7070 # define SHIFT_BY_REG(_name,_op) \ 7071 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7072 break; 7073 7074 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7075 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7076 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7077 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7078 7079 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7080 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7081 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7082 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 7083 7084 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7085 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7086 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7087 7088 # undef SHIFT_BY_REG 7089 7090 case 0x71: 7091 case 0x72: 7092 case 0x73: { 7093 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7094 UChar byte2, subopc; 7095 if (sz != 4) 7096 goto mmx_decode_failure; 7097 byte2 = getUChar(delta); /* amode / sub-opcode */ 7098 subopc = toUChar( (byte2 >> 3) & 7 ); 7099 7100 # define SHIFT_BY_IMM(_name,_op) \ 7101 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7102 } while (0) 7103 7104 if (subopc == 2 /*SRL*/ && opc == 0x71) 7105 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7106 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7107 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7108 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7109 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7110 7111 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7112 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7113 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7114 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7115 7116 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7117 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7118 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7119 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7120 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7121 SHIFT_BY_IMM("psllq", Iop_Shl64); 7122 7123 else goto mmx_decode_failure; 7124 7125 # undef SHIFT_BY_IMM 7126 break; 7127 } 7128 7129 case 0xF7: { 7130 IRTemp addr = newTemp(Ity_I64); 7131 IRTemp regD = newTemp(Ity_I64); 7132 
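/* This is MASKMOVQ: a byte-granular conditional store of the G register
   to [RDI].  Each byte of G is written only where the corresponding byte
   of E has its top bit set; below this is done as a read-modify-write of
   the 64-bit destination, with SarN8x8(E, 7) expanding each byte's MSB
   into a full byte mask. */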
IRTemp regM = newTemp(Ity_I64); 7133 IRTemp mask = newTemp(Ity_I64); 7134 IRTemp olddata = newTemp(Ity_I64); 7135 IRTemp newdata = newTemp(Ity_I64); 7136 7137 modrm = getUChar(delta); 7138 if (sz != 4 || (!epartIsReg(modrm))) 7139 goto mmx_decode_failure; 7140 delta++; 7141 7142 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7143 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7144 assign( regD, getMMXReg( gregLO3ofRM(modrm) )); 7145 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7146 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7147 assign( newdata, 7148 binop(Iop_Or64, 7149 binop(Iop_And64, 7150 mkexpr(regD), 7151 mkexpr(mask) ), 7152 binop(Iop_And64, 7153 mkexpr(olddata), 7154 unop(Iop_Not64, mkexpr(mask)))) ); 7155 storeLE( mkexpr(addr), mkexpr(newdata) ); 7156 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7157 nameMMXReg( gregLO3ofRM(modrm) ) ); 7158 break; 7159 } 7160 7161 /* --- MMX decode failure --- */ 7162 default: 7163 mmx_decode_failure: 7164 *decode_ok = False; 7165 return delta; /* ignored */ 7166 7167 } 7168 7169 *decode_ok = True; 7170 return delta; 7171 } 7172 7173 7174 /*------------------------------------------------------------*/ 7175 /*--- More misc arithmetic and other obscure insns. ---*/ 7176 /*------------------------------------------------------------*/ 7177 7178 /* Generate base << amt with vacated places filled with stuff 7179 from xtra. amt guaranteed in 0 .. 63. */ 7180 static 7181 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7182 { 7183 /* if amt == 0 7184 then base 7185 else (base << amt) | (xtra >>u (64-amt)) 7186 */ 7187 return 7188 IRExpr_Mux0X( 7189 mkexpr(amt), 7190 mkexpr(base), 7191 binop(Iop_Or64, 7192 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7193 binop(Iop_Shr64, mkexpr(xtra), 7194 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7195 ) 7196 ); 7197 } 7198 7199 /* Generate base >>u amt with vacated places filled with stuff 7200 from xtra. amt guaranteed in 0 .. 63. */ 7201 static 7202 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) 7203 { 7204 /* if amt == 0 7205 then base 7206 else (base >>u amt) | (xtra << (64-amt)) 7207 */ 7208 return 7209 IRExpr_Mux0X( 7210 mkexpr(amt), 7211 mkexpr(base), 7212 binop(Iop_Or64, 7213 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), 7214 binop(Iop_Shl64, mkexpr(xtra), 7215 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7216 ) 7217 ); 7218 } 7219 7220 /* Double length left and right shifts. Apparently only required in 7221 v-size (no b- variant). */ 7222 static 7223 ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi, 7224 Prefix pfx, 7225 Long delta, UChar modrm, 7226 Int sz, 7227 IRExpr* shift_amt, 7228 Bool amt_is_literal, 7229 HChar* shift_amt_txt, 7230 Bool left_shift ) 7231 { 7232 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 7233 for printing it. And eip on entry points at the modrm byte. */ 7234 Int len; 7235 HChar dis_buf[50]; 7236 7237 IRType ty = szToITy(sz); 7238 IRTemp gsrc = newTemp(ty); 7239 IRTemp esrc = newTemp(ty); 7240 IRTemp addr = IRTemp_INVALID; 7241 IRTemp tmpSH = newTemp(Ity_I8); 7242 IRTemp tmpSS = newTemp(Ity_I8); 7243 IRTemp tmp64 = IRTemp_INVALID; 7244 IRTemp res64 = IRTemp_INVALID; 7245 IRTemp rss64 = IRTemp_INVALID; 7246 IRTemp resTy = IRTemp_INVALID; 7247 IRTemp rssTy = IRTemp_INVALID; 7248 Int mask = sz==8 ? 63 : 31; 7249 7250 vassert(sz == 2 || sz == 4 || sz == 8); 7251 7252 /* The E-part is the destination; this is shifted. 
The G-part 7253 supplies bits to be shifted into the E-part, but is not 7254 changed. 7255 7256 If shifting left, form a double-length word with E at the top 7257 and G at the bottom, and shift this left. The result is then in 7258 the high part. 7259 7260 If shifting right, form a double-length word with G at the top 7261 and E at the bottom, and shift this right. The result is then 7262 at the bottom. */ 7263 7264 /* Fetch the operands. */ 7265 7266 assign( gsrc, getIRegG(sz, pfx, modrm) ); 7267 7268 if (epartIsReg(modrm)) { 7269 delta++; 7270 assign( esrc, getIRegE(sz, pfx, modrm) ); 7271 DIP("sh%cd%c %s, %s, %s\n", 7272 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7273 shift_amt_txt, 7274 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); 7275 } else { 7276 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 7277 /* # bytes following amode */ 7278 amt_is_literal ? 1 : 0 ); 7279 delta += len; 7280 assign( esrc, loadLE(ty, mkexpr(addr)) ); 7281 DIP("sh%cd%c %s, %s, %s\n", 7282 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7283 shift_amt_txt, 7284 nameIRegG(sz, pfx, modrm), dis_buf); 7285 } 7286 7287 /* Calculate the masked shift amount (tmpSH), the masked subshift 7288 amount (tmpSS), the shifted value (res64) and the subshifted 7289 value (rss64). */ 7290 7291 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) ); 7292 assign( tmpSS, binop(Iop_And8, 7293 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 7294 mkU8(mask))); 7295 7296 tmp64 = newTemp(Ity_I64); 7297 res64 = newTemp(Ity_I64); 7298 rss64 = newTemp(Ity_I64); 7299 7300 if (sz == 2 || sz == 4) { 7301 7302 /* G is xtra; E is data */ 7303 /* what a freaking nightmare: */ 7304 if (sz == 4 && left_shift) { 7305 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) ); 7306 assign( res64, 7307 binop(Iop_Shr64, 7308 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7309 mkU8(32)) ); 7310 assign( rss64, 7311 binop(Iop_Shr64, 7312 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)), 7313 mkU8(32)) ); 7314 } 7315 else 7316 if (sz == 4 && !left_shift) { 7317 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) ); 7318 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7319 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) ); 7320 } 7321 else 7322 if (sz == 2 && left_shift) { 7323 assign( tmp64, 7324 binop(Iop_32HLto64, 7325 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)), 7326 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)) 7327 )); 7328 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */ 7329 assign( res64, 7330 binop(Iop_Shr64, 7331 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7332 mkU8(48)) ); 7333 /* subshift formed by shifting [esrc'0000'0000'0000] */ 7334 assign( rss64, 7335 binop(Iop_Shr64, 7336 binop(Iop_Shl64, 7337 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)), 7338 mkU8(48)), 7339 mkexpr(tmpSS)), 7340 mkU8(48)) ); 7341 } 7342 else 7343 if (sz == 2 && !left_shift) { 7344 assign( tmp64, 7345 binop(Iop_32HLto64, 7346 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)), 7347 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc)) 7348 )); 7349 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */ 7350 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7351 /* subshift formed by shifting [0000'0000'0000'esrc] */ 7352 assign( rss64, binop(Iop_Shr64, 7353 unop(Iop_16Uto64, mkexpr(esrc)), 7354 mkexpr(tmpSS)) ); 7355 } 7356 7357 } else { 7358 7359 vassert(sz == 8); 7360 if (left_shift) { 7361 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH )); 7362 assign( rss64, shiftL64_with_extras( esrc, 
gsrc, tmpSS )); 7363 } else { 7364 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH )); 7365 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS )); 7366 } 7367 7368 } 7369 7370 resTy = newTemp(ty); 7371 rssTy = newTemp(ty); 7372 assign( resTy, narrowTo(ty, mkexpr(res64)) ); 7373 assign( rssTy, narrowTo(ty, mkexpr(rss64)) ); 7374 7375 /* Put result back and write the flags thunk. */ 7376 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64, 7377 resTy, rssTy, ty, tmpSH ); 7378 7379 if (epartIsReg(modrm)) { 7380 putIRegE(sz, pfx, modrm, mkexpr(resTy)); 7381 } else { 7382 storeLE( mkexpr(addr), mkexpr(resTy) ); 7383 } 7384 7385 if (amt_is_literal) delta++; 7386 return delta; 7387 } 7388 7389 7390 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 7391 required. */ 7392 7393 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 7394 7395 static HChar* nameBtOp ( BtOp op ) 7396 { 7397 switch (op) { 7398 case BtOpNone: return ""; 7399 case BtOpSet: return "s"; 7400 case BtOpReset: return "r"; 7401 case BtOpComp: return "c"; 7402 default: vpanic("nameBtOp(amd64)"); 7403 } 7404 } 7405 7406 7407 static 7408 ULong dis_bt_G_E ( VexAbiInfo* vbi, 7409 Prefix pfx, Int sz, Long delta, BtOp op ) 7410 { 7411 HChar dis_buf[50]; 7412 UChar modrm; 7413 Int len; 7414 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 7415 t_addr1, t_rsp, t_mask, t_new; 7416 7417 vassert(sz == 2 || sz == 4 || sz == 8); 7418 7419 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 7420 = t_addr0 = t_addr1 = t_rsp 7421 = t_mask = t_new = IRTemp_INVALID; 7422 7423 t_fetched = newTemp(Ity_I8); 7424 t_new = newTemp(Ity_I8); 7425 t_bitno0 = newTemp(Ity_I64); 7426 t_bitno1 = newTemp(Ity_I64); 7427 t_bitno2 = newTemp(Ity_I8); 7428 t_addr1 = newTemp(Ity_I64); 7429 modrm = getUChar(delta); 7430 7431 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) ); 7432 7433 if (epartIsReg(modrm)) { 7434 delta++; 7435 /* Get it onto the client's stack. Oh, this is a horrible 7436 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925. 7437 Because of the ELF ABI stack redzone, there may be live data 7438 up to 128 bytes below %RSP. So we can't just push it on the 7439 stack, else we may wind up trashing live data, and causing 7440 impossible-to-find simulation errors. (Yes, this did 7441 happen.) So we need to drop RSP before at least 128 before 7442 pushing it. That unfortunately means hitting Memcheck's 7443 fast-case painting code. Ideally we should drop more than 7444 128, to reduce the chances of breaking buggy programs that 7445 have live data below -128(%RSP). Memcheck fast-cases moves 7446 of 288 bytes due to the need to handle ppc64-linux quickly, 7447 so let's use 288. Of course the real fix is to get rid of 7448 this kludge entirely. */ 7449 t_rsp = newTemp(Ity_I64); 7450 t_addr0 = newTemp(Ity_I64); 7451 7452 vassert(vbi->guest_stack_redzone_size == 128); 7453 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) ); 7454 putIReg64(R_RSP, mkexpr(t_rsp)); 7455 7456 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) ); 7457 7458 /* Make t_addr0 point at it. */ 7459 assign( t_addr0, mkexpr(t_rsp) ); 7460 7461 /* Mask out upper bits of the shift amount, since we're doing a 7462 reg. */ 7463 assign( t_bitno1, binop(Iop_And64, 7464 mkexpr(t_bitno0), 7465 mkU64(sz == 8 ? 63 : sz == 4 ? 
31 : 15)) ); 7466 7467 } else { 7468 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 7469 delta += len; 7470 assign( t_bitno1, mkexpr(t_bitno0) ); 7471 } 7472 7473 /* At this point: t_addr0 is the address being operated on. If it 7474 was a reg, we will have pushed it onto the client's stack. 7475 t_bitno1 is the bit number, suitably masked in the case of a 7476 reg. */ 7477 7478 /* Now the main sequence. */ 7479 assign( t_addr1, 7480 binop(Iop_Add64, 7481 mkexpr(t_addr0), 7482 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) ); 7483 7484 /* t_addr1 now holds effective address */ 7485 7486 assign( t_bitno2, 7487 unop(Iop_64to8, 7488 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) ); 7489 7490 /* t_bitno2 contains offset of bit within byte */ 7491 7492 if (op != BtOpNone) { 7493 t_mask = newTemp(Ity_I8); 7494 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 7495 } 7496 7497 /* t_mask is now a suitable byte mask */ 7498 7499 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 7500 7501 if (op != BtOpNone) { 7502 switch (op) { 7503 case BtOpSet: 7504 assign( t_new, 7505 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 7506 break; 7507 case BtOpComp: 7508 assign( t_new, 7509 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 7510 break; 7511 case BtOpReset: 7512 assign( t_new, 7513 binop(Iop_And8, mkexpr(t_fetched), 7514 unop(Iop_Not8, mkexpr(t_mask))) ); 7515 break; 7516 default: 7517 vpanic("dis_bt_G_E(amd64)"); 7518 } 7519 if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) { 7520 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 7521 mkexpr(t_new)/*new*/, 7522 guest_RIP_curr_instr ); 7523 } else { 7524 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 7525 } 7526 } 7527 7528 /* Side effect done; now get selected bit into Carry flag */ 7529 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 7530 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7531 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7532 stmt( IRStmt_Put( 7533 OFFB_CC_DEP1, 7534 binop(Iop_And64, 7535 binop(Iop_Shr64, 7536 unop(Iop_8Uto64, mkexpr(t_fetched)), 7537 mkexpr(t_bitno2)), 7538 mkU64(1))) 7539 ); 7540 /* Set NDEP even though it isn't used. This makes redundant-PUT 7541 elimination of previous stores to this field work better. */ 7542 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7543 7544 /* Move reg operand from stack back to reg */ 7545 if (epartIsReg(modrm)) { 7546 /* t_rsp still points at it. */ 7547 /* only write the reg if actually modifying it; doing otherwise 7548 zeroes the top half erroneously when doing btl due to 7549 standard zero-extend rule */ 7550 if (op != BtOpNone) 7551 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) ); 7552 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) ); 7553 } 7554 7555 DIP("bt%s%c %s, %s\n", 7556 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), 7557 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) ); 7558 7559 return delta; 7560 } 7561 7562 7563 7564 /* Handle BSF/BSR. Only v-size seems necessary. 
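For example, on 0x50 (binary 0101'0000) bsf yields 4, the index of the least significant set bit, and bsr yields 6, the most significant; hence the Ctz64 and 63 - Clz64 forms used below.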
*/ 7565 static 7566 ULong dis_bs_E_G ( VexAbiInfo* vbi, 7567 Prefix pfx, Int sz, Long delta, Bool fwds ) 7568 { 7569 Bool isReg; 7570 UChar modrm; 7571 HChar dis_buf[50]; 7572 7573 IRType ty = szToITy(sz); 7574 IRTemp src = newTemp(ty); 7575 IRTemp dst = newTemp(ty); 7576 IRTemp src64 = newTemp(Ity_I64); 7577 IRTemp dst64 = newTemp(Ity_I64); 7578 IRTemp src8 = newTemp(Ity_I8); 7579 7580 vassert(sz == 8 || sz == 4 || sz == 2); 7581 7582 modrm = getUChar(delta); 7583 isReg = epartIsReg(modrm); 7584 if (isReg) { 7585 delta++; 7586 assign( src, getIRegE(sz, pfx, modrm) ); 7587 } else { 7588 Int len; 7589 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7590 delta += len; 7591 assign( src, loadLE(ty, mkexpr(addr)) ); 7592 } 7593 7594 DIP("bs%c%c %s, %s\n", 7595 fwds ? 'f' : 'r', nameISize(sz), 7596 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 7597 nameIRegG(sz, pfx, modrm)); 7598 7599 /* First, widen src to 64 bits if it is not already. */ 7600 assign( src64, widenUto64(mkexpr(src)) ); 7601 7602 /* Generate an 8-bit expression which is zero iff the 7603 original is zero, and nonzero otherwise */ 7604 assign( src8, 7605 unop(Iop_1Uto8, 7606 binop(Iop_CmpNE64, 7607 mkexpr(src64), mkU64(0))) ); 7608 7609 /* Flags: Z is 1 iff source value is zero. All others 7610 are undefined -- we force them to zero. */ 7611 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7612 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7613 stmt( IRStmt_Put( 7614 OFFB_CC_DEP1, 7615 IRExpr_Mux0X( mkexpr(src8), 7616 /* src==0 */ 7617 mkU64(AMD64G_CC_MASK_Z), 7618 /* src!=0 */ 7619 mkU64(0) 7620 ) 7621 )); 7622 /* Set NDEP even though it isn't used. This makes redundant-PUT 7623 elimination of previous stores to this field work better. */ 7624 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7625 7626 /* Result: iff source value is zero, we can't use 7627 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. 7628 But anyway, amd64 semantics say the result is undefined in 7629 such situations. Hence handle the zero case specially. */ 7630 7631 /* Bleh. What we compute: 7632 7633 bsf64: if src == 0 then {dst is unchanged} 7634 else Ctz64(src) 7635 7636 bsr64: if src == 0 then {dst is unchanged} 7637 else 63 - Clz64(src) 7638 7639 bsf32: if src == 0 then {dst is unchanged} 7640 else Ctz64(32Uto64(src)) 7641 7642 bsr32: if src == 0 then {dst is unchanged} 7643 else 63 - Clz64(32Uto64(src)) 7644 7645 bsf16: if src == 0 then {dst is unchanged} 7646 else Ctz64(32Uto64(16Uto32(src))) 7647 7648 bsr16: if src == 0 then {dst is unchanged} 7649 else 63 - Clz64(32Uto64(16Uto32(src))) 7650 */ 7651 7652 /* The main computation, guarding against zero. */ 7653 assign( dst64, 7654 IRExpr_Mux0X( 7655 mkexpr(src8), 7656 /* src == 0 -- leave dst unchanged */ 7657 widenUto64( getIRegG( sz, pfx, modrm ) ), 7658 /* src != 0 */ 7659 fwds ? 
unop(Iop_Ctz64, mkexpr(src64)) 7660 : binop(Iop_Sub64, 7661 mkU64(63), 7662 unop(Iop_Clz64, mkexpr(src64))) 7663 ) 7664 ); 7665 7666 if (sz == 2) 7667 assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); 7668 else 7669 if (sz == 4) 7670 assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); 7671 else 7672 assign( dst, mkexpr(dst64) ); 7673 7674 /* dump result back */ 7675 putIRegG( sz, pfx, modrm, mkexpr(dst) ); 7676 7677 return delta; 7678 } 7679 7680 7681 /* swap rAX with the reg specified by reg and REX.B */ 7682 static 7683 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) 7684 { 7685 IRType ty = szToITy(sz); 7686 IRTemp t1 = newTemp(ty); 7687 IRTemp t2 = newTemp(ty); 7688 vassert(sz == 2 || sz == 4 || sz == 8); 7689 vassert(regLo3 < 8); 7690 if (sz == 8) { 7691 assign( t1, getIReg64(R_RAX) ); 7692 assign( t2, getIRegRexB(8, pfx, regLo3) ); 7693 putIReg64( R_RAX, mkexpr(t2) ); 7694 putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); 7695 } else if (sz == 4) { 7696 assign( t1, getIReg32(R_RAX) ); 7697 assign( t2, getIRegRexB(4, pfx, regLo3) ); 7698 putIReg32( R_RAX, mkexpr(t2) ); 7699 putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); 7700 } else { 7701 assign( t1, getIReg16(R_RAX) ); 7702 assign( t2, getIRegRexB(2, pfx, regLo3) ); 7703 putIReg16( R_RAX, mkexpr(t2) ); 7704 putIRegRexB(2, pfx, regLo3, mkexpr(t1) ); 7705 } 7706 DIP("xchg%c %s, %s\n", 7707 nameISize(sz), nameIRegRAX(sz), 7708 nameIRegRexB(sz,pfx, regLo3)); 7709 } 7710 7711 7712 static 7713 void codegen_SAHF ( void ) 7714 { 7715 /* Set the flags to: 7716 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 7717 -- retain the old O flag 7718 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7719 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) 7720 */ 7721 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7722 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 7723 IRTemp oldflags = newTemp(Ity_I64); 7724 assign( oldflags, mk_amd64g_calculate_rflags_all() ); 7725 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7726 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7727 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7728 stmt( IRStmt_Put( OFFB_CC_DEP1, 7729 binop(Iop_Or64, 7730 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), 7731 binop(Iop_And64, 7732 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), 7733 mkU64(mask_SZACP)) 7734 ) 7735 )); 7736 } 7737 7738 7739 static 7740 void codegen_LAHF ( void ) 7741 { 7742 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 7743 IRExpr* rax_with_hole; 7744 IRExpr* new_byte; 7745 IRExpr* new_rax; 7746 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7747 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 7748 7749 IRTemp flags = newTemp(Ity_I64); 7750 assign( flags, mk_amd64g_calculate_rflags_all() ); 7751 7752 rax_with_hole 7753 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); 7754 new_byte 7755 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), 7756 mkU64(1<<1)); 7757 new_rax 7758 = binop(Iop_Or64, rax_with_hole, 7759 binop(Iop_Shl64, new_byte, mkU8(8))); 7760 putIReg64(R_RAX, new_rax); 7761 } 7762 7763 7764 static 7765 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, 7766 VexAbiInfo* vbi, 7767 Prefix pfx, 7768 Int size, 7769 Long delta0 ) 7770 { 7771 HChar dis_buf[50]; 7772 Int len; 7773 7774 IRType ty = szToITy(size); 7775 IRTemp acc = newTemp(ty); 7776 IRTemp src = newTemp(ty); 7777 IRTemp dest = newTemp(ty); 7778 IRTemp dest2 = newTemp(ty); 7779 IRTemp acc2 = newTemp(ty); 7780 IRTemp cond8 = newTemp(Ity_I8); 7781 IRTemp addr = IRTemp_INVALID; 7782 UChar rm = getUChar(delta0); 
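   /* CMPXCHG semantics, informally (a sketch only, not the exact IR
      emitted below):

         if (rAX == E) { ZF = 1; E = G;   }
         else          { ZF = 0; rAX = E; }

      where the comparison rAX - E also sets the other arithmetic
      flags, just as CMP would. */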
7783 7784 /* There are 3 cases to consider: 7785 7786 reg-reg: ignore any lock prefix, generate sequence based 7787 on Mux0X 7788 7789 reg-mem, not locked: ignore any lock prefix, generate sequence 7790 based on Mux0X 7791 7792 reg-mem, locked: use IRCAS 7793 */ 7794 7795 if (epartIsReg(rm)) { 7796 /* case 1 */ 7797 assign( dest, getIRegE(size, pfx, rm) ); 7798 delta0++; 7799 assign( src, getIRegG(size, pfx, rm) ); 7800 assign( acc, getIRegRAX(size) ); 7801 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7802 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7803 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 7804 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7805 putIRegRAX(size, mkexpr(acc2)); 7806 putIRegE(size, pfx, rm, mkexpr(dest2)); 7807 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7808 nameIRegG(size,pfx,rm), 7809 nameIRegE(size,pfx,rm) ); 7810 } 7811 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 7812 /* case 2 */ 7813 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7814 assign( dest, loadLE(ty, mkexpr(addr)) ); 7815 delta0 += len; 7816 assign( src, getIRegG(size, pfx, rm) ); 7817 assign( acc, getIRegRAX(size) ); 7818 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7819 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7820 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 7821 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7822 putIRegRAX(size, mkexpr(acc2)); 7823 storeLE( mkexpr(addr), mkexpr(dest2) ); 7824 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7825 nameIRegG(size,pfx,rm), dis_buf); 7826 } 7827 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 7828 /* case 3 */ 7829 /* src is new value. acc is expected value. dest is old value. 7830 Compute success from the output of the IRCAS, and steer the 7831 new value for RAX accordingly: in case of success, RAX is 7832 unchanged. */ 7833 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7834 delta0 += len; 7835 assign( src, getIRegG(size, pfx, rm) ); 7836 assign( acc, getIRegRAX(size) ); 7837 stmt( IRStmt_CAS( 7838 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 7839 NULL, mkexpr(acc), NULL, mkexpr(src) ) 7840 )); 7841 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7842 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7843 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7844 putIRegRAX(size, mkexpr(acc2)); 7845 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7846 nameIRegG(size,pfx,rm), dis_buf); 7847 } 7848 else vassert(0); 7849 7850 *ok = True; 7851 return delta0; 7852 } 7853 7854 7855 /* Handle conditional move instructions of the form 7856 cmovcc E(reg-or-mem), G(reg) 7857 7858 E(src) is reg-or-mem 7859 G(dst) is reg. 
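   Informally, G := condition ? E : G.  When E is a memory operand it
   is read unconditionally, even if the condition is false; that is
   what the IR below does (the load feeds a Mux0X which may then
   discard it), and it appears to match what real hardware is
   documented to do.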
7860 7861 If E is reg, --> GET %E, tmps 7862 GET %G, tmpd 7863 CMOVcc tmps, tmpd 7864 PUT tmpd, %G 7865 7866 If E is mem --> (getAddr E) -> tmpa 7867 LD (tmpa), tmps 7868 GET %G, tmpd 7869 CMOVcc tmps, tmpd 7870 PUT tmpd, %G 7871 */ 7872 static 7873 ULong dis_cmov_E_G ( VexAbiInfo* vbi, 7874 Prefix pfx, 7875 Int sz, 7876 AMD64Condcode cond, 7877 Long delta0 ) 7878 { 7879 UChar rm = getUChar(delta0); 7880 HChar dis_buf[50]; 7881 Int len; 7882 7883 IRType ty = szToITy(sz); 7884 IRTemp tmps = newTemp(ty); 7885 IRTemp tmpd = newTemp(ty); 7886 7887 if (epartIsReg(rm)) { 7888 assign( tmps, getIRegE(sz, pfx, rm) ); 7889 assign( tmpd, getIRegG(sz, pfx, rm) ); 7890 7891 putIRegG( sz, pfx, rm, 7892 IRExpr_Mux0X( unop(Iop_1Uto8, 7893 mk_amd64g_calculate_condition(cond)), 7894 mkexpr(tmpd), 7895 mkexpr(tmps) ) 7896 ); 7897 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 7898 nameIRegE(sz,pfx,rm), 7899 nameIRegG(sz,pfx,rm)); 7900 return 1+delta0; 7901 } 7902 7903 /* E refers to memory */ 7904 { 7905 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7906 assign( tmps, loadLE(ty, mkexpr(addr)) ); 7907 assign( tmpd, getIRegG(sz, pfx, rm) ); 7908 7909 putIRegG( sz, pfx, rm, 7910 IRExpr_Mux0X( unop(Iop_1Uto8, 7911 mk_amd64g_calculate_condition(cond)), 7912 mkexpr(tmpd), 7913 mkexpr(tmps) ) 7914 ); 7915 7916 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 7917 dis_buf, 7918 nameIRegG(sz,pfx,rm)); 7919 return len+delta0; 7920 } 7921 } 7922 7923 7924 static 7925 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, 7926 VexAbiInfo* vbi, 7927 Prefix pfx, Int sz, Long delta0 ) 7928 { 7929 Int len; 7930 UChar rm = getUChar(delta0); 7931 HChar dis_buf[50]; 7932 7933 IRType ty = szToITy(sz); 7934 IRTemp tmpd = newTemp(ty); 7935 IRTemp tmpt0 = newTemp(ty); 7936 IRTemp tmpt1 = newTemp(ty); 7937 7938 /* There are 3 cases to consider: 7939 7940 reg-reg: ignore any lock prefix, 7941 generate 'naive' (non-atomic) sequence 7942 7943 reg-mem, not locked: ignore any lock prefix, generate 'naive' 7944 (non-atomic) sequence 7945 7946 reg-mem, locked: use IRCAS 7947 */ 7948 7949 if (epartIsReg(rm)) { 7950 /* case 1 */ 7951 assign( tmpd, getIRegE(sz, pfx, rm) ); 7952 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7953 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7954 mkexpr(tmpd), mkexpr(tmpt0)) ); 7955 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7956 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7957 putIRegE(sz, pfx, rm, mkexpr(tmpt1)); 7958 DIP("xadd%c %s, %s\n", 7959 nameISize(sz), nameIRegG(sz,pfx,rm), 7960 nameIRegE(sz,pfx,rm)); 7961 *decode_ok = True; 7962 return 1+delta0; 7963 } 7964 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 7965 /* case 2 */ 7966 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7967 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 7968 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7969 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7970 mkexpr(tmpd), mkexpr(tmpt0)) ); 7971 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7972 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 7973 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7974 DIP("xadd%c %s, %s\n", 7975 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 7976 *decode_ok = True; 7977 return len+delta0; 7978 } 7979 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 7980 /* case 3 */ 7981 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7982 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 7983 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7984 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7985 mkexpr(tmpd), mkexpr(tmpt0)) ); 7986 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 
7987 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr ); 7988 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7989 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7990 DIP("xadd%c %s, %s\n", 7991 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 7992 *decode_ok = True; 7993 return len+delta0; 7994 } 7995 /*UNREACHED*/ 7996 vassert(0); 7997 } 7998 7999 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 8000 //.. 8001 //.. static 8002 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) 8003 //.. { 8004 //.. Int len; 8005 //.. IRTemp addr; 8006 //.. UChar rm = getUChar(delta0); 8007 //.. HChar dis_buf[50]; 8008 //.. 8009 //.. if (epartIsReg(rm)) { 8010 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 8011 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 8012 //.. return 1+delta0; 8013 //.. } else { 8014 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8015 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 8016 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 8017 //.. return len+delta0; 8018 //.. } 8019 //.. } 8020 //.. 8021 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 8022 //.. dst is ireg and sz==4, zero out top half of it. */ 8023 //.. 8024 //.. static 8025 //.. UInt dis_mov_Sw_Ew ( UChar sorb, 8026 //.. Int sz, 8027 //.. UInt delta0 ) 8028 //.. { 8029 //.. Int len; 8030 //.. IRTemp addr; 8031 //.. UChar rm = getUChar(delta0); 8032 //.. HChar dis_buf[50]; 8033 //.. 8034 //.. vassert(sz == 2 || sz == 4); 8035 //.. 8036 //.. if (epartIsReg(rm)) { 8037 //.. if (sz == 4) 8038 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 8039 //.. else 8040 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 8041 //.. 8042 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 8043 //.. return 1+delta0; 8044 //.. } else { 8045 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8046 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 8047 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 8048 //.. return len+delta0; 8049 //.. } 8050 //.. } 8051 //.. 8052 //.. 8053 //.. static 8054 //.. void dis_push_segreg ( UInt sreg, Int sz ) 8055 //.. { 8056 //.. IRTemp t1 = newTemp(Ity_I16); 8057 //.. IRTemp ta = newTemp(Ity_I32); 8058 //.. vassert(sz == 2 || sz == 4); 8059 //.. 8060 //.. assign( t1, getSReg(sreg) ); 8061 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 8062 //.. putIReg(4, R_ESP, mkexpr(ta)); 8063 //.. storeLE( mkexpr(ta), mkexpr(t1) ); 8064 //.. 8065 //.. DIP("pushw %s\n", nameSReg(sreg)); 8066 //.. } 8067 //.. 8068 //.. static 8069 //.. void dis_pop_segreg ( UInt sreg, Int sz ) 8070 //.. { 8071 //.. IRTemp t1 = newTemp(Ity_I16); 8072 //.. IRTemp ta = newTemp(Ity_I32); 8073 //.. vassert(sz == 2 || sz == 4); 8074 //.. 8075 //.. assign( ta, getIReg(4, R_ESP) ); 8076 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 8077 //.. 8078 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 8079 //.. putSReg( sreg, mkexpr(t1) ); 8080 //.. DIP("pop %s\n", nameSReg(sreg)); 8081 //.. 
} 8082 8083 static 8084 void dis_ret ( VexAbiInfo* vbi, ULong d64 ) 8085 { 8086 IRTemp t1 = newTemp(Ity_I64); 8087 IRTemp t2 = newTemp(Ity_I64); 8088 IRTemp t3 = newTemp(Ity_I64); 8089 assign(t1, getIReg64(R_RSP)); 8090 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8091 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8092 putIReg64(R_RSP, mkexpr(t3)); 8093 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8094 jmp_treg(Ijk_Ret,t2); 8095 } 8096 8097 8098 /*------------------------------------------------------------*/ 8099 /*--- SSE/SSE2/SSE3 helpers ---*/ 8100 /*------------------------------------------------------------*/ 8101 8102 /* Worker function; do not call directly. 8103 Handles full width G = G `op` E and G = (not G) `op` E. 8104 */ 8105 8106 static ULong dis_SSE_E_to_G_all_wrk ( 8107 VexAbiInfo* vbi, 8108 Prefix pfx, Long delta, 8109 HChar* opname, IROp op, 8110 Bool invertG 8111 ) 8112 { 8113 HChar dis_buf[50]; 8114 Int alen; 8115 IRTemp addr; 8116 UChar rm = getUChar(delta); 8117 IRExpr* gpart 8118 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8119 : getXMMReg(gregOfRexRM(pfx,rm)); 8120 if (epartIsReg(rm)) { 8121 putXMMReg( gregOfRexRM(pfx,rm), 8122 binop(op, gpart, 8123 getXMMReg(eregOfRexRM(pfx,rm))) ); 8124 DIP("%s %s,%s\n", opname, 8125 nameXMMReg(eregOfRexRM(pfx,rm)), 8126 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8127 return delta+1; 8128 } else { 8129 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8130 putXMMReg( gregOfRexRM(pfx,rm), 8131 binop(op, gpart, 8132 loadLE(Ity_V128, mkexpr(addr))) ); 8133 DIP("%s %s,%s\n", opname, 8134 dis_buf, 8135 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8136 return delta+alen; 8137 } 8138 } 8139 8140 8141 /* All lanes SSE binary operation, G = G `op` E. */ 8142 8143 static 8144 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi, 8145 Prefix pfx, Long delta, 8146 HChar* opname, IROp op ) 8147 { 8148 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8149 } 8150 8151 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8152 8153 static 8154 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi, 8155 Prefix pfx, Long delta, 8156 HChar* opname, IROp op ) 8157 { 8158 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8159 } 8160 8161 8162 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 8163 8164 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi, 8165 Prefix pfx, Long delta, 8166 HChar* opname, IROp op ) 8167 { 8168 HChar dis_buf[50]; 8169 Int alen; 8170 IRTemp addr; 8171 UChar rm = getUChar(delta); 8172 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8173 if (epartIsReg(rm)) { 8174 putXMMReg( gregOfRexRM(pfx,rm), 8175 binop(op, gpart, 8176 getXMMReg(eregOfRexRM(pfx,rm))) ); 8177 DIP("%s %s,%s\n", opname, 8178 nameXMMReg(eregOfRexRM(pfx,rm)), 8179 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8180 return delta+1; 8181 } else { 8182 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8183 E operand needs to be made simply of zeroes. */ 8184 IRTemp epart = newTemp(Ity_V128); 8185 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8186 assign( epart, unop( Iop_32UtoV128, 8187 loadLE(Ity_I32, mkexpr(addr))) ); 8188 putXMMReg( gregOfRexRM(pfx,rm), 8189 binop(op, gpart, mkexpr(epart)) ); 8190 DIP("%s %s,%s\n", opname, 8191 dis_buf, 8192 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8193 return delta+alen; 8194 } 8195 } 8196 8197 8198 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
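   As with the lo32 case above, 'op' is expected to be one of the
   lowest-lane-only 64F0x2 IROps (e.g. Iop_Add64F0x2), which compute
   only in the bottom 64-bit lane and pass the upper lane of their
   first operand through unchanged.  So, roughly:
   G[63:0] = G[63:0] `op` E[63:0], with G[127:64] unchanged -- which
   is what scalar double-precision insns such as addsd/mulsd need.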
*/ 8199 8200 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi, 8201 Prefix pfx, Long delta, 8202 HChar* opname, IROp op ) 8203 { 8204 HChar dis_buf[50]; 8205 Int alen; 8206 IRTemp addr; 8207 UChar rm = getUChar(delta); 8208 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8209 if (epartIsReg(rm)) { 8210 putXMMReg( gregOfRexRM(pfx,rm), 8211 binop(op, gpart, 8212 getXMMReg(eregOfRexRM(pfx,rm))) ); 8213 DIP("%s %s,%s\n", opname, 8214 nameXMMReg(eregOfRexRM(pfx,rm)), 8215 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8216 return delta+1; 8217 } else { 8218 /* We can only do a 64-bit memory read, so the upper half of the 8219 E operand needs to be made simply of zeroes. */ 8220 IRTemp epart = newTemp(Ity_V128); 8221 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8222 assign( epart, unop( Iop_64UtoV128, 8223 loadLE(Ity_I64, mkexpr(addr))) ); 8224 putXMMReg( gregOfRexRM(pfx,rm), 8225 binop(op, gpart, mkexpr(epart)) ); 8226 DIP("%s %s,%s\n", opname, 8227 dis_buf, 8228 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8229 return delta+alen; 8230 } 8231 } 8232 8233 8234 /* All lanes unary SSE operation, G = op(E). */ 8235 8236 static ULong dis_SSE_E_to_G_unary_all ( 8237 VexAbiInfo* vbi, 8238 Prefix pfx, Long delta, 8239 HChar* opname, IROp op 8240 ) 8241 { 8242 HChar dis_buf[50]; 8243 Int alen; 8244 IRTemp addr; 8245 UChar rm = getUChar(delta); 8246 if (epartIsReg(rm)) { 8247 putXMMReg( gregOfRexRM(pfx,rm), 8248 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) ); 8249 DIP("%s %s,%s\n", opname, 8250 nameXMMReg(eregOfRexRM(pfx,rm)), 8251 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8252 return delta+1; 8253 } else { 8254 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8255 putXMMReg( gregOfRexRM(pfx,rm), 8256 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 8257 DIP("%s %s,%s\n", opname, 8258 dis_buf, 8259 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8260 return delta+alen; 8261 } 8262 } 8263 8264 8265 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 8266 8267 static ULong dis_SSE_E_to_G_unary_lo32 ( 8268 VexAbiInfo* vbi, 8269 Prefix pfx, Long delta, 8270 HChar* opname, IROp op 8271 ) 8272 { 8273 /* First we need to get the old G value and patch the low 32 bits 8274 of the E operand into it. Then apply op and write back to G. */ 8275 HChar dis_buf[50]; 8276 Int alen; 8277 IRTemp addr; 8278 UChar rm = getUChar(delta); 8279 IRTemp oldG0 = newTemp(Ity_V128); 8280 IRTemp oldG1 = newTemp(Ity_V128); 8281 8282 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8283 8284 if (epartIsReg(rm)) { 8285 assign( oldG1, 8286 binop( Iop_SetV128lo32, 8287 mkexpr(oldG0), 8288 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8289 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8290 DIP("%s %s,%s\n", opname, 8291 nameXMMReg(eregOfRexRM(pfx,rm)), 8292 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8293 return delta+1; 8294 } else { 8295 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8296 assign( oldG1, 8297 binop( Iop_SetV128lo32, 8298 mkexpr(oldG0), 8299 loadLE(Ity_I32, mkexpr(addr)) )); 8300 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8301 DIP("%s %s,%s\n", opname, 8302 dis_buf, 8303 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8304 return delta+alen; 8305 } 8306 } 8307 8308 8309 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 8310 8311 static ULong dis_SSE_E_to_G_unary_lo64 ( 8312 VexAbiInfo* vbi, 8313 Prefix pfx, Long delta, 8314 HChar* opname, IROp op 8315 ) 8316 { 8317 /* First we need to get the old G value and patch the low 64 bits 8318 of the E operand into it. Then apply op and write back to G. 
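      Roughly: G = op( SetV128lo64(G, E[63:0]) ), where 'op' is a
      lowest-lane-only unary IROp such as Iop_Sqrt64F0x2.  Only the
      low 64-bit lane of G receives a computed value; the upper lane
      keeps whatever G already held, as e.g. sqrtsd requires.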
*/ 8319 HChar dis_buf[50]; 8320 Int alen; 8321 IRTemp addr; 8322 UChar rm = getUChar(delta); 8323 IRTemp oldG0 = newTemp(Ity_V128); 8324 IRTemp oldG1 = newTemp(Ity_V128); 8325 8326 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8327 8328 if (epartIsReg(rm)) { 8329 assign( oldG1, 8330 binop( Iop_SetV128lo64, 8331 mkexpr(oldG0), 8332 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 8333 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8334 DIP("%s %s,%s\n", opname, 8335 nameXMMReg(eregOfRexRM(pfx,rm)), 8336 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8337 return delta+1; 8338 } else { 8339 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8340 assign( oldG1, 8341 binop( Iop_SetV128lo64, 8342 mkexpr(oldG0), 8343 loadLE(Ity_I64, mkexpr(addr)) )); 8344 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8345 DIP("%s %s,%s\n", opname, 8346 dis_buf, 8347 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8348 return delta+alen; 8349 } 8350 } 8351 8352 8353 /* SSE integer binary operation: 8354 G = G `op` E (eLeft == False) 8355 G = E `op` G (eLeft == True) 8356 */ 8357 static ULong dis_SSEint_E_to_G( 8358 VexAbiInfo* vbi, 8359 Prefix pfx, Long delta, 8360 HChar* opname, IROp op, 8361 Bool eLeft 8362 ) 8363 { 8364 HChar dis_buf[50]; 8365 Int alen; 8366 IRTemp addr; 8367 UChar rm = getUChar(delta); 8368 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8369 IRExpr* epart = NULL; 8370 if (epartIsReg(rm)) { 8371 epart = getXMMReg(eregOfRexRM(pfx,rm)); 8372 DIP("%s %s,%s\n", opname, 8373 nameXMMReg(eregOfRexRM(pfx,rm)), 8374 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8375 delta += 1; 8376 } else { 8377 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8378 epart = loadLE(Ity_V128, mkexpr(addr)); 8379 DIP("%s %s,%s\n", opname, 8380 dis_buf, 8381 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8382 delta += alen; 8383 } 8384 putXMMReg( gregOfRexRM(pfx,rm), 8385 eLeft ? binop(op, epart, gpart) 8386 : binop(op, gpart, epart) ); 8387 return delta; 8388 } 8389 8390 8391 /* Helper for doing SSE FP comparisons. */ 8392 8393 static void findSSECmpOp ( Bool* needNot, IROp* op, 8394 Int imm8, Bool all_lanes, Int sz ) 8395 { 8396 imm8 &= 7; 8397 *needNot = False; 8398 *op = Iop_INVALID; 8399 if (imm8 >= 4) { 8400 *needNot = True; 8401 imm8 -= 4; 8402 } 8403 8404 if (sz == 4 && all_lanes) { 8405 switch (imm8) { 8406 case 0: *op = Iop_CmpEQ32Fx4; return; 8407 case 1: *op = Iop_CmpLT32Fx4; return; 8408 case 2: *op = Iop_CmpLE32Fx4; return; 8409 case 3: *op = Iop_CmpUN32Fx4; return; 8410 default: break; 8411 } 8412 } 8413 if (sz == 4 && !all_lanes) { 8414 switch (imm8) { 8415 case 0: *op = Iop_CmpEQ32F0x4; return; 8416 case 1: *op = Iop_CmpLT32F0x4; return; 8417 case 2: *op = Iop_CmpLE32F0x4; return; 8418 case 3: *op = Iop_CmpUN32F0x4; return; 8419 default: break; 8420 } 8421 } 8422 if (sz == 8 && all_lanes) { 8423 switch (imm8) { 8424 case 0: *op = Iop_CmpEQ64Fx2; return; 8425 case 1: *op = Iop_CmpLT64Fx2; return; 8426 case 2: *op = Iop_CmpLE64Fx2; return; 8427 case 3: *op = Iop_CmpUN64Fx2; return; 8428 default: break; 8429 } 8430 } 8431 if (sz == 8 && !all_lanes) { 8432 switch (imm8) { 8433 case 0: *op = Iop_CmpEQ64F0x2; return; 8434 case 1: *op = Iop_CmpLT64F0x2; return; 8435 case 2: *op = Iop_CmpLE64F0x2; return; 8436 case 3: *op = Iop_CmpUN64F0x2; return; 8437 default: break; 8438 } 8439 } 8440 vpanic("findSSECmpOp(amd64,guest)"); 8441 } 8442 8443 /* Handles SSE 32F/64F comparisons. 
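   The immediate byte selects the predicate: 0=EQ, 1=LT, 2=LE,
   3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD.  findSSECmpOp above maps
   predicates 4..7 onto the corresponding 0..3 comparison plus a
   negate-the-result flag (needNot).  For the all-lanes forms the
   negation is a full 128-bit NOT; for the scalar forms only the low
   lane (bottom 32 or 64 bits) is flipped, leaving the pass-through
   upper lanes alone.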
*/ 8444 8445 static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi, 8446 Prefix pfx, Long delta, 8447 HChar* opname, Bool all_lanes, Int sz ) 8448 { 8449 HChar dis_buf[50]; 8450 Int alen, imm8; 8451 IRTemp addr; 8452 Bool needNot = False; 8453 IROp op = Iop_INVALID; 8454 IRTemp plain = newTemp(Ity_V128); 8455 UChar rm = getUChar(delta); 8456 UShort mask = 0; 8457 vassert(sz == 4 || sz == 8); 8458 if (epartIsReg(rm)) { 8459 imm8 = getUChar(delta+1); 8460 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 8461 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 8462 getXMMReg(eregOfRexRM(pfx,rm))) ); 8463 delta += 2; 8464 DIP("%s $%d,%s,%s\n", opname, 8465 (Int)imm8, 8466 nameXMMReg(eregOfRexRM(pfx,rm)), 8467 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8468 } else { 8469 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 8470 imm8 = getUChar(delta+alen); 8471 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 8472 assign( plain, 8473 binop( 8474 op, 8475 getXMMReg(gregOfRexRM(pfx,rm)), 8476 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 8477 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 8478 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 8479 ) 8480 ); 8481 delta += alen+1; 8482 DIP("%s $%d,%s,%s\n", opname, 8483 (Int)imm8, 8484 dis_buf, 8485 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8486 } 8487 8488 if (needNot && all_lanes) { 8489 putXMMReg( gregOfRexRM(pfx,rm), 8490 unop(Iop_NotV128, mkexpr(plain)) ); 8491 } 8492 else 8493 if (needNot && !all_lanes) { 8494 mask = toUShort(sz==4 ? 0x000F : 0x00FF); 8495 putXMMReg( gregOfRexRM(pfx,rm), 8496 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 8497 } 8498 else { 8499 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 8500 } 8501 8502 return delta; 8503 } 8504 8505 8506 /* Vector by scalar shift of G by the amount specified at the bottom 8507 of E. 
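   These are the psllw/pslld/psllq, psrlw/psrld/psrlq and psraw/psrad
   forms whose count comes from an xmm register or from memory.
   Counts greater than or equal to the lane width do not wrap:
   logical shifts then produce all zeroes, and arithmetic right
   shifts behave like a shift by (lane width - 1), i.e. each lane
   becomes a copy of its sign bit.  Hence the Mux0X guards below,
   which special-case an out-of-range count rather than handing it
   to the shift IROp.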
*/ 8508 8509 static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi, 8510 Prefix pfx, Long delta, 8511 HChar* opname, IROp op ) 8512 { 8513 HChar dis_buf[50]; 8514 Int alen, size; 8515 IRTemp addr; 8516 Bool shl, shr, sar; 8517 UChar rm = getUChar(delta); 8518 IRTemp g0 = newTemp(Ity_V128); 8519 IRTemp g1 = newTemp(Ity_V128); 8520 IRTemp amt = newTemp(Ity_I32); 8521 IRTemp amt8 = newTemp(Ity_I8); 8522 if (epartIsReg(rm)) { 8523 assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) ); 8524 DIP("%s %s,%s\n", opname, 8525 nameXMMReg(eregOfRexRM(pfx,rm)), 8526 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8527 delta++; 8528 } else { 8529 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8530 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 8531 DIP("%s %s,%s\n", opname, 8532 dis_buf, 8533 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8534 delta += alen; 8535 } 8536 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8537 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 8538 8539 shl = shr = sar = False; 8540 size = 0; 8541 switch (op) { 8542 case Iop_ShlN16x8: shl = True; size = 32; break; 8543 case Iop_ShlN32x4: shl = True; size = 32; break; 8544 case Iop_ShlN64x2: shl = True; size = 64; break; 8545 case Iop_SarN16x8: sar = True; size = 16; break; 8546 case Iop_SarN32x4: sar = True; size = 32; break; 8547 case Iop_ShrN16x8: shr = True; size = 16; break; 8548 case Iop_ShrN32x4: shr = True; size = 32; break; 8549 case Iop_ShrN64x2: shr = True; size = 64; break; 8550 default: vassert(0); 8551 } 8552 8553 if (shl || shr) { 8554 assign( 8555 g1, 8556 IRExpr_Mux0X( 8557 unop(Iop_1Uto8, 8558 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))), 8559 mkV128(0x0000), 8560 binop(op, mkexpr(g0), mkexpr(amt8)) 8561 ) 8562 ); 8563 } else 8564 if (sar) { 8565 assign( 8566 g1, 8567 IRExpr_Mux0X( 8568 unop(Iop_1Uto8, 8569 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))), 8570 binop(op, mkexpr(g0), mkU8(size-1)), 8571 binop(op, mkexpr(g0), mkexpr(amt8)) 8572 ) 8573 ); 8574 } else { 8575 vassert(0); 8576 } 8577 8578 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 8579 return delta; 8580 } 8581 8582 8583 /* Vector by scalar shift of E by an immediate byte. */ 8584 8585 static 8586 ULong dis_SSE_shiftE_imm ( Prefix pfx, 8587 Long delta, HChar* opname, IROp op ) 8588 { 8589 Bool shl, shr, sar; 8590 UChar rm = getUChar(delta); 8591 IRTemp e0 = newTemp(Ity_V128); 8592 IRTemp e1 = newTemp(Ity_V128); 8593 UChar amt, size; 8594 vassert(epartIsReg(rm)); 8595 vassert(gregLO3ofRM(rm) == 2 8596 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 8597 amt = getUChar(delta+1); 8598 delta += 2; 8599 DIP("%s $%d,%s\n", opname, 8600 (Int)amt, 8601 nameXMMReg(eregOfRexRM(pfx,rm)) ); 8602 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 8603 8604 shl = shr = sar = False; 8605 size = 0; 8606 switch (op) { 8607 case Iop_ShlN16x8: shl = True; size = 16; break; 8608 case Iop_ShlN32x4: shl = True; size = 32; break; 8609 case Iop_ShlN64x2: shl = True; size = 64; break; 8610 case Iop_SarN16x8: sar = True; size = 16; break; 8611 case Iop_SarN32x4: sar = True; size = 32; break; 8612 case Iop_ShrN16x8: shr = True; size = 16; break; 8613 case Iop_ShrN32x4: shr = True; size = 32; break; 8614 case Iop_ShrN64x2: shr = True; size = 64; break; 8615 default: vassert(0); 8616 } 8617 8618 if (shl || shr) { 8619 assign( e1, amt >= size 8620 ? mkV128(0x0000) 8621 : binop(op, mkexpr(e0), mkU8(amt)) 8622 ); 8623 } else 8624 if (sar) { 8625 assign( e1, amt >= size 8626 ? 
binop(op, mkexpr(e0), mkU8(size-1)) 8627 : binop(op, mkexpr(e0), mkU8(amt)) 8628 ); 8629 } else { 8630 vassert(0); 8631 } 8632 8633 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 8634 return delta; 8635 } 8636 8637 8638 /* Get the current SSE rounding mode. */ 8639 8640 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 8641 { 8642 return 8643 unop( Iop_64to32, 8644 binop( Iop_And64, 8645 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 8646 mkU64(3) )); 8647 } 8648 8649 static void put_sse_roundingmode ( IRExpr* sseround ) 8650 { 8651 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 8652 stmt( IRStmt_Put( OFFB_SSEROUND, 8653 unop(Iop_32Uto64,sseround) ) ); 8654 } 8655 8656 /* Break a 128-bit value up into four 32-bit ints. */ 8657 8658 static void breakup128to32s ( IRTemp t128, 8659 /*OUTs*/ 8660 IRTemp* t3, IRTemp* t2, 8661 IRTemp* t1, IRTemp* t0 ) 8662 { 8663 IRTemp hi64 = newTemp(Ity_I64); 8664 IRTemp lo64 = newTemp(Ity_I64); 8665 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 8666 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 8667 8668 vassert(t0 && *t0 == IRTemp_INVALID); 8669 vassert(t1 && *t1 == IRTemp_INVALID); 8670 vassert(t2 && *t2 == IRTemp_INVALID); 8671 vassert(t3 && *t3 == IRTemp_INVALID); 8672 8673 *t0 = newTemp(Ity_I32); 8674 *t1 = newTemp(Ity_I32); 8675 *t2 = newTemp(Ity_I32); 8676 *t3 = newTemp(Ity_I32); 8677 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 8678 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 8679 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 8680 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 8681 } 8682 8683 /* Construct a 128-bit value from four 32-bit ints. */ 8684 8685 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 8686 IRTemp t1, IRTemp t0 ) 8687 { 8688 return 8689 binop( Iop_64HLtoV128, 8690 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 8691 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 8692 ); 8693 } 8694 8695 /* Break a 64-bit value up into four 16-bit ints. */ 8696 8697 static void breakup64to16s ( IRTemp t64, 8698 /*OUTs*/ 8699 IRTemp* t3, IRTemp* t2, 8700 IRTemp* t1, IRTemp* t0 ) 8701 { 8702 IRTemp hi32 = newTemp(Ity_I32); 8703 IRTemp lo32 = newTemp(Ity_I32); 8704 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 8705 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 8706 8707 vassert(t0 && *t0 == IRTemp_INVALID); 8708 vassert(t1 && *t1 == IRTemp_INVALID); 8709 vassert(t2 && *t2 == IRTemp_INVALID); 8710 vassert(t3 && *t3 == IRTemp_INVALID); 8711 8712 *t0 = newTemp(Ity_I16); 8713 *t1 = newTemp(Ity_I16); 8714 *t2 = newTemp(Ity_I16); 8715 *t3 = newTemp(Ity_I16); 8716 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 8717 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 8718 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 8719 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 8720 } 8721 8722 /* Construct a 64-bit value from four 16-bit ints. */ 8723 8724 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 8725 IRTemp t1, IRTemp t0 ) 8726 { 8727 return 8728 binop( Iop_32HLto64, 8729 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 8730 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 8731 ); 8732 } 8733 8734 8735 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. 
Given two 64-bit 8736 values (aa,bb), computes, for each of the 4 16-bit lanes: 8737 8738 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 8739 */ 8740 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 8741 { 8742 IRTemp aa = newTemp(Ity_I64); 8743 IRTemp bb = newTemp(Ity_I64); 8744 IRTemp aahi32s = newTemp(Ity_I64); 8745 IRTemp aalo32s = newTemp(Ity_I64); 8746 IRTemp bbhi32s = newTemp(Ity_I64); 8747 IRTemp bblo32s = newTemp(Ity_I64); 8748 IRTemp rHi = newTemp(Ity_I64); 8749 IRTemp rLo = newTemp(Ity_I64); 8750 IRTemp one32x2 = newTemp(Ity_I64); 8751 assign(aa, aax); 8752 assign(bb, bbx); 8753 assign( aahi32s, 8754 binop(Iop_SarN32x2, 8755 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 8756 mkU8(16) )); 8757 assign( aalo32s, 8758 binop(Iop_SarN32x2, 8759 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 8760 mkU8(16) )); 8761 assign( bbhi32s, 8762 binop(Iop_SarN32x2, 8763 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 8764 mkU8(16) )); 8765 assign( bblo32s, 8766 binop(Iop_SarN32x2, 8767 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 8768 mkU8(16) )); 8769 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 8770 assign( 8771 rHi, 8772 binop( 8773 Iop_ShrN32x2, 8774 binop( 8775 Iop_Add32x2, 8776 binop( 8777 Iop_ShrN32x2, 8778 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 8779 mkU8(14) 8780 ), 8781 mkexpr(one32x2) 8782 ), 8783 mkU8(1) 8784 ) 8785 ); 8786 assign( 8787 rLo, 8788 binop( 8789 Iop_ShrN32x2, 8790 binop( 8791 Iop_Add32x2, 8792 binop( 8793 Iop_ShrN32x2, 8794 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 8795 mkU8(14) 8796 ), 8797 mkexpr(one32x2) 8798 ), 8799 mkU8(1) 8800 ) 8801 ); 8802 return 8803 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 8804 } 8805 8806 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit 8807 values (aa,bb), computes, for each lane: 8808 8809 if aa_lane < 0 then - bb_lane 8810 else if aa_lane > 0 then bb_lane 8811 else 0 8812 */ 8813 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 8814 { 8815 IRTemp aa = newTemp(Ity_I64); 8816 IRTemp bb = newTemp(Ity_I64); 8817 IRTemp zero = newTemp(Ity_I64); 8818 IRTemp bbNeg = newTemp(Ity_I64); 8819 IRTemp negMask = newTemp(Ity_I64); 8820 IRTemp posMask = newTemp(Ity_I64); 8821 IROp opSub = Iop_INVALID; 8822 IROp opCmpGTS = Iop_INVALID; 8823 8824 switch (laneszB) { 8825 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 8826 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 8827 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 8828 default: vassert(0); 8829 } 8830 8831 assign( aa, aax ); 8832 assign( bb, bbx ); 8833 assign( zero, mkU64(0) ); 8834 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 8835 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 8836 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 8837 8838 return 8839 binop(Iop_Or64, 8840 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 8841 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 8842 8843 } 8844 8845 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 8846 value aa, computes, for each lane 8847 8848 if aa < 0 then -aa else aa 8849 8850 Note that the result is interpreted as unsigned, so that the 8851 absolute value of the most negative signed input can be 8852 represented. 
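   For example, PABSB on a byte lane holding 0x80 (-128) produces
   0x80, i.e. 128 when read as unsigned.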
8853 */ 8854 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) 8855 { 8856 IRTemp aa = newTemp(Ity_I64); 8857 IRTemp zero = newTemp(Ity_I64); 8858 IRTemp aaNeg = newTemp(Ity_I64); 8859 IRTemp negMask = newTemp(Ity_I64); 8860 IRTemp posMask = newTemp(Ity_I64); 8861 IROp opSub = Iop_INVALID; 8862 IROp opSarN = Iop_INVALID; 8863 8864 switch (laneszB) { 8865 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 8866 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 8867 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 8868 default: vassert(0); 8869 } 8870 8871 assign( aa, aax ); 8872 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 8873 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 8874 assign( zero, mkU64(0) ); 8875 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 8876 return 8877 binop(Iop_Or64, 8878 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 8879 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); 8880 } 8881 8882 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 8883 IRTemp lo64, Long byteShift ) 8884 { 8885 vassert(byteShift >= 1 && byteShift <= 7); 8886 return 8887 binop(Iop_Or64, 8888 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 8889 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 8890 ); 8891 } 8892 8893 /* Generate a SIGSEGV followed by a restart of the current instruction 8894 if effective_addr is not 16-aligned. This is required behaviour 8895 for some SSE3 instructions and all 128-bit SSSE3 instructions. 8896 This assumes that guest_RIP_curr_instr is set correctly! */ 8897 /* TODO(glider): we've replaced the 0xF mask with 0x0, effectively disabling 8898 * the check. Need to enable it once TSan stops generating unaligned 8899 * accesses in the wrappers. 8900 * See http://code.google.com/p/data-race-test/issues/detail?id=49 */ 8901 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) 8902 { 8903 stmt( 8904 IRStmt_Exit( 8905 binop(Iop_CmpNE64, 8906 binop(Iop_And64,mkexpr(effective_addr),mkU64(0x0)), 8907 mkU64(0)), 8908 Ijk_SigSEGV, 8909 IRConst_U64(guest_RIP_curr_instr) 8910 ) 8911 ); 8912 } 8913 8914 8915 /* Helper for deciding whether a given insn (starting at the opcode 8916 byte) may validly be used with a LOCK prefix. The following insns 8917 may be used with LOCK when their destination operand is in memory. 8918 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 
8919 8920 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 8921 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 8922 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 8923 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19 8924 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 8925 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 8926 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 8927 8928 DEC FE /1, FF /1 8929 INC FE /0, FF /0 8930 8931 NEG F6 /3, F7 /3 8932 NOT F6 /2, F7 /2 8933 8934 XCHG 86, 87 8935 8936 BTC 0F BB, 0F BA /7 8937 BTR 0F B3, 0F BA /6 8938 BTS 0F AB, 0F BA /5 8939 8940 CMPXCHG 0F B0, 0F B1 8941 CMPXCHG8B 0F C7 /1 8942 8943 XADD 0F C0, 0F C1 8944 8945 ------------------------------ 8946 8947 80 /0 = addb $imm8, rm8 8948 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 8949 82 /0 = addb $imm8, rm8 8950 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 8951 8952 00 = addb r8, rm8 8953 01 = addl r32, rm32 and addw r16, rm16 8954 8955 Same for ADD OR ADC SBB AND SUB XOR 8956 8957 FE /1 = dec rm8 8958 FF /1 = dec rm32 and dec rm16 8959 8960 FE /0 = inc rm8 8961 FF /0 = inc rm32 and inc rm16 8962 8963 F6 /3 = neg rm8 8964 F7 /3 = neg rm32 and neg rm16 8965 8966 F6 /2 = not rm8 8967 F7 /2 = not rm32 and not rm16 8968 8969 0F BB = btcw r16, rm16 and btcl r32, rm32 8970 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32 8971 8972 Same for BTS, BTR 8973 */ 8974 static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) 8975 { 8976 switch (opc[0]) { 8977 case 0x00: case 0x01: case 0x08: case 0x09: 8978 case 0x10: case 0x11: case 0x18: case 0x19: 8979 case 0x20: case 0x21: case 0x28: case 0x29: 8980 case 0x30: case 0x31: 8981 if (!epartIsReg(opc[1])) 8982 return True; 8983 break; 8984 8985 case 0x80: case 0x81: case 0x82: case 0x83: 8986 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6 8987 && !epartIsReg(opc[1])) 8988 return True; 8989 break; 8990 8991 case 0xFE: case 0xFF: 8992 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1 8993 && !epartIsReg(opc[1])) 8994 return True; 8995 break; 8996 8997 case 0xF6: case 0xF7: 8998 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3 8999 && !epartIsReg(opc[1])) 9000 return True; 9001 break; 9002 9003 case 0x86: case 0x87: 9004 if (!epartIsReg(opc[1])) 9005 return True; 9006 break; 9007 9008 case 0x0F: { 9009 switch (opc[1]) { 9010 case 0xBB: case 0xB3: case 0xAB: 9011 if (!epartIsReg(opc[2])) 9012 return True; 9013 break; 9014 case 0xBA: 9015 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7 9016 && !epartIsReg(opc[2])) 9017 return True; 9018 break; 9019 case 0xB0: case 0xB1: 9020 if (!epartIsReg(opc[2])) 9021 return True; 9022 break; 9023 case 0xC7: 9024 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 9025 return True; 9026 break; 9027 case 0xC0: case 0xC1: 9028 if (!epartIsReg(opc[2])) 9029 return True; 9030 break; 9031 default: 9032 break; 9033 } /* switch (opc[1]) */ 9034 break; 9035 } 9036 9037 default: 9038 break; 9039 } /* switch (opc[0]) */ 9040 9041 return False; 9042 } 9043 9044 9045 /*------------------------------------------------------------*/ 9046 /*--- Disassemble a single instruction ---*/ 9047 /*------------------------------------------------------------*/ 9048 9049 /* Disassemble a single instruction into IR. The instruction is 9050 located in host memory at &guest_code[delta]. 
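   The returned DisResult says how many guest bytes the instruction
   occupied and whether the caller should keep decoding into the same
   IRSB or stop (and, for resteers, where to continue).  *expect_CAS
   is set to True iff a LOCK prefix was accepted, so the caller can
   cross-check that the generated IR really does contain an IRCAS.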
*/ 9051 9052 static 9053 DisResult disInstr_AMD64_WRK ( 9054 /*OUT*/Bool* expect_CAS, 9055 Bool put_IP, 9056 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 9057 Bool resteerCisOk, 9058 void* callback_opaque, 9059 Long delta64, 9060 VexArchInfo* archinfo, 9061 VexAbiInfo* vbi 9062 ) 9063 { 9064 IRType ty; 9065 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 9066 Int alen; 9067 UChar opc, modrm, abyte, pre; 9068 Long d64; 9069 HChar dis_buf[50]; 9070 Int am_sz, d_sz, n, n_prefixes; 9071 DisResult dres; 9072 UChar* insn; /* used in SSE decoders */ 9073 9074 /* The running delta */ 9075 Long delta = delta64; 9076 9077 /* Holds eip at the start of the insn, so that we can print 9078 consistent error messages for unimplemented insns. */ 9079 Long delta_start = delta; 9080 9081 /* sz denotes the nominal data-op size of the insn; we change it to 9082 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of 9083 conflict REX.W takes precedence. */ 9084 Int sz = 4; 9085 9086 /* pfx holds the summary of prefixes. */ 9087 Prefix pfx = PFX_EMPTY; 9088 9089 /* Set result defaults. */ 9090 dres.whatNext = Dis_Continue; 9091 dres.len = 0; 9092 dres.continueAt = 0; 9093 9094 *expect_CAS = False; 9095 9096 vassert(guest_RIP_next_assumed == 0); 9097 vassert(guest_RIP_next_mustcheck == False); 9098 9099 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 9100 9101 DIP("\t0x%llx: ", guest_RIP_bbstart+delta); 9102 9103 /* We may be asked to update the guest RIP before going further. */ 9104 if (put_IP) 9105 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) ); 9106 9107 /* Spot "Special" instructions (see comment at top of file). */ 9108 { 9109 UChar* code = (UChar*)(guest_code + delta); 9110 /* Spot the 16-byte preamble: 9111 48C1C703 rolq $3, %rdi 9112 48C1C70D rolq $13, %rdi 9113 48C1C73D rolq $61, %rdi 9114 48C1C733 rolq $51, %rdi 9115 */ 9116 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 9117 && code[ 3] == 0x03 && 9118 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 9119 && code[ 7] == 0x0D && 9120 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 9121 && code[11] == 0x3D && 9122 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 9123 && code[15] == 0x33) { 9124 /* Got a "Special" instruction preamble. Which one is it? */ 9125 if (code[16] == 0x48 && code[17] == 0x87 9126 && code[18] == 0xDB /* xchgq %rbx,%rbx */) { 9127 /* %RDX = client_request ( %RAX ) */ 9128 DIP("%%rdx = client_request ( %%rax )\n"); 9129 delta += 19; 9130 jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta); 9131 dres.whatNext = Dis_StopHere; 9132 goto decode_success; 9133 } 9134 else 9135 if (code[16] == 0x48 && code[17] == 0x87 9136 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { 9137 /* %RAX = guest_NRADDR */ 9138 DIP("%%rax = guest_NRADDR\n"); 9139 delta += 19; 9140 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 9141 goto decode_success; 9142 } 9143 else 9144 if (code[16] == 0x48 && code[17] == 0x87 9145 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { 9146 /* call-noredir *%RAX */ 9147 DIP("call-noredir *%%rax\n"); 9148 delta += 19; 9149 t1 = newTemp(Ity_I64); 9150 assign(t1, getIRegRAX(8)); 9151 t2 = newTemp(Ity_I64); 9152 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 9153 putIReg64(R_RSP, mkexpr(t2)); 9154 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); 9155 jmp_treg(Ijk_NoRedir,t1); 9156 dres.whatNext = Dis_StopHere; 9157 goto decode_success; 9158 } 9159 /* We don't know what it is. 
*/ 9160 goto decode_failure; 9161 /*NOTREACHED*/ 9162 } 9163 } 9164 9165 /* Eat prefixes, summarising the result in pfx and sz, and rejecting 9166 as many invalid combinations as possible. */ 9167 n_prefixes = 0; 9168 while (True) { 9169 if (n_prefixes > 7) goto decode_failure; 9170 pre = getUChar(delta); 9171 switch (pre) { 9172 case 0x66: pfx |= PFX_66; break; 9173 case 0x67: pfx |= PFX_ASO; break; 9174 case 0xF2: pfx |= PFX_F2; break; 9175 case 0xF3: pfx |= PFX_F3; break; 9176 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; 9177 case 0x2E: pfx |= PFX_CS; break; 9178 case 0x3E: pfx |= PFX_DS; break; 9179 case 0x26: pfx |= PFX_ES; break; 9180 case 0x64: pfx |= PFX_FS; break; 9181 case 0x65: pfx |= PFX_GS; break; 9182 case 0x36: pfx |= PFX_SS; break; 9183 case 0x40 ... 0x4F: 9184 pfx |= PFX_REX; 9185 if (pre & (1<<3)) pfx |= PFX_REXW; 9186 if (pre & (1<<2)) pfx |= PFX_REXR; 9187 if (pre & (1<<1)) pfx |= PFX_REXX; 9188 if (pre & (1<<0)) pfx |= PFX_REXB; 9189 break; 9190 default: 9191 goto not_a_prefix; 9192 } 9193 n_prefixes++; 9194 delta++; 9195 } 9196 9197 not_a_prefix: 9198 9199 /* Dump invalid combinations */ 9200 n = 0; 9201 if (pfx & PFX_F2) n++; 9202 if (pfx & PFX_F3) n++; 9203 if (n > 1) 9204 goto decode_failure; /* can't have both */ 9205 9206 n = 0; 9207 if (pfx & PFX_CS) n++; 9208 if (pfx & PFX_DS) n++; 9209 if (pfx & PFX_ES) n++; 9210 if (pfx & PFX_FS) n++; 9211 if (pfx & PFX_GS) n++; 9212 if (pfx & PFX_SS) n++; 9213 if (n > 1) 9214 goto decode_failure; /* multiple seg overrides == illegal */ 9215 9216 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' 9217 that we should accept it. */ 9218 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero) 9219 goto decode_failure; 9220 9221 /* Ditto for %gs prefixes. */ 9222 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60) 9223 goto decode_failure; 9224 9225 /* Set up sz. */ 9226 sz = 4; 9227 if (pfx & PFX_66) sz = 2; 9228 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; 9229 9230 /* Now we should be looking at the primary opcode byte or the 9231 leading F2 or F3. Check that any LOCK prefix is actually 9232 allowed. */ 9233 9234 if (pfx & PFX_LOCK) { 9235 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 9236 DIP("lock "); 9237 } else { 9238 *expect_CAS = False; 9239 goto decode_failure; 9240 } 9241 } 9242 9243 9244 /* ---------------------------------------------------- */ 9245 /* --- The SSE/SSE2 decoder. --- */ 9246 /* ---------------------------------------------------- */ 9247 9248 /* What did I do to deserve SSE ? Perhaps I was really bad in a 9249 previous life? */ 9250 9251 /* Note, this doesn't handle SSE3 right now. All amd64s support 9252 SSE2 as a minimum so there is no point distinguishing SSE1 vs 9253 SSE2. */ 9254 9255 insn = (UChar*)&guest_code[delta]; 9256 9257 /* FXSAVE is spuriously at the start here only because it is 9258 thusly placed in guest-x86/toIR.c. */ 9259 9260 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. 9261 Note that the presence or absence of REX.W slightly affects the 9262 written format: whether the saved FPU IP and DP pointers are 64 9263 or 32 bits. But the helper function we call simply writes zero 9264 bits in the relevant fields (which are 64 bits regardless of 9265 what REX.W is) and so it's good enough (iow, equally broken) in 9266 both cases. 
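   Since FXSAVE dumps essentially all of the x87/SSE guest state into
   a 512-byte, 16-aligned memory image, it is handled with a dirty
   helper rather than with inline IR: the helper does the formatting,
   and the d->mFx / d->fxState annotations below declare which memory
   is written and which guest state is read, so that tools such as
   Memcheck can still track the effects.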
*/ 9267 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 9268 && insn[0] == 0x0F && insn[1] == 0xAE 9269 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) { 9270 IRDirty* d; 9271 modrm = getUChar(delta+2); 9272 vassert(!epartIsReg(modrm)); 9273 9274 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9275 delta += 2+alen; 9276 gen_SEGV_if_not_16_aligned(addr); 9277 9278 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 9279 9280 /* Uses dirty helper: 9281 void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */ 9282 d = unsafeIRDirty_0_N ( 9283 0/*regparms*/, 9284 "amd64g_dirtyhelper_FXSAVE", 9285 &amd64g_dirtyhelper_FXSAVE, 9286 mkIRExprVec_1( mkexpr(addr) ) 9287 ); 9288 d->needsBBP = True; 9289 9290 /* declare we're writing memory */ 9291 d->mFx = Ifx_Write; 9292 d->mAddr = mkexpr(addr); 9293 d->mSize = 512; 9294 9295 /* declare we're reading guest state */ 9296 d->nFxState = 7; 9297 9298 d->fxState[0].fx = Ifx_Read; 9299 d->fxState[0].offset = OFFB_FTOP; 9300 d->fxState[0].size = sizeof(UInt); 9301 9302 d->fxState[1].fx = Ifx_Read; 9303 d->fxState[1].offset = OFFB_FPREGS; 9304 d->fxState[1].size = 8 * sizeof(ULong); 9305 9306 d->fxState[2].fx = Ifx_Read; 9307 d->fxState[2].offset = OFFB_FPTAGS; 9308 d->fxState[2].size = 8 * sizeof(UChar); 9309 9310 d->fxState[3].fx = Ifx_Read; 9311 d->fxState[3].offset = OFFB_FPROUND; 9312 d->fxState[3].size = sizeof(ULong); 9313 9314 d->fxState[4].fx = Ifx_Read; 9315 d->fxState[4].offset = OFFB_FC3210; 9316 d->fxState[4].size = sizeof(ULong); 9317 9318 d->fxState[5].fx = Ifx_Read; 9319 d->fxState[5].offset = OFFB_XMM0; 9320 d->fxState[5].size = 16 * sizeof(U128); 9321 9322 d->fxState[6].fx = Ifx_Read; 9323 d->fxState[6].offset = OFFB_SSEROUND; 9324 d->fxState[6].size = sizeof(ULong); 9325 9326 /* Be paranoid ... this assertion tries to ensure the 16 %xmm 9327 images are packed back-to-back. If not, the value of 9328 d->fxState[5].size is wrong. */ 9329 vassert(16 == sizeof(U128)); 9330 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); 9331 9332 stmt( IRStmt_Dirty(d) ); 9333 9334 goto decode_success; 9335 } 9336 9337 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. 9338 As with FXSAVE above we ignore the value of REX.W since we're 9339 not bothering with the FPU DP and IP fields. */ 9340 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 9341 && insn[0] == 0x0F && insn[1] == 0xAE 9342 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 1) { 9343 IRDirty* d; 9344 modrm = getUChar(delta+2); 9345 vassert(!epartIsReg(modrm)); 9346 9347 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9348 delta += 2+alen; 9349 gen_SEGV_if_not_16_aligned(addr); 9350 9351 DIP("%sfxrstor %s\n", sz==8 ? 
"rex64/" : "", dis_buf); 9352 9353 /* Uses dirty helper: 9354 VexEmWarn amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong ) 9355 NOTE: 9356 the VexEmWarn value is simply ignored 9357 */ 9358 d = unsafeIRDirty_0_N ( 9359 0/*regparms*/, 9360 "amd64g_dirtyhelper_FXRSTOR", 9361 &amd64g_dirtyhelper_FXRSTOR, 9362 mkIRExprVec_1( mkexpr(addr) ) 9363 ); 9364 d->needsBBP = True; 9365 9366 /* declare we're reading memory */ 9367 d->mFx = Ifx_Read; 9368 d->mAddr = mkexpr(addr); 9369 d->mSize = 512; 9370 9371 /* declare we're writing guest state */ 9372 d->nFxState = 7; 9373 9374 d->fxState[0].fx = Ifx_Write; 9375 d->fxState[0].offset = OFFB_FTOP; 9376 d->fxState[0].size = sizeof(UInt); 9377 9378 d->fxState[1].fx = Ifx_Write; 9379 d->fxState[1].offset = OFFB_FPREGS; 9380 d->fxState[1].size = 8 * sizeof(ULong); 9381 9382 d->fxState[2].fx = Ifx_Write; 9383 d->fxState[2].offset = OFFB_FPTAGS; 9384 d->fxState[2].size = 8 * sizeof(UChar); 9385 9386 d->fxState[3].fx = Ifx_Write; 9387 d->fxState[3].offset = OFFB_FPROUND; 9388 d->fxState[3].size = sizeof(ULong); 9389 9390 d->fxState[4].fx = Ifx_Write; 9391 d->fxState[4].offset = OFFB_FC3210; 9392 d->fxState[4].size = sizeof(ULong); 9393 9394 d->fxState[5].fx = Ifx_Write; 9395 d->fxState[5].offset = OFFB_XMM0; 9396 d->fxState[5].size = 16 * sizeof(U128); 9397 9398 d->fxState[6].fx = Ifx_Write; 9399 d->fxState[6].offset = OFFB_SSEROUND; 9400 d->fxState[6].size = sizeof(ULong); 9401 9402 /* Be paranoid ... this assertion tries to ensure the 16 %xmm 9403 images are packed back-to-back. If not, the value of 9404 d->fxState[5].size is wrong. */ 9405 vassert(16 == sizeof(U128)); 9406 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); 9407 9408 stmt( IRStmt_Dirty(d) ); 9409 9410 goto decode_success; 9411 } 9412 9413 /* ------ SSE decoder main ------ */ 9414 9415 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 9416 if (haveNo66noF2noF3(pfx) && sz == 4 9417 && insn[0] == 0x0F && insn[1] == 0x58) { 9418 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 ); 9419 goto decode_success; 9420 } 9421 9422 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 9423 if (haveF3no66noF2(pfx) && sz == 4 9424 && insn[0] == 0x0F && insn[1] == 0x58) { 9425 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 ); 9426 goto decode_success; 9427 } 9428 9429 /* 0F 55 = ANDNPS -- G = (not G) and E */ 9430 if (haveNo66noF2noF3(pfx) && sz == 4 9431 && insn[0] == 0x0F && insn[1] == 0x55) { 9432 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 ); 9433 goto decode_success; 9434 } 9435 9436 /* 0F 54 = ANDPS -- G = G and E */ 9437 if (haveNo66noF2noF3(pfx) && sz == 4 9438 && insn[0] == 0x0F && insn[1] == 0x54) { 9439 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 ); 9440 goto decode_success; 9441 } 9442 9443 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 9444 if (haveNo66noF2noF3(pfx) && sz == 4 9445 && insn[0] == 0x0F && insn[1] == 0xC2) { 9446 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 ); 9447 goto decode_success; 9448 } 9449 9450 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 9451 if (haveF3no66noF2(pfx) && sz == 4 9452 && insn[0] == 0x0F && insn[1] == 0xC2) { 9453 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 ); 9454 goto decode_success; 9455 } 9456 9457 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 9458 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 9459 if (haveNo66noF2noF3(pfx) && sz == 4 9460 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] 
== 0x2E)) { 9461 IRTemp argL = newTemp(Ity_F32); 9462 IRTemp argR = newTemp(Ity_F32); 9463 modrm = getUChar(delta+2); 9464 if (epartIsReg(modrm)) { 9465 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 9466 0/*lowest lane*/ ) ); 9467 delta += 2+1; 9468 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", 9469 nameXMMReg(eregOfRexRM(pfx,modrm)), 9470 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9471 } else { 9472 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9473 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 9474 delta += 2+alen; 9475 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", 9476 dis_buf, 9477 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9478 } 9479 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 9480 0/*lowest lane*/ ) ); 9481 9482 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9483 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9484 stmt( IRStmt_Put( 9485 OFFB_CC_DEP1, 9486 binop( Iop_And64, 9487 unop( Iop_32Uto64, 9488 binop(Iop_CmpF64, 9489 unop(Iop_F32toF64,mkexpr(argL)), 9490 unop(Iop_F32toF64,mkexpr(argR)))), 9491 mkU64(0x45) 9492 ))); 9493 9494 goto decode_success; 9495 } 9496 9497 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 9498 half xmm */ 9499 if (haveNo66noF2noF3(pfx) && sz == 4 9500 && insn[0] == 0x0F && insn[1] == 0x2A) { 9501 IRTemp arg64 = newTemp(Ity_I64); 9502 IRTemp rmode = newTemp(Ity_I32); 9503 9504 modrm = getUChar(delta+2); 9505 do_MMX_preamble(); 9506 if (epartIsReg(modrm)) { 9507 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 9508 delta += 2+1; 9509 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 9510 nameXMMReg(gregOfRexRM(pfx,modrm))); 9511 } else { 9512 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9513 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9514 delta += 2+alen; 9515 DIP("cvtpi2ps %s,%s\n", dis_buf, 9516 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9517 } 9518 9519 assign( rmode, get_sse_roundingmode() ); 9520 9521 putXMMRegLane32F( 9522 gregOfRexRM(pfx,modrm), 0, 9523 binop(Iop_F64toF32, 9524 mkexpr(rmode), 9525 unop(Iop_I32StoF64, 9526 unop(Iop_64to32, mkexpr(arg64)) )) ); 9527 9528 putXMMRegLane32F( 9529 gregOfRexRM(pfx,modrm), 1, 9530 binop(Iop_F64toF32, 9531 mkexpr(rmode), 9532 unop(Iop_I32StoF64, 9533 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 9534 9535 goto decode_success; 9536 } 9537 9538 /* F3 0F 2A = CVTSI2SS 9539 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 9540 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 9541 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8) 9542 && insn[0] == 0x0F && insn[1] == 0x2A) { 9543 9544 IRTemp rmode = newTemp(Ity_I32); 9545 assign( rmode, get_sse_roundingmode() ); 9546 modrm = getUChar(delta+2); 9547 9548 if (sz == 4) { 9549 IRTemp arg32 = newTemp(Ity_I32); 9550 if (epartIsReg(modrm)) { 9551 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 9552 delta += 2+1; 9553 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 9554 nameXMMReg(gregOfRexRM(pfx,modrm))); 9555 } else { 9556 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9557 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 9558 delta += 2+alen; 9559 DIP("cvtsi2ss %s,%s\n", dis_buf, 9560 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9561 } 9562 putXMMRegLane32F( 9563 gregOfRexRM(pfx,modrm), 0, 9564 binop(Iop_F64toF32, 9565 mkexpr(rmode), 9566 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 9567 } else { 9568 /* sz == 8 */ 9569 IRTemp arg64 = newTemp(Ity_I64); 9570 if (epartIsReg(modrm)) { 9571 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 9572 delta += 2+1; 
9573 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 9574 nameXMMReg(gregOfRexRM(pfx,modrm))); 9575 } else { 9576 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9577 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9578 delta += 2+alen; 9579 DIP("cvtsi2ssq %s,%s\n", dis_buf, 9580 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9581 } 9582 putXMMRegLane32F( 9583 gregOfRexRM(pfx,modrm), 0, 9584 binop(Iop_F64toF32, 9585 mkexpr(rmode), 9586 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 9587 } 9588 9589 goto decode_success; 9590 } 9591 9592 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 9593 I32 in mmx, according to prevailing SSE rounding mode */ 9594 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 9595 I32 in mmx, rounding towards zero */ 9596 if (haveNo66noF2noF3(pfx) && sz == 4 9597 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 9598 IRTemp dst64 = newTemp(Ity_I64); 9599 IRTemp rmode = newTemp(Ity_I32); 9600 IRTemp f32lo = newTemp(Ity_F32); 9601 IRTemp f32hi = newTemp(Ity_F32); 9602 Bool r2zero = toBool(insn[1] == 0x2C); 9603 9604 do_MMX_preamble(); 9605 modrm = getUChar(delta+2); 9606 9607 if (epartIsReg(modrm)) { 9608 delta += 2+1; 9609 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 9610 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 9611 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 9612 nameXMMReg(eregOfRexRM(pfx,modrm)), 9613 nameMMXReg(gregLO3ofRM(modrm))); 9614 } else { 9615 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9616 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9617 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 9618 mkexpr(addr), 9619 mkU64(4) ))); 9620 delta += 2+alen; 9621 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 9622 dis_buf, 9623 nameMMXReg(gregLO3ofRM(modrm))); 9624 } 9625 9626 if (r2zero) { 9627 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 9628 } else { 9629 assign( rmode, get_sse_roundingmode() ); 9630 } 9631 9632 assign( 9633 dst64, 9634 binop( Iop_32HLto64, 9635 binop( Iop_F64toI32S, 9636 mkexpr(rmode), 9637 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 9638 binop( Iop_F64toI32S, 9639 mkexpr(rmode), 9640 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 9641 ) 9642 ); 9643 9644 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 9645 goto decode_success; 9646 } 9647 9648 /* F3 0F 2D = CVTSS2SI 9649 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 9650 according to prevailing SSE rounding mode 9651 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 9652 according to prevailing SSE rounding mode 9653 */ 9654 /* F3 0F 2C = CVTTSS2SI 9655 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 9656 truncating towards zero 9657 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 9658 truncating towards zero 9659 */ 9660 if (haveF3no66noF2(pfx) 9661 && insn[0] == 0x0F 9662 && (insn[1] == 0x2D || insn[1] == 0x2C)) { 9663 IRTemp rmode = newTemp(Ity_I32); 9664 IRTemp f32lo = newTemp(Ity_F32); 9665 Bool r2zero = toBool(insn[1] == 0x2C); 9666 vassert(sz == 4 || sz == 8); 9667 9668 modrm = getUChar(delta+2); 9669 if (epartIsReg(modrm)) { 9670 delta += 2+1; 9671 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 9672 DIP("cvt%sss2si %s,%s\n", r2zero ? 
"t" : "", 9673 nameXMMReg(eregOfRexRM(pfx,modrm)), 9674 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 9675 } else { 9676 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9677 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9678 delta += 2+alen; 9679 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 9680 dis_buf, 9681 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 9682 } 9683 9684 if (r2zero) { 9685 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9686 } else { 9687 assign( rmode, get_sse_roundingmode() ); 9688 } 9689 9690 if (sz == 4) { 9691 putIReg32( gregOfRexRM(pfx,modrm), 9692 binop( Iop_F64toI32S, 9693 mkexpr(rmode), 9694 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9695 } else { 9696 putIReg64( gregOfRexRM(pfx,modrm), 9697 binop( Iop_F64toI64S, 9698 mkexpr(rmode), 9699 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9700 } 9701 9702 goto decode_success; 9703 } 9704 9705 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 9706 if (haveNo66noF2noF3(pfx) && sz == 4 9707 && insn[0] == 0x0F && insn[1] == 0x5E) { 9708 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 ); 9709 goto decode_success; 9710 } 9711 9712 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 9713 if (haveF3no66noF2(pfx) && sz == 4 9714 && insn[0] == 0x0F && insn[1] == 0x5E) { 9715 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 ); 9716 goto decode_success; 9717 } 9718 9719 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 9720 if (insn[0] == 0x0F && insn[1] == 0xAE 9721 && haveNo66noF2noF3(pfx) 9722 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) { 9723 9724 IRTemp t64 = newTemp(Ity_I64); 9725 IRTemp ew = newTemp(Ity_I32); 9726 9727 vassert(sz == 4); 9728 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9729 delta += 2+alen; 9730 DIP("ldmxcsr %s\n", dis_buf); 9731 9732 /* The only thing we observe in %mxcsr is the rounding mode. 9733 Therefore, pass the 32-bit value (SSE native-format control 9734 word) to a clean helper, getting back a 64-bit value, the 9735 lower half of which is the SSEROUND value to store, and the 9736 upper half of which is the emulation-warning token which may 9737 be generated. 9738 */ 9739 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 9740 assign( t64, mkIRExprCCall( 9741 Ity_I64, 0/*regparms*/, 9742 "amd64g_check_ldmxcsr", 9743 &amd64g_check_ldmxcsr, 9744 mkIRExprVec_1( 9745 unop(Iop_32Uto64, 9746 loadLE(Ity_I32, mkexpr(addr)) 9747 ) 9748 ) 9749 ) 9750 ); 9751 9752 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 9753 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 9754 put_emwarn( mkexpr(ew) ); 9755 /* Finally, if an emulation warning was reported, side-exit to 9756 the next insn, reporting the warning, so that Valgrind's 9757 dispatcher sees the warning. 
*/ 9758 stmt( 9759 IRStmt_Exit( 9760 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), 9761 Ijk_EmWarn, 9762 IRConst_U64(guest_RIP_bbstart+delta) 9763 ) 9764 ); 9765 goto decode_success; 9766 } 9767 9768 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9769 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 9770 if (haveNo66noF2noF3(pfx) && sz == 4 9771 && insn[0] == 0x0F && insn[1] == 0xF7) { 9772 Bool ok = False; 9773 delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 ); 9774 if (!ok) 9775 goto decode_failure; 9776 goto decode_success; 9777 } 9778 9779 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 9780 if (haveNo66noF2noF3(pfx) && sz == 4 9781 && insn[0] == 0x0F && insn[1] == 0x5F) { 9782 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 ); 9783 goto decode_success; 9784 } 9785 9786 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 9787 if (haveF3no66noF2(pfx) && sz == 4 9788 && insn[0] == 0x0F && insn[1] == 0x5F) { 9789 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 ); 9790 goto decode_success; 9791 } 9792 9793 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 9794 if (haveNo66noF2noF3(pfx) && sz == 4 9795 && insn[0] == 0x0F && insn[1] == 0x5D) { 9796 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 ); 9797 goto decode_success; 9798 } 9799 9800 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 9801 if (haveF3no66noF2(pfx) && sz == 4 9802 && insn[0] == 0x0F && insn[1] == 0x5D) { 9803 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 ); 9804 goto decode_success; 9805 } 9806 9807 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 9808 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 9809 if (haveNo66noF2noF3(pfx) 9810 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9811 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 9812 modrm = getUChar(delta+2); 9813 if (epartIsReg(modrm)) { 9814 putXMMReg( gregOfRexRM(pfx,modrm), 9815 getXMMReg( eregOfRexRM(pfx,modrm) )); 9816 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 9817 nameXMMReg(gregOfRexRM(pfx,modrm))); 9818 delta += 2+1; 9819 } else { 9820 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9821 if (insn[1] == 0x28/*movaps*/) 9822 gen_SEGV_if_not_16_aligned( addr ); 9823 putXMMReg( gregOfRexRM(pfx,modrm), 9824 loadLE(Ity_V128, mkexpr(addr)) ); 9825 DIP("mov[ua]ps %s,%s\n", dis_buf, 9826 nameXMMReg(gregOfRexRM(pfx,modrm))); 9827 delta += 2+alen; 9828 } 9829 goto decode_success; 9830 } 9831 9832 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 9833 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 9834 if (haveNo66noF2noF3(pfx) 9835 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9836 && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) { 9837 modrm = getUChar(delta+2); 9838 if (epartIsReg(modrm)) { 9839 /* fall through; awaiting test case */ 9840 } else { 9841 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9842 if (insn[1] == 0x29/*movaps*/) 9843 gen_SEGV_if_not_16_aligned( addr ); 9844 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 9845 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 9846 dis_buf ); 9847 delta += 2+alen; 9848 goto decode_success; 9849 } 9850 } 9851 9852 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 9853 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. 
*/ 9854 if (haveNo66noF2noF3(pfx) 9855 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9856 && insn[0] == 0x0F && insn[1] == 0x16) { 9857 modrm = getUChar(delta+2); 9858 if (epartIsReg(modrm)) { 9859 delta += 2+1; 9860 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 9861 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 9862 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 9863 nameXMMReg(gregOfRexRM(pfx,modrm))); 9864 } else { 9865 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9866 delta += 2+alen; 9867 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 9868 loadLE(Ity_I64, mkexpr(addr)) ); 9869 DIP("movhps %s,%s\n", dis_buf, 9870 nameXMMReg( gregOfRexRM(pfx,modrm) )); 9871 } 9872 goto decode_success; 9873 } 9874 9875 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ 9876 if (haveNo66noF2noF3(pfx) 9877 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9878 && insn[0] == 0x0F && insn[1] == 0x17) { 9879 if (!epartIsReg(insn[2])) { 9880 delta += 2; 9881 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9882 delta += alen; 9883 storeLE( mkexpr(addr), 9884 getXMMRegLane64( gregOfRexRM(pfx,insn[2]), 9885 1/*upper lane*/ ) ); 9886 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), 9887 dis_buf); 9888 goto decode_success; 9889 } 9890 /* else fall through */ 9891 } 9892 9893 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ 9894 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ 9895 if (haveNo66noF2noF3(pfx) 9896 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9897 && insn[0] == 0x0F && insn[1] == 0x12) { 9898 modrm = getUChar(delta+2); 9899 if (epartIsReg(modrm)) { 9900 delta += 2+1; 9901 putXMMRegLane64( gregOfRexRM(pfx,modrm), 9902 0/*lower lane*/, 9903 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 )); 9904 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 9905 nameXMMReg(gregOfRexRM(pfx,modrm))); 9906 } else { 9907 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9908 delta += 2+alen; 9909 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/, 9910 loadLE(Ity_I64, mkexpr(addr)) ); 9911 DIP("movlps %s, %s\n", 9912 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 9913 } 9914 goto decode_success; 9915 } 9916 9917 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */ 9918 if (haveNo66noF2noF3(pfx) 9919 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9920 && insn[0] == 0x0F && insn[1] == 0x13) { 9921 if (!epartIsReg(insn[2])) { 9922 delta += 2; 9923 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9924 delta += alen; 9925 storeLE( mkexpr(addr), 9926 getXMMRegLane64( gregOfRexRM(pfx,insn[2]), 9927 0/*lower lane*/ ) ); 9928 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), 9929 dis_buf); 9930 goto decode_success; 9931 } 9932 /* else fall through */ 9933 } 9934 9935 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 9936 to 4 lowest bits of ireg(G) */ 9937 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 9938 && insn[0] == 0x0F && insn[1] == 0x50) { 9939 /* sz == 8 is a kludge to handle insns with REX.W redundantly 9940 set to 1, which has been known to happen: 9941 9942 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 9943 9944 20071106: Intel docs say that REX.W isn't redundant: when 9945 present, a 64-bit register is written; when not present, only 9946 the 32-bit half is written. However, testing on a Core2 9947 machine suggests the entire 64 bit register is written 9948 irrespective of the status of REX.W. 
That could be because 9949 of the default rule that says "if the lower half of a 32-bit 9950 register is written, the upper half is zeroed". By using 9951 putIReg32 here we inadvertantly produce the same behaviour as 9952 the Core2, for the same reason -- putIReg32 implements said 9953 rule. 9954 9955 AMD docs give no indication that REX.W is even valid for this 9956 insn. */ 9957 modrm = getUChar(delta+2); 9958 if (epartIsReg(modrm)) { 9959 Int src; 9960 t0 = newTemp(Ity_I32); 9961 t1 = newTemp(Ity_I32); 9962 t2 = newTemp(Ity_I32); 9963 t3 = newTemp(Ity_I32); 9964 delta += 2+1; 9965 src = eregOfRexRM(pfx,modrm); 9966 assign( t0, binop( Iop_And32, 9967 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), 9968 mkU32(1) )); 9969 assign( t1, binop( Iop_And32, 9970 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), 9971 mkU32(2) )); 9972 assign( t2, binop( Iop_And32, 9973 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), 9974 mkU32(4) )); 9975 assign( t3, binop( Iop_And32, 9976 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), 9977 mkU32(8) )); 9978 putIReg32( gregOfRexRM(pfx,modrm), 9979 binop(Iop_Or32, 9980 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 9981 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) 9982 ) 9983 ); 9984 DIP("movmskps %s,%s\n", nameXMMReg(src), 9985 nameIReg32(gregOfRexRM(pfx,modrm))); 9986 goto decode_success; 9987 } 9988 /* else fall through */ 9989 } 9990 9991 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 9992 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 9993 if ( ( (haveNo66noF2noF3(pfx) && sz == 4) 9994 || (have66noF2noF3(pfx) && sz == 2) 9995 ) 9996 && insn[0] == 0x0F && insn[1] == 0x2B) { 9997 modrm = getUChar(delta+2); 9998 if (!epartIsReg(modrm)) { 9999 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10000 gen_SEGV_if_not_16_aligned( addr ); 10001 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 10002 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 10003 dis_buf, 10004 nameXMMReg(gregOfRexRM(pfx,modrm))); 10005 delta += 2+alen; 10006 goto decode_success; 10007 } 10008 /* else fall through */ 10009 } 10010 10011 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10012 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 10013 Intel manual does not say anything about the usual business of 10014 the FP reg tags getting trashed whenever an MMX insn happens. 10015 So we just leave them alone. 10016 */ 10017 if (haveNo66noF2noF3(pfx) && sz == 4 10018 && insn[0] == 0x0F && insn[1] == 0xE7) { 10019 modrm = getUChar(delta+2); 10020 if (!epartIsReg(modrm)) { 10021 /* do_MMX_preamble(); Intel docs don't specify this */ 10022 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10023 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 10024 DIP("movntq %s,%s\n", dis_buf, 10025 nameMMXReg(gregLO3ofRM(modrm))); 10026 delta += 2+alen; 10027 goto decode_success; 10028 } 10029 /* else fall through */ 10030 } 10031 10032 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 10033 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. 
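   (Illustrative restatement, not original text: the
   register-to-register form below writes only lane 0 of G and
   leaves its upper 3/4 untouched, whereas the memory form first
   zeroes the whole register with mkV128(0) and then writes lane 0;
   roughly

      reg case:  G.lane0 = E.lane0
      mem case:  G       = 0;  G.lane0 = m32

   which is the asymmetry described above.)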
*/ 10034 if (haveF3no66noF2(pfx) 10035 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 10036 && insn[0] == 0x0F && insn[1] == 0x10) { 10037 modrm = getUChar(delta+2); 10038 if (epartIsReg(modrm)) { 10039 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 10040 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 10041 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10042 nameXMMReg(gregOfRexRM(pfx,modrm))); 10043 delta += 2+1; 10044 } else { 10045 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10046 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 10047 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 10048 loadLE(Ity_I32, mkexpr(addr)) ); 10049 DIP("movss %s,%s\n", dis_buf, 10050 nameXMMReg(gregOfRexRM(pfx,modrm))); 10051 delta += 2+alen; 10052 } 10053 goto decode_success; 10054 } 10055 10056 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 10057 or lo 1/4 xmm). */ 10058 if (haveF3no66noF2(pfx) && sz == 4 10059 && insn[0] == 0x0F && insn[1] == 0x11) { 10060 modrm = getUChar(delta+2); 10061 if (epartIsReg(modrm)) { 10062 /* fall through, we don't yet have a test case */ 10063 } else { 10064 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10065 storeLE( mkexpr(addr), 10066 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 10067 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 10068 dis_buf); 10069 delta += 2+alen; 10070 goto decode_success; 10071 } 10072 } 10073 10074 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 10075 if (haveNo66noF2noF3(pfx) && sz == 4 10076 && insn[0] == 0x0F && insn[1] == 0x59) { 10077 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 ); 10078 goto decode_success; 10079 } 10080 10081 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ 10082 if (haveF3no66noF2(pfx) && sz == 4 10083 && insn[0] == 0x0F && insn[1] == 0x59) { 10084 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 ); 10085 goto decode_success; 10086 } 10087 10088 /* 0F 56 = ORPS -- G = G and E */ 10089 if (haveNo66noF2noF3(pfx) && sz == 4 10090 && insn[0] == 0x0F && insn[1] == 0x56) { 10091 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 ); 10092 goto decode_success; 10093 } 10094 10095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10096 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 10097 if (haveNo66noF2noF3(pfx) && sz == 4 10098 && insn[0] == 0x0F && insn[1] == 0xE0) { 10099 do_MMX_preamble(); 10100 delta = dis_MMXop_regmem_to_reg ( 10101 vbi, pfx, delta+2, insn[1], "pavgb", False ); 10102 goto decode_success; 10103 } 10104 10105 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10106 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 10107 if (haveNo66noF2noF3(pfx) && sz == 4 10108 && insn[0] == 0x0F && insn[1] == 0xE3) { 10109 do_MMX_preamble(); 10110 delta = dis_MMXop_regmem_to_reg ( 10111 vbi, pfx, delta+2, insn[1], "pavgw", False ); 10112 goto decode_success; 10113 } 10114 10115 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10116 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 10117 zero-extend of it in ireg(G). 
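   An illustrative example, not part of the original comment: the
   low two bits of the immediate select the lane, so for
   "pextrw $2, %mm1, %eax" bits 47:32 of %mm1 end up, zero-extended,
   in %eax.  That is what the switch on (insn[3] & 3) below
   implements via breakup64to16s.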
*/ 10118 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 10119 && insn[0] == 0x0F && insn[1] == 0xC5) { 10120 modrm = insn[2]; 10121 if (epartIsReg(modrm)) { 10122 IRTemp sV = newTemp(Ity_I64); 10123 t5 = newTemp(Ity_I16); 10124 do_MMX_preamble(); 10125 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 10126 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 10127 switch (insn[3] & 3) { 10128 case 0: assign(t5, mkexpr(t0)); break; 10129 case 1: assign(t5, mkexpr(t1)); break; 10130 case 2: assign(t5, mkexpr(t2)); break; 10131 case 3: assign(t5, mkexpr(t3)); break; 10132 default: vassert(0); 10133 } 10134 if (sz == 8) 10135 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 10136 else 10137 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 10138 DIP("pextrw $%d,%s,%s\n", 10139 (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)), 10140 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 10141 : nameIReg32(gregOfRexRM(pfx,modrm)) 10142 ); 10143 delta += 4; 10144 goto decode_success; 10145 } 10146 /* else fall through */ 10147 /* note, for anyone filling in the mem case: this insn has one 10148 byte after the amode and therefore you must pass 1 as the 10149 last arg to disAMode */ 10150 } 10151 10152 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10153 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 10154 put it into the specified lane of mmx(G). */ 10155 if (haveNo66noF2noF3(pfx) 10156 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 10157 && insn[0] == 0x0F && insn[1] == 0xC4) { 10158 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 10159 mmx reg. t4 is the new lane value. t5 is the original 10160 mmx value. t6 is the new mmx value. */ 10161 Int lane; 10162 t4 = newTemp(Ity_I16); 10163 t5 = newTemp(Ity_I64); 10164 t6 = newTemp(Ity_I64); 10165 modrm = insn[2]; 10166 do_MMX_preamble(); 10167 10168 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 10169 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 10170 10171 if (epartIsReg(modrm)) { 10172 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 10173 delta += 3+1; 10174 lane = insn[3+1-1]; 10175 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10176 nameIReg16(eregOfRexRM(pfx,modrm)), 10177 nameMMXReg(gregLO3ofRM(modrm))); 10178 } else { 10179 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 ); 10180 delta += 3+alen; 10181 lane = insn[3+alen-1]; 10182 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 10183 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10184 dis_buf, 10185 nameMMXReg(gregLO3ofRM(modrm))); 10186 } 10187 10188 switch (lane & 3) { 10189 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 10190 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 10191 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 10192 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 10193 default: vassert(0); 10194 } 10195 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 10196 goto decode_success; 10197 } 10198 10199 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10200 /* 0F EE = PMAXSW -- 16x4 signed max */ 10201 if (haveNo66noF2noF3(pfx) && sz == 4 10202 && insn[0] == 0x0F && insn[1] == 0xEE) { 10203 do_MMX_preamble(); 10204 delta = dis_MMXop_regmem_to_reg ( 10205 vbi, pfx, delta+2, insn[1], "pmaxsw", False ); 10206 goto decode_success; 10207 } 10208 10209 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10210 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 10211 if (haveNo66noF2noF3(pfx) && sz == 4 10212 && insn[0] == 0x0F && insn[1] == 0xDE) { 10213 do_MMX_preamble(); 10214 delta = dis_MMXop_regmem_to_reg ( 10215 vbi, 
pfx, delta+2, insn[1], "pmaxub", False ); 10216 goto decode_success; 10217 } 10218 10219 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10220 /* 0F EA = PMINSW -- 16x4 signed min */ 10221 if (haveNo66noF2noF3(pfx) && sz == 4 10222 && insn[0] == 0x0F && insn[1] == 0xEA) { 10223 do_MMX_preamble(); 10224 delta = dis_MMXop_regmem_to_reg ( 10225 vbi, pfx, delta+2, insn[1], "pminsw", False ); 10226 goto decode_success; 10227 } 10228 10229 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10230 /* 0F DA = PMINUB -- 8x8 unsigned min */ 10231 if (haveNo66noF2noF3(pfx) && sz == 4 10232 && insn[0] == 0x0F && insn[1] == 0xDA) { 10233 do_MMX_preamble(); 10234 delta = dis_MMXop_regmem_to_reg ( 10235 vbi, pfx, delta+2, insn[1], "pminub", False ); 10236 goto decode_success; 10237 } 10238 10239 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10240 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 10241 mmx(G), turn them into a byte, and put zero-extend of it in 10242 ireg(G). */ 10243 if (haveNo66noF2noF3(pfx) && sz == 4 10244 && insn[0] == 0x0F && insn[1] == 0xD7) { 10245 modrm = insn[2]; 10246 if (epartIsReg(modrm)) { 10247 do_MMX_preamble(); 10248 t0 = newTemp(Ity_I64); 10249 t1 = newTemp(Ity_I64); 10250 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 10251 assign(t1, mkIRExprCCall( 10252 Ity_I64, 0/*regparms*/, 10253 "amd64g_calculate_mmx_pmovmskb", 10254 &amd64g_calculate_mmx_pmovmskb, 10255 mkIRExprVec_1(mkexpr(t0)))); 10256 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1))); 10257 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 10258 nameIReg32(gregOfRexRM(pfx,modrm))); 10259 delta += 3; 10260 goto decode_success; 10261 } 10262 /* else fall through */ 10263 } 10264 10265 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10266 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 10267 if (haveNo66noF2noF3(pfx) && sz == 4 10268 && insn[0] == 0x0F && insn[1] == 0xE4) { 10269 do_MMX_preamble(); 10270 delta = dis_MMXop_regmem_to_reg ( 10271 vbi, pfx, delta+2, insn[1], "pmuluh", False ); 10272 goto decode_success; 10273 } 10274 10275 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 10276 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 10277 /* 0F 18 /2 = PREFETCH1 */ 10278 /* 0F 18 /3 = PREFETCH2 */ 10279 if (insn[0] == 0x0F && insn[1] == 0x18 10280 && haveNo66noF2noF3(pfx) 10281 && !epartIsReg(insn[2]) 10282 && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) { 10283 HChar* hintstr = "??"; 10284 10285 modrm = getUChar(delta+2); 10286 vassert(!epartIsReg(modrm)); 10287 10288 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10289 delta += 2+alen; 10290 10291 switch (gregLO3ofRM(modrm)) { 10292 case 0: hintstr = "nta"; break; 10293 case 1: hintstr = "t0"; break; 10294 case 2: hintstr = "t1"; break; 10295 case 3: hintstr = "t2"; break; 10296 default: vassert(0); 10297 } 10298 10299 DIP("prefetch%s %s\n", hintstr, dis_buf); 10300 goto decode_success; 10301 } 10302 10303 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10304 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 10305 if (haveNo66noF2noF3(pfx) && sz == 4 10306 && insn[0] == 0x0F && insn[1] == 0xF6) { 10307 do_MMX_preamble(); 10308 delta = dis_MMXop_regmem_to_reg ( 10309 vbi, pfx, delta+2, insn[1], "psadbw", False ); 10310 goto decode_success; 10311 } 10312 10313 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10314 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) 
to G(mmx) */ 10315 if (haveNo66noF2noF3(pfx) && sz == 4 10316 && insn[0] == 0x0F && insn[1] == 0x70) { 10317 Int order; 10318 IRTemp sV, dV, s3, s2, s1, s0; 10319 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10320 sV = newTemp(Ity_I64); 10321 dV = newTemp(Ity_I64); 10322 do_MMX_preamble(); 10323 modrm = insn[2]; 10324 if (epartIsReg(modrm)) { 10325 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 10326 order = (Int)insn[3]; 10327 delta += 2+2; 10328 DIP("pshufw $%d,%s,%s\n", order, 10329 nameMMXReg(eregLO3ofRM(modrm)), 10330 nameMMXReg(gregLO3ofRM(modrm))); 10331 } else { 10332 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 10333 1/*extra byte after amode*/ ); 10334 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 10335 order = (Int)insn[2+alen]; 10336 delta += 3+alen; 10337 DIP("pshufw $%d,%s,%s\n", order, 10338 dis_buf, 10339 nameMMXReg(gregLO3ofRM(modrm))); 10340 } 10341 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 10342 # define SEL(n) \ 10343 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10344 assign(dV, 10345 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 10346 SEL((order>>2)&3), SEL((order>>0)&3) ) 10347 ); 10348 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 10349 # undef SEL 10350 goto decode_success; 10351 } 10352 10353 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 10354 if (haveNo66noF2noF3(pfx) && sz == 4 10355 && insn[0] == 0x0F && insn[1] == 0x53) { 10356 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, 10357 "rcpps", Iop_Recip32Fx4 ); 10358 goto decode_success; 10359 } 10360 10361 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 10362 if (haveF3no66noF2(pfx) && sz == 4 10363 && insn[0] == 0x0F && insn[1] == 0x53) { 10364 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, 10365 "rcpss", Iop_Recip32F0x4 ); 10366 goto decode_success; 10367 } 10368 10369 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 10370 if (haveNo66noF2noF3(pfx) && sz == 4 10371 && insn[0] == 0x0F && insn[1] == 0x52) { 10372 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, 10373 "rsqrtps", Iop_RSqrt32Fx4 ); 10374 goto decode_success; 10375 } 10376 10377 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 10378 if (haveF3no66noF2(pfx) && sz == 4 10379 && insn[0] == 0x0F && insn[1] == 0x52) { 10380 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, 10381 "rsqrtss", Iop_RSqrt32F0x4 ); 10382 goto decode_success; 10383 } 10384 10385 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 10386 if (haveNo66noF2noF3(pfx) 10387 && insn[0] == 0x0F && insn[1] == 0xAE 10388 && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7 10389 && sz == 4) { 10390 delta += 3; 10391 /* Insert a memory fence. It's sometimes important that these 10392 are carried through to the generated code. 
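   (Clarifying note, not original text: LFENCE and MFENCE, decoded
   further down, emit exactly the same IRStmt_MBE(Imbe_Fence)
   statement, so at the IR level all three fences are modelled as a
   single full barrier.)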
*/ 10393 stmt( IRStmt_MBE(Imbe_Fence) ); 10394 DIP("sfence\n"); 10395 goto decode_success; 10396 } 10397 10398 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 10399 if (haveNo66noF2noF3(pfx) && sz == 4 10400 && insn[0] == 0x0F && insn[1] == 0xC6) { 10401 Int select; 10402 IRTemp sV, dV; 10403 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10404 sV = newTemp(Ity_V128); 10405 dV = newTemp(Ity_V128); 10406 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10407 modrm = insn[2]; 10408 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 10409 10410 if (epartIsReg(modrm)) { 10411 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10412 select = (Int)insn[3]; 10413 delta += 2+2; 10414 DIP("shufps $%d,%s,%s\n", select, 10415 nameXMMReg(eregOfRexRM(pfx,modrm)), 10416 nameXMMReg(gregOfRexRM(pfx,modrm))); 10417 } else { 10418 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 10419 1/*byte at end of insn*/ ); 10420 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10421 select = (Int)insn[2+alen]; 10422 delta += 3+alen; 10423 DIP("shufps $%d,%s,%s\n", select, 10424 dis_buf, 10425 nameXMMReg(gregOfRexRM(pfx,modrm))); 10426 } 10427 10428 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 10429 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 10430 10431 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 10432 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10433 10434 putXMMReg( 10435 gregOfRexRM(pfx,modrm), 10436 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 10437 SELD((select>>2)&3), SELD((select>>0)&3) ) 10438 ); 10439 10440 # undef SELD 10441 # undef SELS 10442 10443 goto decode_success; 10444 } 10445 10446 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 10447 if (haveNo66noF2noF3(pfx) && sz == 4 10448 && insn[0] == 0x0F && insn[1] == 0x51) { 10449 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, 10450 "sqrtps", Iop_Sqrt32Fx4 ); 10451 goto decode_success; 10452 } 10453 10454 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 10455 if (haveF3no66noF2(pfx) && sz == 4 10456 && insn[0] == 0x0F && insn[1] == 0x51) { 10457 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, 10458 "sqrtss", Iop_Sqrt32F0x4 ); 10459 goto decode_success; 10460 } 10461 10462 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 10463 if (insn[0] == 0x0F && insn[1] == 0xAE 10464 && haveNo66noF2noF3(pfx) 10465 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) { 10466 10467 vassert(sz == 4); 10468 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10469 delta += 2+alen; 10470 10471 /* Fake up a native SSE mxcsr word. The only thing it depends 10472 on is SSEROUND[1:0], so call a clean helper to cook it up. 
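   As an illustrative guess, and purely an assumption about what
   amd64g_create_mxcsr returns rather than anything stated here: the
   fabricated word is presumably along the lines of

      mxcsr = 0x1F80 | (sseround << 13)

   i.e. all exception-mask bits set, with only the rounding-control
   field (bits 14:13) varying according to SSEROUND.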
10473 */ 10474 /* ULong amd64h_create_mxcsr ( ULong sseround ) */ 10475 DIP("stmxcsr %s\n", dis_buf); 10476 storeLE( 10477 mkexpr(addr), 10478 unop(Iop_64to32, 10479 mkIRExprCCall( 10480 Ity_I64, 0/*regp*/, 10481 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 10482 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 10483 ) 10484 ) 10485 ); 10486 goto decode_success; 10487 } 10488 10489 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 10490 if (haveNo66noF2noF3(pfx) && sz == 4 10491 && insn[0] == 0x0F && insn[1] == 0x5C) { 10492 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 ); 10493 goto decode_success; 10494 } 10495 10496 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 10497 if (haveF3no66noF2(pfx) && sz == 4 10498 && insn[0] == 0x0F && insn[1] == 0x5C) { 10499 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 ); 10500 goto decode_success; 10501 } 10502 10503 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 10504 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 10505 /* These just appear to be special cases of SHUFPS */ 10506 if (haveNo66noF2noF3(pfx) && sz == 4 10507 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 10508 IRTemp sV, dV; 10509 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10510 Bool hi = toBool(insn[1] == 0x15); 10511 sV = newTemp(Ity_V128); 10512 dV = newTemp(Ity_V128); 10513 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10514 modrm = insn[2]; 10515 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 10516 10517 if (epartIsReg(modrm)) { 10518 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10519 delta += 2+1; 10520 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 10521 nameXMMReg(eregOfRexRM(pfx,modrm)), 10522 nameXMMReg(gregOfRexRM(pfx,modrm))); 10523 } else { 10524 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10525 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10526 delta += 2+alen; 10527 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 10528 dis_buf, 10529 nameXMMReg(gregOfRexRM(pfx,modrm))); 10530 } 10531 10532 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 10533 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 10534 10535 if (hi) { 10536 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) ); 10537 } else { 10538 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) ); 10539 } 10540 10541 goto decode_success; 10542 } 10543 10544 /* 0F 57 = XORPS -- G = G and E */ 10545 if (haveNo66noF2noF3(pfx) && sz == 4 10546 && insn[0] == 0x0F && insn[1] == 0x57) { 10547 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 ); 10548 goto decode_success; 10549 } 10550 10551 /* ---------------------------------------------------- */ 10552 /* --- end of the SSE decoder. --- */ 10553 /* ---------------------------------------------------- */ 10554 10555 /* ---------------------------------------------------- */ 10556 /* --- start of the SSE2 decoder. 
--- */ 10557 /* ---------------------------------------------------- */ 10558 10559 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 10560 if (have66noF2noF3(pfx) 10561 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 10562 && insn[0] == 0x0F && insn[1] == 0x58) { 10563 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 ); 10564 goto decode_success; 10565 } 10566 10567 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 10568 if (haveF2no66noF3(pfx) 10569 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 10570 && insn[0] == 0x0F && insn[1] == 0x58) { 10571 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 ); 10572 goto decode_success; 10573 } 10574 10575 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 10576 if (have66noF2noF3(pfx) && sz == 2 10577 && insn[0] == 0x0F && insn[1] == 0x55) { 10578 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 ); 10579 goto decode_success; 10580 } 10581 10582 /* 66 0F 54 = ANDPD -- G = G and E */ 10583 if (have66noF2noF3(pfx) && sz == 2 10584 && insn[0] == 0x0F && insn[1] == 0x54) { 10585 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 ); 10586 goto decode_success; 10587 } 10588 10589 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 10590 if (have66noF2noF3(pfx) && sz == 2 10591 && insn[0] == 0x0F && insn[1] == 0xC2) { 10592 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 ); 10593 goto decode_success; 10594 } 10595 10596 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 10597 if (haveF2no66noF3(pfx) && sz == 4 10598 && insn[0] == 0x0F && insn[1] == 0xC2) { 10599 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 ); 10600 goto decode_success; 10601 } 10602 10603 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 10604 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 10605 if (have66noF2noF3(pfx) && sz == 2 10606 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 10607 IRTemp argL = newTemp(Ity_F64); 10608 IRTemp argR = newTemp(Ity_F64); 10609 modrm = getUChar(delta+2); 10610 if (epartIsReg(modrm)) { 10611 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), 10612 0/*lowest lane*/ ) ); 10613 delta += 2+1; 10614 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "", 10615 nameXMMReg(eregOfRexRM(pfx,modrm)), 10616 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10617 } else { 10618 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10619 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 10620 delta += 2+alen; 10621 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? 
"u" : "", 10622 dis_buf, 10623 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10624 } 10625 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 10626 0/*lowest lane*/ ) ); 10627 10628 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10629 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10630 stmt( IRStmt_Put( 10631 OFFB_CC_DEP1, 10632 binop( Iop_And64, 10633 unop( Iop_32Uto64, 10634 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 10635 mkU64(0x45) 10636 ))); 10637 10638 goto decode_success; 10639 } 10640 10641 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 10642 F64 in xmm(G) */ 10643 if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) { 10644 IRTemp arg64 = newTemp(Ity_I64); 10645 if (sz != 4) goto decode_failure; 10646 10647 modrm = getUChar(delta+2); 10648 if (epartIsReg(modrm)) { 10649 assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) ); 10650 delta += 2+1; 10651 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10652 nameXMMReg(gregOfRexRM(pfx,modrm))); 10653 } else { 10654 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10655 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 10656 delta += 2+alen; 10657 DIP("cvtdq2pd %s,%s\n", dis_buf, 10658 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10659 } 10660 10661 putXMMRegLane64F( 10662 gregOfRexRM(pfx,modrm), 0, 10663 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 10664 ); 10665 10666 putXMMRegLane64F( 10667 gregOfRexRM(pfx,modrm), 1, 10668 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 10669 ); 10670 10671 goto decode_success; 10672 } 10673 10674 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 10675 xmm(G) */ 10676 if (haveNo66noF2noF3(pfx) && sz == 4 10677 && insn[0] == 0x0F && insn[1] == 0x5B) { 10678 IRTemp argV = newTemp(Ity_V128); 10679 IRTemp rmode = newTemp(Ity_I32); 10680 10681 modrm = getUChar(delta+2); 10682 if (epartIsReg(modrm)) { 10683 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10684 delta += 2+1; 10685 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10686 nameXMMReg(gregOfRexRM(pfx,modrm))); 10687 } else { 10688 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10689 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10690 delta += 2+alen; 10691 DIP("cvtdq2ps %s,%s\n", dis_buf, 10692 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10693 } 10694 10695 assign( rmode, get_sse_roundingmode() ); 10696 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 10697 10698 # define CVT(_t) binop( Iop_F64toF32, \ 10699 mkexpr(rmode), \ 10700 unop(Iop_I32StoF64,mkexpr(_t))) 10701 10702 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) ); 10703 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) ); 10704 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10705 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10706 10707 # undef CVT 10708 10709 goto decode_success; 10710 } 10711 10712 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 10713 lo half xmm(G), and zero upper half, rounding towards zero */ 10714 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 10715 lo half xmm(G), according to prevailing rounding mode, and zero 10716 upper half */ 10717 if ( ( (haveF2no66noF3(pfx) && sz == 4) 10718 || (have66noF2noF3(pfx) && sz == 2) 10719 ) 10720 && insn[0] == 0x0F && insn[1] == 0xE6) { 10721 IRTemp argV = newTemp(Ity_V128); 10722 IRTemp rmode = newTemp(Ity_I32); 10723 Bool r2zero = toBool(sz == 2); 10724 10725 modrm = getUChar(delta+2); 10726 if (epartIsReg(modrm)) { 10727 assign( argV, 
getXMMReg(eregOfRexRM(pfx,modrm)) ); 10728 delta += 2+1; 10729 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "", 10730 nameXMMReg(eregOfRexRM(pfx,modrm)), 10731 nameXMMReg(gregOfRexRM(pfx,modrm))); 10732 } else { 10733 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10734 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10735 delta += 2+alen; 10736 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "", 10737 dis_buf, 10738 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10739 } 10740 10741 if (r2zero) { 10742 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10743 } else { 10744 assign( rmode, get_sse_roundingmode() ); 10745 } 10746 10747 t0 = newTemp(Ity_F64); 10748 t1 = newTemp(Ity_F64); 10749 assign( t0, unop(Iop_ReinterpI64asF64, 10750 unop(Iop_V128to64, mkexpr(argV))) ); 10751 assign( t1, unop(Iop_ReinterpI64asF64, 10752 unop(Iop_V128HIto64, mkexpr(argV))) ); 10753 10754 # define CVT(_t) binop( Iop_F64toI32S, \ 10755 mkexpr(rmode), \ 10756 mkexpr(_t) ) 10757 10758 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) ); 10759 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) ); 10760 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10761 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10762 10763 # undef CVT 10764 10765 goto decode_success; 10766 } 10767 10768 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 10769 I32 in mmx, according to prevailing SSE rounding mode */ 10770 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 10771 I32 in mmx, rounding towards zero */ 10772 if (have66noF2noF3(pfx) && sz == 2 10773 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 10774 IRTemp dst64 = newTemp(Ity_I64); 10775 IRTemp rmode = newTemp(Ity_I32); 10776 IRTemp f64lo = newTemp(Ity_F64); 10777 IRTemp f64hi = newTemp(Ity_F64); 10778 Bool r2zero = toBool(insn[1] == 0x2C); 10779 10780 do_MMX_preamble(); 10781 modrm = getUChar(delta+2); 10782 10783 if (epartIsReg(modrm)) { 10784 delta += 2+1; 10785 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10786 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 10787 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 10788 nameXMMReg(eregOfRexRM(pfx,modrm)), 10789 nameMMXReg(gregLO3ofRM(modrm))); 10790 } else { 10791 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10792 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 10793 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 10794 mkexpr(addr), 10795 mkU64(8) ))); 10796 delta += 2+alen; 10797 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "", 10798 dis_buf, 10799 nameMMXReg(gregLO3ofRM(modrm))); 10800 } 10801 10802 if (r2zero) { 10803 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10804 } else { 10805 assign( rmode, get_sse_roundingmode() ); 10806 } 10807 10808 assign( 10809 dst64, 10810 binop( Iop_32HLto64, 10811 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 10812 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 10813 ) 10814 ); 10815 10816 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 10817 goto decode_success; 10818 } 10819 10820 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 10821 lo half xmm(G), rounding according to prevailing SSE rounding 10822 mode, and zero upper half */ 10823 /* Note, this is practically identical to CVTPD2DQ. It would have 10824 been nicer to merge them together, but the insn[] offsets differ 10825 by one. 
*/ 10826 if (have66noF2noF3(pfx) && sz == 2 10827 && insn[0] == 0x0F && insn[1] == 0x5A) { 10828 IRTemp argV = newTemp(Ity_V128); 10829 IRTemp rmode = newTemp(Ity_I32); 10830 10831 modrm = getUChar(delta+2); 10832 if (epartIsReg(modrm)) { 10833 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10834 delta += 2+1; 10835 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10836 nameXMMReg(gregOfRexRM(pfx,modrm))); 10837 } else { 10838 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10839 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10840 delta += 2+alen; 10841 DIP("cvtpd2ps %s,%s\n", dis_buf, 10842 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10843 } 10844 10845 assign( rmode, get_sse_roundingmode() ); 10846 t0 = newTemp(Ity_F64); 10847 t1 = newTemp(Ity_F64); 10848 assign( t0, unop(Iop_ReinterpI64asF64, 10849 unop(Iop_V128to64, mkexpr(argV))) ); 10850 assign( t1, unop(Iop_ReinterpI64asF64, 10851 unop(Iop_V128HIto64, mkexpr(argV))) ); 10852 10853 # define CVT(_t) binop( Iop_F64toF32, \ 10854 mkexpr(rmode), \ 10855 mkexpr(_t) ) 10856 10857 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) ); 10858 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) ); 10859 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10860 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10861 10862 # undef CVT 10863 10864 goto decode_success; 10865 } 10866 10867 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 10868 xmm(G) */ 10869 if (have66noF2noF3(pfx) && sz == 2 10870 && insn[0] == 0x0F && insn[1] == 0x2A) { 10871 IRTemp arg64 = newTemp(Ity_I64); 10872 10873 modrm = getUChar(delta+2); 10874 if (epartIsReg(modrm)) { 10875 /* Only switch to MMX mode if the source is a MMX register. 10876 This is inconsistent with all other instructions which 10877 convert between XMM and (M64 or MMX), which always switch 10878 to MMX mode even if 64-bit operand is M64 and not MMX. At 10879 least, that's what the Intel docs seem to me to say. 10880 Fixes #210264. 
*/ 10881 do_MMX_preamble(); 10882 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 10883 delta += 2+1; 10884 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 10885 nameXMMReg(gregOfRexRM(pfx,modrm))); 10886 } else { 10887 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10888 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 10889 delta += 2+alen; 10890 DIP("cvtpi2pd %s,%s\n", dis_buf, 10891 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10892 } 10893 10894 putXMMRegLane64F( 10895 gregOfRexRM(pfx,modrm), 0, 10896 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 10897 ); 10898 10899 putXMMRegLane64F( 10900 gregOfRexRM(pfx,modrm), 1, 10901 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 10902 ); 10903 10904 goto decode_success; 10905 } 10906 10907 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 10908 xmm(G), rounding towards zero */ 10909 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 10910 xmm(G), as per the prevailing rounding mode */ 10911 if ( ( (have66noF2noF3(pfx) && sz == 2) 10912 || (haveF3no66noF2(pfx) && sz == 4) 10913 ) 10914 && insn[0] == 0x0F && insn[1] == 0x5B) { 10915 IRTemp argV = newTemp(Ity_V128); 10916 IRTemp rmode = newTemp(Ity_I32); 10917 Bool r2zero = toBool(sz == 4); 10918 10919 modrm = getUChar(delta+2); 10920 if (epartIsReg(modrm)) { 10921 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10922 delta += 2+1; 10923 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10924 nameXMMReg(gregOfRexRM(pfx,modrm))); 10925 } else { 10926 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10927 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10928 delta += 2+alen; 10929 DIP("cvtps2dq %s,%s\n", dis_buf, 10930 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10931 } 10932 10933 if (r2zero) { 10934 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10935 } else { 10936 assign( rmode, get_sse_roundingmode() ); 10937 } 10938 10939 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 10940 10941 /* This is less than ideal. If it turns out to be a performance 10942 bottleneck it can be improved. */ 10943 # define CVT(_t) \ 10944 binop( Iop_F64toI32S, \ 10945 mkexpr(rmode), \ 10946 unop( Iop_F32toF64, \ 10947 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10948 10949 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) ); 10950 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) ); 10951 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10952 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10953 10954 # undef CVT 10955 10956 goto decode_success; 10957 } 10958 10959 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 10960 F64 in xmm(G). 
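   A clarifying aside, not original text: F32 -> F64 widening is
   exact, so this handler (and CVTSS2SD further down) uses
   unop(Iop_F32toF64, ...) with no rounding-mode argument; contrast
   the narrowing conversions such as CVTPD2PS above and CVTSD2SS
   below, whose Iop_F64toF32 must be given the prevailing SSE
   rounding mode.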
*/ 10961 if (haveNo66noF2noF3(pfx) && sz == 4 10962 && insn[0] == 0x0F && insn[1] == 0x5A) { 10963 IRTemp f32lo = newTemp(Ity_F32); 10964 IRTemp f32hi = newTemp(Ity_F32); 10965 10966 modrm = getUChar(delta+2); 10967 if (epartIsReg(modrm)) { 10968 assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) ); 10969 assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) ); 10970 delta += 2+1; 10971 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10972 nameXMMReg(gregOfRexRM(pfx,modrm))); 10973 } else { 10974 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10975 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 10976 assign( f32hi, loadLE(Ity_F32, 10977 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10978 delta += 2+alen; 10979 DIP("cvtps2pd %s,%s\n", dis_buf, 10980 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10981 } 10982 10983 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1, 10984 unop(Iop_F32toF64, mkexpr(f32hi)) ); 10985 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 10986 unop(Iop_F32toF64, mkexpr(f32lo)) ); 10987 10988 goto decode_success; 10989 } 10990 10991 /* F2 0F 2D = CVTSD2SI 10992 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 10993 according to prevailing SSE rounding mode 10994 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 10995 according to prevailing SSE rounding mode 10996 */ 10997 /* F2 0F 2C = CVTTSD2SI 10998 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 10999 truncating towards zero 11000 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 11001 truncating towards zero 11002 */ 11003 if (haveF2no66noF3(pfx) 11004 && insn[0] == 0x0F 11005 && (insn[1] == 0x2D || insn[1] == 0x2C)) { 11006 IRTemp rmode = newTemp(Ity_I32); 11007 IRTemp f64lo = newTemp(Ity_F64); 11008 Bool r2zero = toBool(insn[1] == 0x2C); 11009 vassert(sz == 4 || sz == 8); 11010 11011 modrm = getUChar(delta+2); 11012 if (epartIsReg(modrm)) { 11013 delta += 2+1; 11014 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 11015 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 11016 nameXMMReg(eregOfRexRM(pfx,modrm)), 11017 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 11018 } else { 11019 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11020 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 11021 delta += 2+alen; 11022 DIP("cvt%ssd2si %s,%s\n", r2zero ? 
"t" : "", 11023 dis_buf, 11024 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 11025 } 11026 11027 if (r2zero) { 11028 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 11029 } else { 11030 assign( rmode, get_sse_roundingmode() ); 11031 } 11032 11033 if (sz == 4) { 11034 putIReg32( gregOfRexRM(pfx,modrm), 11035 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 11036 } else { 11037 putIReg64( gregOfRexRM(pfx,modrm), 11038 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 11039 } 11040 11041 goto decode_success; 11042 } 11043 11044 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 11045 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 11046 if (haveF2no66noF3(pfx) && sz == 4 11047 && insn[0] == 0x0F && insn[1] == 0x5A) { 11048 IRTemp rmode = newTemp(Ity_I32); 11049 IRTemp f64lo = newTemp(Ity_F64); 11050 vassert(sz == 4); 11051 11052 modrm = getUChar(delta+2); 11053 if (epartIsReg(modrm)) { 11054 delta += 2+1; 11055 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 11056 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11057 nameXMMReg(gregOfRexRM(pfx,modrm))); 11058 } else { 11059 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11060 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 11061 delta += 2+alen; 11062 DIP("cvtsd2ss %s,%s\n", dis_buf, 11063 nameXMMReg(gregOfRexRM(pfx,modrm))); 11064 } 11065 11066 assign( rmode, get_sse_roundingmode() ); 11067 putXMMRegLane32F( 11068 gregOfRexRM(pfx,modrm), 0, 11069 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 11070 ); 11071 11072 goto decode_success; 11073 } 11074 11075 /* F2 0F 2A = CVTSI2SD 11076 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 11077 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 11078 */ 11079 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8) 11080 && insn[0] == 0x0F && insn[1] == 0x2A) { 11081 modrm = getUChar(delta+2); 11082 11083 if (sz == 4) { 11084 IRTemp arg32 = newTemp(Ity_I32); 11085 if (epartIsReg(modrm)) { 11086 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 11087 delta += 2+1; 11088 DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 11089 nameXMMReg(gregOfRexRM(pfx,modrm))); 11090 } else { 11091 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11092 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 11093 delta += 2+alen; 11094 DIP("cvtsi2sd %s,%s\n", dis_buf, 11095 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 11096 } 11097 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 11098 unop(Iop_I32StoF64, mkexpr(arg32)) 11099 ); 11100 } else { 11101 /* sz == 8 */ 11102 IRTemp arg64 = newTemp(Ity_I64); 11103 if (epartIsReg(modrm)) { 11104 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 11105 delta += 2+1; 11106 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 11107 nameXMMReg(gregOfRexRM(pfx,modrm))); 11108 } else { 11109 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11110 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11111 delta += 2+alen; 11112 DIP("cvtsi2sdq %s,%s\n", dis_buf, 11113 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 11114 } 11115 putXMMRegLane64F( 11116 gregOfRexRM(pfx,modrm), 11117 0, 11118 binop( Iop_I64StoF64, 11119 get_sse_roundingmode(), 11120 mkexpr(arg64) 11121 ) 11122 ); 11123 11124 } 11125 11126 goto decode_success; 11127 } 11128 11129 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 11130 low half xmm(G) */ 11131 if (haveF3no66noF2(pfx) && sz == 4 11132 && insn[0] == 0x0F && insn[1] == 0x5A) { 11133 IRTemp f32lo = newTemp(Ity_F32); 11134 11135 modrm = getUChar(delta+2); 
11136 if (epartIsReg(modrm)) { 11137 delta += 2+1; 11138 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 11139 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11140 nameXMMReg(gregOfRexRM(pfx,modrm))); 11141 } else { 11142 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11143 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 11144 delta += 2+alen; 11145 DIP("cvtss2sd %s,%s\n", dis_buf, 11146 nameXMMReg(gregOfRexRM(pfx,modrm))); 11147 } 11148 11149 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 11150 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 11151 11152 goto decode_success; 11153 } 11154 11155 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 11156 if (have66noF2noF3(pfx) && sz == 2 11157 && insn[0] == 0x0F && insn[1] == 0x5E) { 11158 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 ); 11159 goto decode_success; 11160 } 11161 11162 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 11163 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) { 11164 vassert(sz == 4); 11165 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 ); 11166 goto decode_success; 11167 } 11168 11169 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 11170 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 11171 if (haveNo66noF2noF3(pfx) && sz == 4 11172 && insn[0] == 0x0F && insn[1] == 0xAE 11173 && epartIsReg(insn[2]) 11174 && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) { 11175 delta += 3; 11176 /* Insert a memory fence. It's sometimes important that these 11177 are carried through to the generated code. */ 11178 stmt( IRStmt_MBE(Imbe_Fence) ); 11179 DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m"); 11180 goto decode_success; 11181 } 11182 11183 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 11184 if (have66noF2noF3(pfx) && sz == 2 11185 && insn[0] == 0x0F && insn[1] == 0x5F) { 11186 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 ); 11187 goto decode_success; 11188 } 11189 11190 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 11191 if (haveF2no66noF3(pfx) && sz == 4 11192 && insn[0] == 0x0F && insn[1] == 0x5F) { 11193 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 ); 11194 goto decode_success; 11195 } 11196 11197 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 11198 if (have66noF2noF3(pfx) && sz == 2 11199 && insn[0] == 0x0F && insn[1] == 0x5D) { 11200 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 ); 11201 goto decode_success; 11202 } 11203 11204 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 11205 if (haveF2no66noF3(pfx) && sz == 4 11206 && insn[0] == 0x0F && insn[1] == 0x5D) { 11207 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 ); 11208 goto decode_success; 11209 } 11210 11211 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 11212 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 11213 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 11214 if (have66noF2noF3(pfx) 11215 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 11216 && insn[0] == 0x0F 11217 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 11218 HChar* wot = insn[1]==0x28 ? "apd" : 11219 insn[1]==0x10 ? 
"upd" : "dqa"; 11220 modrm = getUChar(delta+2); 11221 if (epartIsReg(modrm)) { 11222 putXMMReg( gregOfRexRM(pfx,modrm), 11223 getXMMReg( eregOfRexRM(pfx,modrm) )); 11224 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)), 11225 nameXMMReg(gregOfRexRM(pfx,modrm))); 11226 delta += 2+1; 11227 } else { 11228 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11229 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 11230 gen_SEGV_if_not_16_aligned( addr ); 11231 putXMMReg( gregOfRexRM(pfx,modrm), 11232 loadLE(Ity_V128, mkexpr(addr)) ); 11233 DIP("mov%s %s,%s\n", wot, dis_buf, 11234 nameXMMReg(gregOfRexRM(pfx,modrm))); 11235 delta += 2+alen; 11236 } 11237 goto decode_success; 11238 } 11239 11240 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 11241 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 11242 if (have66noF2noF3(pfx) && insn[0] == 0x0F 11243 && (insn[1] == 0x29 || insn[1] == 0x11)) { 11244 HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 11245 modrm = getUChar(delta+2); 11246 if (epartIsReg(modrm)) { 11247 putXMMReg( eregOfRexRM(pfx,modrm), 11248 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 11249 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)), 11250 nameXMMReg(eregOfRexRM(pfx,modrm))); 11251 delta += 2+1; 11252 } else { 11253 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11254 if (insn[1] == 0x29/*movapd*/) 11255 gen_SEGV_if_not_16_aligned( addr ); 11256 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11257 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)), 11258 dis_buf ); 11259 delta += 2+alen; 11260 } 11261 goto decode_success; 11262 } 11263 11264 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */ 11265 /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. */ 11266 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) { 11267 vassert(sz == 2 || sz == 8); 11268 if (sz == 2) sz = 4; 11269 modrm = getUChar(delta+2); 11270 if (epartIsReg(modrm)) { 11271 delta += 2+1; 11272 if (sz == 4) { 11273 putXMMReg( 11274 gregOfRexRM(pfx,modrm), 11275 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 11276 ); 11277 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 11278 nameXMMReg(gregOfRexRM(pfx,modrm))); 11279 } else { 11280 putXMMReg( 11281 gregOfRexRM(pfx,modrm), 11282 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 11283 ); 11284 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 11285 nameXMMReg(gregOfRexRM(pfx,modrm))); 11286 } 11287 } else { 11288 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11289 delta += 2+alen; 11290 putXMMReg( 11291 gregOfRexRM(pfx,modrm), 11292 sz == 4 11293 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 11294 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 11295 ); 11296 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 11297 nameXMMReg(gregOfRexRM(pfx,modrm))); 11298 } 11299 goto decode_success; 11300 } 11301 11302 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 11303 /* or from xmm low 1/2 to ireg64 or m64. 
*/ 11304 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) { 11305 if (sz == 2) sz = 4; 11306 vassert(sz == 4 || sz == 8); 11307 modrm = getUChar(delta+2); 11308 if (epartIsReg(modrm)) { 11309 delta += 2+1; 11310 if (sz == 4) { 11311 putIReg32( eregOfRexRM(pfx,modrm), 11312 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 11313 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11314 nameIReg32(eregOfRexRM(pfx,modrm))); 11315 } else { 11316 putIReg64( eregOfRexRM(pfx,modrm), 11317 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 11318 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11319 nameIReg64(eregOfRexRM(pfx,modrm))); 11320 } 11321 } else { 11322 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11323 delta += 2+alen; 11324 storeLE( mkexpr(addr), 11325 sz == 4 11326 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 11327 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 11328 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', 11329 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11330 } 11331 goto decode_success; 11332 } 11333 11334 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 11335 if (have66noF2noF3(pfx) && sz == 2 11336 && insn[0] == 0x0F && insn[1] == 0x7F) { 11337 modrm = getUChar(delta+2); 11338 if (epartIsReg(modrm)) { 11339 delta += 2+1; 11340 putXMMReg( eregOfRexRM(pfx,modrm), 11341 getXMMReg(gregOfRexRM(pfx,modrm)) ); 11342 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11343 nameXMMReg(eregOfRexRM(pfx,modrm))); 11344 } else { 11345 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11346 gen_SEGV_if_not_16_aligned( addr ); 11347 delta += 2+alen; 11348 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11349 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11350 } 11351 goto decode_success; 11352 } 11353 11354 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 11355 if (haveF3no66noF2(pfx) && sz == 4 11356 && insn[0] == 0x0F && insn[1] == 0x6F) { 11357 modrm = getUChar(delta+2); 11358 if (epartIsReg(modrm)) { 11359 putXMMReg( gregOfRexRM(pfx,modrm), 11360 getXMMReg( eregOfRexRM(pfx,modrm) )); 11361 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11362 nameXMMReg(gregOfRexRM(pfx,modrm))); 11363 delta += 2+1; 11364 } else { 11365 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11366 putXMMReg( gregOfRexRM(pfx,modrm), 11367 loadLE(Ity_V128, mkexpr(addr)) ); 11368 DIP("movdqu %s,%s\n", dis_buf, 11369 nameXMMReg(gregOfRexRM(pfx,modrm))); 11370 delta += 2+alen; 11371 } 11372 goto decode_success; 11373 } 11374 11375 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 11376 if (haveF3no66noF2(pfx) && sz == 4 11377 && insn[0] == 0x0F && insn[1] == 0x7F) { 11378 modrm = getUChar(delta+2); 11379 if (epartIsReg(modrm)) { 11380 goto decode_failure; /* awaiting test case */ 11381 delta += 2+1; 11382 putXMMReg( eregOfRexRM(pfx,modrm), 11383 getXMMReg(gregOfRexRM(pfx,modrm)) ); 11384 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11385 nameXMMReg(eregOfRexRM(pfx,modrm))); 11386 } else { 11387 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11388 delta += 2+alen; 11389 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11390 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11391 } 11392 goto decode_success; 11393 } 11394 11395 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). 
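      Only the register form of E is accepted; the memory form is
      rejected below with decode_failure.  Note the do_MMX_preamble()
      call, which switches the simulated FPU into MMX mode before the
      MMX register is written.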
*/ 11396 if (haveF2no66noF3(pfx) && sz == 4 11397 && insn[0] == 0x0F && insn[1] == 0xD6) { 11398 modrm = getUChar(delta+2); 11399 if (epartIsReg(modrm)) { 11400 do_MMX_preamble(); 11401 putMMXReg( gregLO3ofRM(modrm), 11402 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 11403 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11404 nameMMXReg(gregLO3ofRM(modrm))); 11405 delta += 2+1; 11406 goto decode_success; 11407 } else { 11408 /* apparently no mem case for this insn */ 11409 goto decode_failure; 11410 } 11411 } 11412 11413 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 11414 /* These seems identical to MOVHPS. This instruction encoding is 11415 completely crazy. */ 11416 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) { 11417 modrm = getUChar(delta+2); 11418 if (epartIsReg(modrm)) { 11419 /* fall through; apparently reg-reg is not possible */ 11420 } else { 11421 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11422 delta += 2+alen; 11423 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 11424 loadLE(Ity_I64, mkexpr(addr)) ); 11425 DIP("movhpd %s,%s\n", dis_buf, 11426 nameXMMReg( gregOfRexRM(pfx,modrm) )); 11427 goto decode_success; 11428 } 11429 } 11430 11431 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 11432 /* Again, this seems identical to MOVHPS. */ 11433 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) { 11434 if (!epartIsReg(insn[2])) { 11435 delta += 2; 11436 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11437 delta += alen; 11438 storeLE( mkexpr(addr), 11439 getXMMRegLane64( gregOfRexRM(pfx,insn[2]), 11440 1/*upper lane*/ ) ); 11441 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ), 11442 dis_buf); 11443 goto decode_success; 11444 } 11445 /* else fall through */ 11446 } 11447 11448 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 11449 /* Identical to MOVLPS ? */ 11450 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) { 11451 modrm = getUChar(delta+2); 11452 if (epartIsReg(modrm)) { 11453 /* fall through; apparently reg-reg is not possible */ 11454 } else { 11455 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11456 delta += 2+alen; 11457 putXMMRegLane64( gregOfRexRM(pfx,modrm), 11458 0/*lower lane*/, 11459 loadLE(Ity_I64, mkexpr(addr)) ); 11460 DIP("movlpd %s, %s\n", 11461 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 11462 goto decode_success; 11463 } 11464 } 11465 11466 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 11467 /* Identical to MOVLPS ? */ 11468 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) { 11469 modrm = getUChar(delta+2); 11470 if (!epartIsReg(modrm)) { 11471 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11472 delta += 2+alen; 11473 storeLE( mkexpr(addr), 11474 getXMMRegLane64( gregOfRexRM(pfx,modrm), 11475 0/*lower lane*/ ) ); 11476 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 11477 dis_buf); 11478 goto decode_success; 11479 } 11480 /* else fall through */ 11481 } 11482 11483 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 11484 2 lowest bits of ireg(G) */ 11485 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8) 11486 && insn[0] == 0x0F && insn[1] == 0x50) { 11487 /* sz == 8 is a kludge to handle insns with REX.W redundantly 11488 set to 1, which has been known to happen: 11489 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 11490 20071106: see further comments on MOVMSKPS implementation above. 
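      The implementation below builds the 2-bit result by shifting
      lane 1 (bits 63:32 of E) right by 31, so the sign of the lower
      F64 lands in bit 0, and lane 3 (bits 127:96) right by 30, so the
      sign of the upper F64 lands in bit 1, then ORing the two
      together.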
11491 */ 11492 modrm = getUChar(delta+2); 11493 if (epartIsReg(modrm)) { 11494 Int src; 11495 t0 = newTemp(Ity_I32); 11496 t1 = newTemp(Ity_I32); 11497 delta += 2+1; 11498 src = eregOfRexRM(pfx,modrm); 11499 assign( t0, binop( Iop_And32, 11500 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), 11501 mkU32(1) )); 11502 assign( t1, binop( Iop_And32, 11503 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), 11504 mkU32(2) )); 11505 putIReg32( gregOfRexRM(pfx,modrm), 11506 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) 11507 ); 11508 DIP("movmskpd %s,%s\n", nameXMMReg(src), 11509 nameIReg32(gregOfRexRM(pfx,modrm))); 11510 goto decode_success; 11511 } 11512 /* else fall through */ 11513 goto decode_failure; 11514 } 11515 11516 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 11517 if (have66noF2noF3(pfx) && sz == 2 11518 && insn[0] == 0x0F && insn[1] == 0xF7) { 11519 modrm = getUChar(delta+2); 11520 if (epartIsReg(modrm)) { 11521 IRTemp regD = newTemp(Ity_V128); 11522 IRTemp mask = newTemp(Ity_V128); 11523 IRTemp olddata = newTemp(Ity_V128); 11524 IRTemp newdata = newTemp(Ity_V128); 11525 addr = newTemp(Ity_I64); 11526 11527 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 11528 assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) )); 11529 11530 /* Unfortunately can't do the obvious thing with SarN8x16 11531 here since that can't be re-emitted as SSE2 code - no such 11532 insn. */ 11533 assign( 11534 mask, 11535 binop(Iop_64HLtoV128, 11536 binop(Iop_SarN8x8, 11537 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), 11538 mkU8(7) ), 11539 binop(Iop_SarN8x8, 11540 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), 11541 mkU8(7) ) )); 11542 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 11543 assign( newdata, 11544 binop(Iop_OrV128, 11545 binop(Iop_AndV128, 11546 mkexpr(regD), 11547 mkexpr(mask) ), 11548 binop(Iop_AndV128, 11549 mkexpr(olddata), 11550 unop(Iop_NotV128, mkexpr(mask)))) ); 11551 storeLE( mkexpr(addr), mkexpr(newdata) ); 11552 11553 delta += 2+1; 11554 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ), 11555 nameXMMReg( gregOfRexRM(pfx,modrm) ) ); 11556 goto decode_success; 11557 } 11558 /* else fall through */ 11559 } 11560 11561 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 11562 if (have66noF2noF3(pfx) && sz == 2 11563 && insn[0] == 0x0F && insn[1] == 0xE7) { 11564 modrm = getUChar(delta+2); 11565 if (!epartIsReg(modrm)) { 11566 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11567 gen_SEGV_if_not_16_aligned( addr ); 11568 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11569 DIP("movntdq %s,%s\n", dis_buf, 11570 nameXMMReg(gregOfRexRM(pfx,modrm))); 11571 delta += 2+alen; 11572 goto decode_success; 11573 } 11574 /* else fall through */ 11575 goto decode_failure; 11576 } 11577 11578 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 11579 if (haveNo66noF2noF3(pfx) && 11580 insn[0] == 0x0F && insn[1] == 0xC3) { 11581 vassert(sz == 4 || sz == 8); 11582 modrm = getUChar(delta+2); 11583 if (!epartIsReg(modrm)) { 11584 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11585 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 11586 DIP("movnti %s,%s\n", dis_buf, 11587 nameIRegG(sz, pfx, modrm)); 11588 delta += 2+alen; 11589 goto decode_success; 11590 } 11591 /* else fall through */ 11592 } 11593 11594 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 11595 or lo half xmm). 
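      Only the store-to-memory form is handled below; the xmm-to-xmm
      form falls through, still awaiting a test case.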
*/ 11596 if (have66noF2noF3(pfx) 11597 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 11598 && insn[0] == 0x0F && insn[1] == 0xD6) { 11599 modrm = getUChar(delta+2); 11600 if (epartIsReg(modrm)) { 11601 /* fall through, awaiting test case */ 11602 /* dst: lo half copied, hi half zeroed */ 11603 } else { 11604 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11605 storeLE( mkexpr(addr), 11606 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 11607 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 11608 delta += 2+alen; 11609 goto decode_success; 11610 } 11611 } 11612 11613 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 11614 hi half). */ 11615 if (haveF3no66noF2(pfx) && sz == 4 11616 && insn[0] == 0x0F && insn[1] == 0xD6) { 11617 modrm = getUChar(delta+2); 11618 if (epartIsReg(modrm)) { 11619 do_MMX_preamble(); 11620 putXMMReg( gregOfRexRM(pfx,modrm), 11621 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 11622 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 11623 nameXMMReg(gregOfRexRM(pfx,modrm))); 11624 delta += 2+1; 11625 goto decode_success; 11626 } else { 11627 /* apparently no mem case for this insn */ 11628 goto decode_failure; 11629 } 11630 } 11631 11632 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 11633 G (lo half xmm). Upper half of G is zeroed out. */ 11634 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 11635 G (lo half xmm). If E is mem, upper half of G is zeroed out. 11636 If E is reg, upper half of G is unchanged. */ 11637 if ( (haveF2no66noF3(pfx) 11638 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 11639 && insn[0] == 0x0F && insn[1] == 0x10) 11640 || 11641 (haveF3no66noF2(pfx) 11642 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 11643 && insn[0] == 0x0F && insn[1] == 0x7E) 11644 ) { 11645 modrm = getUChar(delta+2); 11646 if (epartIsReg(modrm)) { 11647 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11648 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 11649 if (insn[1] == 0x7E/*MOVQ*/) { 11650 /* zero bits 127:64 */ 11651 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 11652 } 11653 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11654 nameXMMReg(gregOfRexRM(pfx,modrm))); 11655 delta += 2+1; 11656 } else { 11657 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11658 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 11659 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11660 loadLE(Ity_I64, mkexpr(addr)) ); 11661 DIP("movsd %s,%s\n", dis_buf, 11662 nameXMMReg(gregOfRexRM(pfx,modrm))); 11663 delta += 2+alen; 11664 } 11665 goto decode_success; 11666 } 11667 11668 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 11669 or lo half xmm). 
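      In the register-to-register case only the low 64-bit lane of E
      is overwritten and the upper lane of E is left unchanged; in the
      memory case a plain 64-bit store is generated.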
   */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x11) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
                          getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                              nameXMMReg(eregOfRexRM(pfx,modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         storeLE( mkexpr(addr),
                  getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                              dis_buf);
         delta += 2+alen;
      }
      goto decode_success;
   }

   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }

   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }

   /* 66 0F 56 = ORPD -- G = G or E */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 );
      goto decode_success;
   }

   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufpd $%d,%s,%s\n", select,
                                   nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufpd $%d,%s,%s\n", select,
                                   dis_buf,
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#     define SELD(n) mkexpr((n)==0 ? d0 : d1)
#     define SELS(n) mkexpr((n)==0 ? s0 : s1)

      putXMMReg(
         gregOfRexRM(pfx,modrm),
         binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }

   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }

   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2,
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }

   /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
   /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
   /* These just appear to be special cases of SHUFPD */
   if (have66noF2noF3(pfx)
       && sz == 2 /* could be 8 if rex also present */
       && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      Bool   hi = toBool(insn[1] == 0x15);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         delta += 2+1;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%spd %s,%s\n", hi ?
"h" : "l", 11823 dis_buf, 11824 nameXMMReg(gregOfRexRM(pfx,modrm))); 11825 } 11826 11827 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 11828 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 11829 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 11830 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 11831 11832 if (hi) { 11833 putXMMReg( gregOfRexRM(pfx,modrm), 11834 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 11835 } else { 11836 putXMMReg( gregOfRexRM(pfx,modrm), 11837 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 11838 } 11839 11840 goto decode_success; 11841 } 11842 11843 /* 66 0F 57 = XORPD -- G = G xor E */ 11844 if (have66noF2noF3(pfx) && sz == 2 11845 && insn[0] == 0x0F && insn[1] == 0x57) { 11846 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 ); 11847 goto decode_success; 11848 } 11849 11850 /* 66 0F 6B = PACKSSDW */ 11851 if (have66noF2noF3(pfx) && sz == 2 11852 && insn[0] == 0x0F && insn[1] == 0x6B) { 11853 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11854 "packssdw", 11855 Iop_QNarrowBin32Sto16Sx8, True ); 11856 goto decode_success; 11857 } 11858 11859 /* 66 0F 63 = PACKSSWB */ 11860 if (have66noF2noF3(pfx) && sz == 2 11861 && insn[0] == 0x0F && insn[1] == 0x63) { 11862 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11863 "packsswb", 11864 Iop_QNarrowBin16Sto8Sx16, True ); 11865 goto decode_success; 11866 } 11867 11868 /* 66 0F 67 = PACKUSWB */ 11869 if (have66noF2noF3(pfx) && sz == 2 11870 && insn[0] == 0x0F && insn[1] == 0x67) { 11871 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11872 "packuswb", 11873 Iop_QNarrowBin16Sto8Ux16, True ); 11874 goto decode_success; 11875 } 11876 11877 /* 66 0F FC = PADDB */ 11878 if (have66noF2noF3(pfx) && sz == 2 11879 && insn[0] == 0x0F && insn[1] == 0xFC) { 11880 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11881 "paddb", Iop_Add8x16, False ); 11882 goto decode_success; 11883 } 11884 11885 /* 66 0F FE = PADDD */ 11886 if (have66noF2noF3(pfx) && sz == 2 11887 && insn[0] == 0x0F && insn[1] == 0xFE) { 11888 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11889 "paddd", Iop_Add32x4, False ); 11890 goto decode_success; 11891 } 11892 11893 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 11894 /* 0F D4 = PADDQ -- add 64x1 */ 11895 if (haveNo66noF2noF3(pfx) && sz == 4 11896 && insn[0] == 0x0F && insn[1] == 0xD4) { 11897 do_MMX_preamble(); 11898 delta = dis_MMXop_regmem_to_reg ( 11899 vbi, pfx, delta+2, insn[1], "paddq", False ); 11900 goto decode_success; 11901 } 11902 11903 /* 66 0F D4 = PADDQ */ 11904 if (have66noF2noF3(pfx) && sz == 2 11905 && insn[0] == 0x0F && insn[1] == 0xD4) { 11906 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11907 "paddq", Iop_Add64x2, False ); 11908 goto decode_success; 11909 } 11910 11911 /* 66 0F FD = PADDW */ 11912 if (have66noF2noF3(pfx) && sz == 2 11913 && insn[0] == 0x0F && insn[1] == 0xFD) { 11914 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11915 "paddw", Iop_Add16x8, False ); 11916 goto decode_success; 11917 } 11918 11919 /* 66 0F EC = PADDSB */ 11920 if (have66noF2noF3(pfx) && sz == 2 11921 && insn[0] == 0x0F && insn[1] == 0xEC) { 11922 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11923 "paddsb", Iop_QAdd8Sx16, False ); 11924 goto decode_success; 11925 } 11926 11927 /* 66 0F ED = PADDSW */ 11928 if (have66noF2noF3(pfx) && sz == 2 11929 && insn[0] == 0x0F && insn[1] == 0xED) { 11930 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11931 "paddsw", Iop_QAdd16Sx8, False ); 11932 goto decode_success; 11933 } 11934 11935 /* 66 0F DC = PADDUSB */ 11936 if (have66noF2noF3(pfx) && 
sz == 2 11937 && insn[0] == 0x0F && insn[1] == 0xDC) { 11938 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11939 "paddusb", Iop_QAdd8Ux16, False ); 11940 goto decode_success; 11941 } 11942 11943 /* 66 0F DD = PADDUSW */ 11944 if (have66noF2noF3(pfx) && sz == 2 11945 && insn[0] == 0x0F && insn[1] == 0xDD) { 11946 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11947 "paddusw", Iop_QAdd16Ux8, False ); 11948 goto decode_success; 11949 } 11950 11951 /* 66 0F DB = PAND */ 11952 if (have66noF2noF3(pfx) && sz == 2 11953 && insn[0] == 0x0F && insn[1] == 0xDB) { 11954 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 ); 11955 goto decode_success; 11956 } 11957 11958 /* 66 0F DF = PANDN */ 11959 if (have66noF2noF3(pfx) && sz == 2 11960 && insn[0] == 0x0F && insn[1] == 0xDF) { 11961 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 ); 11962 goto decode_success; 11963 } 11964 11965 /* 66 0F E0 = PAVGB */ 11966 if (have66noF2noF3(pfx) && sz == 2 11967 && insn[0] == 0x0F && insn[1] == 0xE0) { 11968 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11969 "pavgb", Iop_Avg8Ux16, False ); 11970 goto decode_success; 11971 } 11972 11973 /* 66 0F E3 = PAVGW */ 11974 if (have66noF2noF3(pfx) && sz == 2 11975 && insn[0] == 0x0F && insn[1] == 0xE3) { 11976 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11977 "pavgw", Iop_Avg16Ux8, False ); 11978 goto decode_success; 11979 } 11980 11981 /* 66 0F 74 = PCMPEQB */ 11982 if (have66noF2noF3(pfx) && sz == 2 11983 && insn[0] == 0x0F && insn[1] == 0x74) { 11984 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11985 "pcmpeqb", Iop_CmpEQ8x16, False ); 11986 goto decode_success; 11987 } 11988 11989 /* 66 0F 76 = PCMPEQD */ 11990 if (have66noF2noF3(pfx) && sz == 2 11991 && insn[0] == 0x0F && insn[1] == 0x76) { 11992 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11993 "pcmpeqd", Iop_CmpEQ32x4, False ); 11994 goto decode_success; 11995 } 11996 11997 /* 66 0F 75 = PCMPEQW */ 11998 if (have66noF2noF3(pfx) && sz == 2 11999 && insn[0] == 0x0F && insn[1] == 0x75) { 12000 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12001 "pcmpeqw", Iop_CmpEQ16x8, False ); 12002 goto decode_success; 12003 } 12004 12005 /* 66 0F 64 = PCMPGTB */ 12006 if (have66noF2noF3(pfx) && sz == 2 12007 && insn[0] == 0x0F && insn[1] == 0x64) { 12008 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12009 "pcmpgtb", Iop_CmpGT8Sx16, False ); 12010 goto decode_success; 12011 } 12012 12013 /* 66 0F 66 = PCMPGTD */ 12014 if (have66noF2noF3(pfx) && sz == 2 12015 && insn[0] == 0x0F && insn[1] == 0x66) { 12016 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12017 "pcmpgtd", Iop_CmpGT32Sx4, False ); 12018 goto decode_success; 12019 } 12020 12021 /* 66 0F 65 = PCMPGTW */ 12022 if (have66noF2noF3(pfx) && sz == 2 12023 && insn[0] == 0x0F && insn[1] == 0x65) { 12024 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12025 "pcmpgtw", Iop_CmpGT16Sx8, False ); 12026 goto decode_success; 12027 } 12028 12029 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 12030 zero-extend of it in ireg(G). 
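      The low 3 bits of the immediate select which of the eight 16-bit
      lanes is extracted: the vector is split into four 32-bit words
      and either the low or the high half of the relevant word is
      taken.  For example, imm8 == 5 picks bits 95:80 of xmm(E), the
      high half of 32-bit word 2.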
*/ 12031 if (have66noF2noF3(pfx) 12032 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 12033 && insn[0] == 0x0F && insn[1] == 0xC5) { 12034 modrm = insn[2]; 12035 if (epartIsReg(modrm)) { 12036 t5 = newTemp(Ity_V128); 12037 t4 = newTemp(Ity_I16); 12038 assign(t5, getXMMReg(eregOfRexRM(pfx,modrm))); 12039 breakup128to32s( t5, &t3, &t2, &t1, &t0 ); 12040 switch (insn[3] & 7) { 12041 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; 12042 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; 12043 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; 12044 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; 12045 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; 12046 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; 12047 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; 12048 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; 12049 default: vassert(0); 12050 } 12051 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4))); 12052 DIP("pextrw $%d,%s,%s\n", 12053 (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)), 12054 nameIReg32(gregOfRexRM(pfx,modrm))); 12055 delta += 4; 12056 goto decode_success; 12057 } 12058 /* else fall through */ 12059 /* note, if memory case is ever filled in, there is 1 byte after 12060 amode */ 12061 } 12062 12063 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 12064 put it into the specified lane of xmm(G). */ 12065 if (have66noF2noF3(pfx) 12066 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 12067 && insn[0] == 0x0F && insn[1] == 0xC4) { 12068 Int lane; 12069 t4 = newTemp(Ity_I16); 12070 modrm = insn[2]; 12071 12072 if (epartIsReg(modrm)) { 12073 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 12074 delta += 3+1; 12075 lane = insn[3+1-1]; 12076 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 12077 nameIReg16(eregOfRexRM(pfx,modrm)), 12078 nameXMMReg(gregOfRexRM(pfx,modrm))); 12079 } else { 12080 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12081 1/*byte after the amode*/ ); 12082 delta += 3+alen; 12083 lane = insn[3+alen-1]; 12084 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 12085 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 12086 dis_buf, 12087 nameXMMReg(gregOfRexRM(pfx,modrm))); 12088 } 12089 12090 putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) ); 12091 goto decode_success; 12092 } 12093 12094 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 12095 E(xmm or mem) to G(xmm) */ 12096 if (have66noF2noF3(pfx) && sz == 2 12097 && insn[0] == 0x0F && insn[1] == 0xF5) { 12098 IRTemp s1V = newTemp(Ity_V128); 12099 IRTemp s2V = newTemp(Ity_V128); 12100 IRTemp dV = newTemp(Ity_V128); 12101 IRTemp s1Hi = newTemp(Ity_I64); 12102 IRTemp s1Lo = newTemp(Ity_I64); 12103 IRTemp s2Hi = newTemp(Ity_I64); 12104 IRTemp s2Lo = newTemp(Ity_I64); 12105 IRTemp dHi = newTemp(Ity_I64); 12106 IRTemp dLo = newTemp(Ity_I64); 12107 modrm = insn[2]; 12108 if (epartIsReg(modrm)) { 12109 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12110 delta += 2+1; 12111 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12112 nameXMMReg(gregOfRexRM(pfx,modrm))); 12113 } else { 12114 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12115 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 12116 delta += 2+alen; 12117 DIP("pmaddwd %s,%s\n", dis_buf, 12118 nameXMMReg(gregOfRexRM(pfx,modrm))); 12119 } 12120 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12121 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 12122 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 12123 assign( s2Hi, 
                    unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "amd64g_calculate_mmx_pmaddwd",
                      &amd64g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "amd64g_calculate_mmx_pmaddwd",
                      &amd64g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
      goto decode_success;
   }

   /* 66 0F EE = PMAXSW -- 16x8 signed max */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xEE) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmaxsw", Iop_Max16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xDE) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmaxub", Iop_Max8Ux16, False );
      goto decode_success;
   }

   /* 66 0F EA = PMINSW -- 16x8 signed min */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xEA) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pminsw", Iop_Min16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DA = PMINUB -- 8x16 unsigned min */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xDA) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pminub", Iop_Min8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
      xmm(E), turn them into a 16-bit value, and put zero-extend of it
      in ireg(G).  Doing this directly is just too cumbersome; give up
      therefore and call a helper. */
   /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I64);
         assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
         assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
         t5 = newTemp(Ity_I64);
         assign(t5, mkIRExprCCall(
                       Ity_I64, 0/*regparms*/,
                       "amd64g_calculate_sse_pmovmskb",
                       &amd64g_calculate_sse_pmovmskb,
                       mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
         putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
         DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameIReg32(gregOfRexRM(pfx,modrm)));
         delta += 3;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE4) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE5) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D5 = PMULLW -- 16x8 multiply */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xD5) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   if (haveNo66noF2noF3(pfx) && sz == 4
       && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = insn[2];

      do_MMX_preamble();
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregLO3ofRM(modrm)));
      }

      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregLO3ofRM(modrm),
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }

   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
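      /* Translation note: each 32x32->64 product is formed with a
         scalar Iop_MullU32 on source lanes 0 and 2 and written
         straight into the low and high 64-bit lanes of the result --
         hence the "really poor translation" remark above. */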
12274 t0 = newTemp(Ity_I64); 12275 modrm = insn[2]; 12276 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12277 12278 if (epartIsReg(modrm)) { 12279 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12280 delta += 2+1; 12281 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12282 nameXMMReg(gregOfRexRM(pfx,modrm))); 12283 } else { 12284 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12285 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12286 delta += 2+alen; 12287 DIP("pmuludq %s,%s\n", dis_buf, 12288 nameXMMReg(gregOfRexRM(pfx,modrm))); 12289 } 12290 12291 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 12292 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12293 12294 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); 12295 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) ); 12296 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); 12297 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) ); 12298 goto decode_success; 12299 } 12300 12301 /* 66 0F EB = POR */ 12302 if (have66noF2noF3(pfx) && sz == 2 12303 && insn[0] == 0x0F && insn[1] == 0xEB) { 12304 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 ); 12305 goto decode_success; 12306 } 12307 12308 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 12309 from E(xmm or mem) to G(xmm) */ 12310 if (have66noF2noF3(pfx) && sz == 2 12311 && insn[0] == 0x0F && insn[1] == 0xF6) { 12312 IRTemp s1V = newTemp(Ity_V128); 12313 IRTemp s2V = newTemp(Ity_V128); 12314 IRTemp dV = newTemp(Ity_V128); 12315 IRTemp s1Hi = newTemp(Ity_I64); 12316 IRTemp s1Lo = newTemp(Ity_I64); 12317 IRTemp s2Hi = newTemp(Ity_I64); 12318 IRTemp s2Lo = newTemp(Ity_I64); 12319 IRTemp dHi = newTemp(Ity_I64); 12320 IRTemp dLo = newTemp(Ity_I64); 12321 modrm = insn[2]; 12322 if (epartIsReg(modrm)) { 12323 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12324 delta += 2+1; 12325 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12326 nameXMMReg(gregOfRexRM(pfx,modrm))); 12327 } else { 12328 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12329 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 12330 delta += 2+alen; 12331 DIP("psadbw %s,%s\n", dis_buf, 12332 nameXMMReg(gregOfRexRM(pfx,modrm))); 12333 } 12334 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12335 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 12336 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 12337 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 12338 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 12339 assign( dHi, mkIRExprCCall( 12340 Ity_I64, 0/*regparms*/, 12341 "amd64g_calculate_mmx_psadbw", 12342 &amd64g_calculate_mmx_psadbw, 12343 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 12344 )); 12345 assign( dLo, mkIRExprCCall( 12346 Ity_I64, 0/*regparms*/, 12347 "amd64g_calculate_mmx_psadbw", 12348 &amd64g_calculate_mmx_psadbw, 12349 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 12350 )); 12351 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 12352 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12353 goto decode_success; 12354 } 12355 12356 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 12357 if (have66noF2noF3(pfx) && sz == 2 12358 && insn[0] == 0x0F && insn[1] == 0x70) { 12359 Int order; 12360 IRTemp sV, dV, s3, s2, s1, s0; 12361 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12362 sV = newTemp(Ity_V128); 12363 dV = newTemp(Ity_V128); 12364 modrm = insn[2]; 12365 if (epartIsReg(modrm)) { 12366 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12367 order = (Int)insn[3]; 12368 delta += 3+1; 12369 
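         /* Each 2-bit field of 'order' picks one of the four source
            32-bit lanes for the corresponding destination lane; for
            example order == 0x1B (0b00011011) reverses the lanes. */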
DIP("pshufd $%d,%s,%s\n", order, 12370 nameXMMReg(eregOfRexRM(pfx,modrm)), 12371 nameXMMReg(gregOfRexRM(pfx,modrm))); 12372 } else { 12373 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12374 1/*byte after the amode*/ ); 12375 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12376 order = (Int)insn[2+alen]; 12377 delta += 2+alen+1; 12378 DIP("pshufd $%d,%s,%s\n", order, 12379 dis_buf, 12380 nameXMMReg(gregOfRexRM(pfx,modrm))); 12381 } 12382 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12383 12384 # define SEL(n) \ 12385 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 12386 assign(dV, 12387 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 12388 SEL((order>>2)&3), SEL((order>>0)&3) ) 12389 ); 12390 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12391 # undef SEL 12392 goto decode_success; 12393 } 12394 12395 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 12396 mem) to G(xmm), and copy lower half */ 12397 if (haveF3no66noF2(pfx) && sz == 4 12398 && insn[0] == 0x0F && insn[1] == 0x70) { 12399 Int order; 12400 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; 12401 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12402 sV = newTemp(Ity_V128); 12403 dV = newTemp(Ity_V128); 12404 sVhi = newTemp(Ity_I64); 12405 dVhi = newTemp(Ity_I64); 12406 modrm = insn[2]; 12407 if (epartIsReg(modrm)) { 12408 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12409 order = (Int)insn[3]; 12410 delta += 3+1; 12411 DIP("pshufhw $%d,%s,%s\n", order, 12412 nameXMMReg(eregOfRexRM(pfx,modrm)), 12413 nameXMMReg(gregOfRexRM(pfx,modrm))); 12414 } else { 12415 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12416 1/*byte after the amode*/ ); 12417 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12418 order = (Int)insn[2+alen]; 12419 delta += 2+alen+1; 12420 DIP("pshufhw $%d,%s,%s\n", order, 12421 dis_buf, 12422 nameXMMReg(gregOfRexRM(pfx,modrm))); 12423 } 12424 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12425 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); 12426 12427 # define SEL(n) \ 12428 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 12429 assign(dVhi, 12430 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 12431 SEL((order>>2)&3), SEL((order>>0)&3) ) 12432 ); 12433 assign(dV, binop( Iop_64HLtoV128, 12434 mkexpr(dVhi), 12435 unop(Iop_V128to64, mkexpr(sV))) ); 12436 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12437 # undef SEL 12438 goto decode_success; 12439 } 12440 12441 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 12442 mem) to G(xmm), and copy upper half */ 12443 if (haveF2no66noF3(pfx) && sz == 4 12444 && insn[0] == 0x0F && insn[1] == 0x70) { 12445 Int order; 12446 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; 12447 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12448 sV = newTemp(Ity_V128); 12449 dV = newTemp(Ity_V128); 12450 sVlo = newTemp(Ity_I64); 12451 dVlo = newTemp(Ity_I64); 12452 modrm = insn[2]; 12453 if (epartIsReg(modrm)) { 12454 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12455 order = (Int)insn[3]; 12456 delta += 3+1; 12457 DIP("pshuflw $%d,%s,%s\n", order, 12458 nameXMMReg(eregOfRexRM(pfx,modrm)), 12459 nameXMMReg(gregOfRexRM(pfx,modrm))); 12460 } else { 12461 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12462 1/*byte after the amode*/ ); 12463 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12464 order = (Int)insn[2+alen]; 12465 delta += 2+alen+1; 12466 DIP("pshuflw $%d,%s,%s\n", order, 12467 dis_buf, 12468 nameXMMReg(gregOfRexRM(pfx,modrm))); 12469 } 12470 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); 12471 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); 12472 12473 # define SEL(n) \ 12474 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 12475 assign(dVlo, 12476 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 12477 SEL((order>>2)&3), SEL((order>>0)&3) ) 12478 ); 12479 assign(dV, binop( Iop_64HLtoV128, 12480 unop(Iop_V128HIto64, mkexpr(sV)), 12481 mkexpr(dVlo) ) ); 12482 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12483 # undef SEL 12484 goto decode_success; 12485 } 12486 12487 /* 66 0F 72 /6 ib = PSLLD by immediate */ 12488 if (have66noF2noF3(pfx) && sz == 2 12489 && insn[0] == 0x0F && insn[1] == 0x72 12490 && epartIsReg(insn[2]) 12491 && gregLO3ofRM(insn[2]) == 6) { 12492 delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 ); 12493 goto decode_success; 12494 } 12495 12496 /* 66 0F F2 = PSLLD by E */ 12497 if (have66noF2noF3(pfx) && sz == 2 12498 && insn[0] == 0x0F && insn[1] == 0xF2) { 12499 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 ); 12500 goto decode_success; 12501 } 12502 12503 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 12504 /* note, if mem case ever filled in, 1 byte after amode */ 12505 if (have66noF2noF3(pfx) && sz == 2 12506 && insn[0] == 0x0F && insn[1] == 0x73 12507 && epartIsReg(insn[2]) 12508 && gregLO3ofRM(insn[2]) == 7) { 12509 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 12510 Int imm = (Int)insn[3]; 12511 Int reg = eregOfRexRM(pfx,insn[2]); 12512 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 12513 vassert(imm >= 0 && imm <= 255); 12514 delta += 4; 12515 12516 sV = newTemp(Ity_V128); 12517 dV = newTemp(Ity_V128); 12518 hi64 = newTemp(Ity_I64); 12519 lo64 = newTemp(Ity_I64); 12520 hi64r = newTemp(Ity_I64); 12521 lo64r = newTemp(Ity_I64); 12522 12523 if (imm >= 16) { 12524 putXMMReg(reg, mkV128(0x0000)); 12525 goto decode_success; 12526 } 12527 12528 assign( sV, getXMMReg(reg) ); 12529 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 12530 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 12531 12532 if (imm == 0) { 12533 assign( lo64r, mkexpr(lo64) ); 12534 assign( hi64r, mkexpr(hi64) ); 12535 } 12536 else 12537 
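      /* For shifts of 8 or more bytes the low half of the result is
         zero and the high half is the old low half shifted left by
         (imm-8) bytes. */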
if (imm == 8) { 12538 assign( lo64r, mkU64(0) ); 12539 assign( hi64r, mkexpr(lo64) ); 12540 } 12541 else 12542 if (imm > 8) { 12543 assign( lo64r, mkU64(0) ); 12544 assign( hi64r, binop( Iop_Shl64, 12545 mkexpr(lo64), 12546 mkU8( 8*(imm-8) ) )); 12547 } else { 12548 assign( lo64r, binop( Iop_Shl64, 12549 mkexpr(lo64), 12550 mkU8(8 * imm) )); 12551 assign( hi64r, 12552 binop( Iop_Or64, 12553 binop(Iop_Shl64, mkexpr(hi64), 12554 mkU8(8 * imm)), 12555 binop(Iop_Shr64, mkexpr(lo64), 12556 mkU8(8 * (8 - imm)) ) 12557 ) 12558 ); 12559 } 12560 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 12561 putXMMReg(reg, mkexpr(dV)); 12562 goto decode_success; 12563 } 12564 12565 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 12566 if (have66noF2noF3(pfx) && sz == 2 12567 && insn[0] == 0x0F && insn[1] == 0x73 12568 && epartIsReg(insn[2]) 12569 && gregLO3ofRM(insn[2]) == 6) { 12570 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 ); 12571 goto decode_success; 12572 } 12573 12574 /* 66 0F F3 = PSLLQ by E */ 12575 if (have66noF2noF3(pfx) && sz == 2 12576 && insn[0] == 0x0F && insn[1] == 0xF3) { 12577 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 ); 12578 goto decode_success; 12579 } 12580 12581 /* 66 0F 71 /6 ib = PSLLW by immediate */ 12582 if (have66noF2noF3(pfx) && sz == 2 12583 && insn[0] == 0x0F && insn[1] == 0x71 12584 && epartIsReg(insn[2]) 12585 && gregLO3ofRM(insn[2]) == 6) { 12586 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 ); 12587 goto decode_success; 12588 } 12589 12590 /* 66 0F F1 = PSLLW by E */ 12591 if (have66noF2noF3(pfx) && sz == 2 12592 && insn[0] == 0x0F && insn[1] == 0xF1) { 12593 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 ); 12594 goto decode_success; 12595 } 12596 12597 /* 66 0F 72 /4 ib = PSRAD by immediate */ 12598 if (have66noF2noF3(pfx) && sz == 2 12599 && insn[0] == 0x0F && insn[1] == 0x72 12600 && epartIsReg(insn[2]) 12601 && gregLO3ofRM(insn[2]) == 4) { 12602 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 ); 12603 goto decode_success; 12604 } 12605 12606 /* 66 0F E2 = PSRAD by E */ 12607 if (have66noF2noF3(pfx) && sz == 2 12608 && insn[0] == 0x0F && insn[1] == 0xE2) { 12609 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 ); 12610 goto decode_success; 12611 } 12612 12613 /* 66 0F 71 /4 ib = PSRAW by immediate */ 12614 if (have66noF2noF3(pfx) && sz == 2 12615 && insn[0] == 0x0F && insn[1] == 0x71 12616 && epartIsReg(insn[2]) 12617 && gregLO3ofRM(insn[2]) == 4) { 12618 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 ); 12619 goto decode_success; 12620 } 12621 12622 /* 66 0F E1 = PSRAW by E */ 12623 if (have66noF2noF3(pfx) && sz == 2 12624 && insn[0] == 0x0F && insn[1] == 0xE1) { 12625 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 ); 12626 goto decode_success; 12627 } 12628 12629 /* 66 0F 72 /2 ib = PSRLD by immediate */ 12630 if (have66noF2noF3(pfx) && sz == 2 12631 && insn[0] == 0x0F && insn[1] == 0x72 12632 && epartIsReg(insn[2]) 12633 && gregLO3ofRM(insn[2]) == 2) { 12634 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 ); 12635 goto decode_success; 12636 } 12637 12638 /* 66 0F D2 = PSRLD by E */ 12639 if (have66noF2noF3(pfx) && sz == 2 12640 && insn[0] == 0x0F && insn[1] == 0xD2) { 12641 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 ); 12642 goto decode_success; 12643 } 12644 12645 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 12646 /* note, if mem case ever filled in, 1 
byte after amode */ 12647 if (have66noF2noF3(pfx) && sz == 2 12648 && insn[0] == 0x0F && insn[1] == 0x73 12649 && epartIsReg(insn[2]) 12650 && gregLO3ofRM(insn[2]) == 3) { 12651 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 12652 Int imm = (Int)insn[3]; 12653 Int reg = eregOfRexRM(pfx,insn[2]); 12654 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 12655 vassert(imm >= 0 && imm <= 255); 12656 delta += 4; 12657 12658 sV = newTemp(Ity_V128); 12659 dV = newTemp(Ity_V128); 12660 hi64 = newTemp(Ity_I64); 12661 lo64 = newTemp(Ity_I64); 12662 hi64r = newTemp(Ity_I64); 12663 lo64r = newTemp(Ity_I64); 12664 12665 if (imm >= 16) { 12666 putXMMReg(reg, mkV128(0x0000)); 12667 goto decode_success; 12668 } 12669 12670 assign( sV, getXMMReg(reg) ); 12671 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 12672 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 12673 12674 if (imm == 0) { 12675 assign( lo64r, mkexpr(lo64) ); 12676 assign( hi64r, mkexpr(hi64) ); 12677 } 12678 else 12679 if (imm == 8) { 12680 assign( hi64r, mkU64(0) ); 12681 assign( lo64r, mkexpr(hi64) ); 12682 } 12683 else 12684 if (imm > 8) { 12685 assign( hi64r, mkU64(0) ); 12686 assign( lo64r, binop( Iop_Shr64, 12687 mkexpr(hi64), 12688 mkU8( 8*(imm-8) ) )); 12689 } else { 12690 assign( hi64r, binop( Iop_Shr64, 12691 mkexpr(hi64), 12692 mkU8(8 * imm) )); 12693 assign( lo64r, 12694 binop( Iop_Or64, 12695 binop(Iop_Shr64, mkexpr(lo64), 12696 mkU8(8 * imm)), 12697 binop(Iop_Shl64, mkexpr(hi64), 12698 mkU8(8 * (8 - imm)) ) 12699 ) 12700 ); 12701 } 12702 12703 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 12704 putXMMReg(reg, mkexpr(dV)); 12705 goto decode_success; 12706 } 12707 12708 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 12709 if (have66noF2noF3(pfx) && sz == 2 12710 && insn[0] == 0x0F && insn[1] == 0x73 12711 && epartIsReg(insn[2]) 12712 && gregLO3ofRM(insn[2]) == 2) { 12713 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 ); 12714 goto decode_success; 12715 } 12716 12717 /* 66 0F D3 = PSRLQ by E */ 12718 if (have66noF2noF3(pfx) && sz == 2 12719 && insn[0] == 0x0F && insn[1] == 0xD3) { 12720 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 ); 12721 goto decode_success; 12722 } 12723 12724 /* 66 0F 71 /2 ib = PSRLW by immediate */ 12725 if (have66noF2noF3(pfx) && sz == 2 12726 && insn[0] == 0x0F && insn[1] == 0x71 12727 && epartIsReg(insn[2]) 12728 && gregLO3ofRM(insn[2]) == 2) { 12729 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 ); 12730 goto decode_success; 12731 } 12732 12733 /* 66 0F D1 = PSRLW by E */ 12734 if (have66noF2noF3(pfx) && sz == 2 12735 && insn[0] == 0x0F && insn[1] == 0xD1) { 12736 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 ); 12737 goto decode_success; 12738 } 12739 12740 /* 66 0F F8 = PSUBB */ 12741 if (have66noF2noF3(pfx) && sz == 2 12742 && insn[0] == 0x0F && insn[1] == 0xF8) { 12743 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12744 "psubb", Iop_Sub8x16, False ); 12745 goto decode_success; 12746 } 12747 12748 /* 66 0F FA = PSUBD */ 12749 if (have66noF2noF3(pfx) && sz == 2 12750 && insn[0] == 0x0F && insn[1] == 0xFA) { 12751 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12752 "psubd", Iop_Sub32x4, False ); 12753 goto decode_success; 12754 } 12755 12756 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 12757 /* 0F FB = PSUBQ -- sub 64x1 */ 12758 if (haveNo66noF2noF3(pfx) && sz == 4 12759 && insn[0] == 0x0F && insn[1] == 0xFB) { 12760 do_MMX_preamble(); 12761 delta = dis_MMXop_regmem_to_reg ( 12762 vbi, pfx, 
                 delta+2, insn[1], "psubq", False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xFB) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xF9) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F E8 = PSUBSB */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE8) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE9) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D8 = PSUBUSB */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xD8) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xD9) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }

   /* 66 0F 68 = PUNPCKHBW */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x68) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }

   /* 66 0F 6A = PUNPCKHDQ */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x6A) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }

   /* 66 0F 6D = PUNPCKHQDQ */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x6D) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }

   /* 66 0F 69 = PUNPCKHWD */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x69) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }

   /* 66 0F 60 = PUNPCKLBW */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }

   /* 66 0F 62 = PUNPCKLDQ */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x62) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }

   /* 66 0F 6C = PUNPCKLQDQ */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x6C) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }

   /* 66 0F 61 = PUNPCKLWD */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0]
== 0x0F && insn[1] == 0x61) { 12880 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12881 "punpcklwd", 12882 Iop_InterleaveLO16x8, True ); 12883 goto decode_success; 12884 } 12885 12886 /* 66 0F EF = PXOR */ 12887 if (have66noF2noF3(pfx) && sz == 2 12888 && insn[0] == 0x0F && insn[1] == 0xEF) { 12889 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 ); 12890 goto decode_success; 12891 } 12892 12893 //.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ 12894 //.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE 12895 //.. //-- && (!epartIsReg(insn[2])) 12896 //.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { 12897 //.. //-- Bool store = gregOfRM(insn[2]) == 0; 12898 //.. //-- vg_assert(sz == 4); 12899 //.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); 12900 //.. //-- t1 = LOW24(pair); 12901 //.. //-- eip += 2+HI8(pair); 12902 //.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, 12903 //.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], 12904 //.. //-- Lit16, (UShort)insn[2], 12905 //.. //-- TempReg, t1 ); 12906 //.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); 12907 //.. //-- goto decode_success; 12908 //.. //-- } 12909 12910 /* 0F AE /7 = CLFLUSH -- flush cache line */ 12911 if (haveNo66noF2noF3(pfx) && sz == 4 12912 && insn[0] == 0x0F && insn[1] == 0xAE 12913 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) { 12914 12915 /* This is something of a hack. We need to know the size of the 12916 cache line containing addr. Since we don't (easily), assume 12917 256 on the basis that no real cache would have a line that 12918 big. It's safe to invalidate more stuff than we need, just 12919 inefficient. */ 12920 ULong lineszB = 256ULL; 12921 12922 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12923 delta += 2+alen; 12924 12925 /* Round addr down to the start of the containing block. */ 12926 stmt( IRStmt_Put( 12927 OFFB_TISTART, 12928 binop( Iop_And64, 12929 mkexpr(addr), 12930 mkU64( ~(lineszB-1) ))) ); 12931 12932 stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) ); 12933 12934 irsb->jumpkind = Ijk_TInval; 12935 irsb->next = mkU64(guest_RIP_bbstart+delta); 12936 dres.whatNext = Dis_StopHere; 12937 12938 DIP("clflush %s\n", dis_buf); 12939 goto decode_success; 12940 } 12941 12942 /* ---------------------------------------------------- */ 12943 /* --- end of the SSE/SSE2 decoder. --- */ 12944 /* ---------------------------------------------------- */ 12945 12946 /* ---------------------------------------------------- */ 12947 /* --- start of the SSE3 decoder. --- */ 12948 /* ---------------------------------------------------- */ 12949 12950 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 12951 duplicating some lanes (2:2:0:0). */ 12952 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 12953 duplicating some lanes (3:3:1:1). */ 12954 if (haveF3no66noF2(pfx) && sz == 4 12955 && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) { 12956 IRTemp s3, s2, s1, s0; 12957 IRTemp sV = newTemp(Ity_V128); 12958 Bool isH = insn[1] == 0x16; 12959 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12960 12961 modrm = insn[2]; 12962 if (epartIsReg(modrm)) { 12963 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12964 DIP("movs%cdup %s,%s\n", isH ? 
'h' : 'l', 12965 nameXMMReg(eregOfRexRM(pfx,modrm)), 12966 nameXMMReg(gregOfRexRM(pfx,modrm))); 12967 delta += 2+1; 12968 } else { 12969 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12970 gen_SEGV_if_not_16_aligned( addr ); 12971 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12972 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 12973 dis_buf, 12974 nameXMMReg(gregOfRexRM(pfx,modrm))); 12975 delta += 2+alen; 12976 } 12977 12978 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12979 putXMMReg( gregOfRexRM(pfx,modrm), 12980 isH ? mk128from32s( s3, s3, s1, s1 ) 12981 : mk128from32s( s2, s2, s0, s0 ) ); 12982 goto decode_success; 12983 } 12984 12985 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 12986 duplicating some lanes (0:1:0:1). */ 12987 if (haveF2no66noF3(pfx) 12988 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 12989 && insn[0] == 0x0F && insn[1] == 0x12) { 12990 IRTemp sV = newTemp(Ity_V128); 12991 IRTemp d0 = newTemp(Ity_I64); 12992 12993 modrm = insn[2]; 12994 if (epartIsReg(modrm)) { 12995 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12996 DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12997 nameXMMReg(gregOfRexRM(pfx,modrm))); 12998 delta += 2+1; 12999 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 13000 } else { 13001 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13002 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 13003 DIP("movddup %s,%s\n", dis_buf, 13004 nameXMMReg(gregOfRexRM(pfx,modrm))); 13005 delta += 2+alen; 13006 } 13007 13008 putXMMReg( gregOfRexRM(pfx,modrm), 13009 binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 13010 goto decode_success; 13011 } 13012 13013 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 13014 if (haveF2no66noF3(pfx) && sz == 4 13015 && insn[0] == 0x0F && insn[1] == 0xD0) { 13016 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 13017 IRTemp eV = newTemp(Ity_V128); 13018 IRTemp gV = newTemp(Ity_V128); 13019 IRTemp addV = newTemp(Ity_V128); 13020 IRTemp subV = newTemp(Ity_V128); 13021 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 13022 13023 modrm = insn[2]; 13024 if (epartIsReg(modrm)) { 13025 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13026 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13027 nameXMMReg(gregOfRexRM(pfx,modrm))); 13028 delta += 2+1; 13029 } else { 13030 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13031 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13032 DIP("addsubps %s,%s\n", dis_buf, 13033 nameXMMReg(gregOfRexRM(pfx,modrm))); 13034 delta += 2+alen; 13035 } 13036 13037 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13038 13039 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); 13040 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); 13041 13042 breakup128to32s( addV, &a3, &a2, &a1, &a0 ); 13043 breakup128to32s( subV, &s3, &s2, &s1, &s0 ); 13044 13045 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 )); 13046 goto decode_success; 13047 } 13048 13049 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm).
*/ 13050 if (have66noF2noF3(pfx) && sz == 2 13051 && insn[0] == 0x0F && insn[1] == 0xD0) { 13052 IRTemp eV = newTemp(Ity_V128); 13053 IRTemp gV = newTemp(Ity_V128); 13054 IRTemp addV = newTemp(Ity_V128); 13055 IRTemp subV = newTemp(Ity_V128); 13056 IRTemp a1 = newTemp(Ity_I64); 13057 IRTemp s0 = newTemp(Ity_I64); 13058 13059 modrm = insn[2]; 13060 if (epartIsReg(modrm)) { 13061 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13062 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13063 nameXMMReg(gregOfRexRM(pfx,modrm))); 13064 delta += 2+1; 13065 } else { 13066 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13067 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13068 DIP("addsubpd %s,%s\n", dis_buf, 13069 nameXMMReg(gregOfRexRM(pfx,modrm))); 13070 delta += 2+alen; 13071 } 13072 13073 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13074 13075 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); 13076 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); 13077 13078 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 13079 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 13080 13081 putXMMReg( gregOfRexRM(pfx,modrm), 13082 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 13083 goto decode_success; 13084 } 13085 13086 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 13087 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 13088 if (haveF2no66noF3(pfx) && sz == 4 13089 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 13090 IRTemp e3, e2, e1, e0, g3, g2, g1, g0; 13091 IRTemp eV = newTemp(Ity_V128); 13092 IRTemp gV = newTemp(Ity_V128); 13093 IRTemp leftV = newTemp(Ity_V128); 13094 IRTemp rightV = newTemp(Ity_V128); 13095 Bool isAdd = insn[1] == 0x7C; 13096 HChar* str = isAdd ? "add" : "sub"; 13097 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 13098 13099 modrm = insn[2]; 13100 if (epartIsReg(modrm)) { 13101 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13102 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13103 nameXMMReg(gregOfRexRM(pfx,modrm))); 13104 delta += 2+1; 13105 } else { 13106 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13107 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13108 DIP("h%sps %s,%s\n", str, dis_buf, 13109 nameXMMReg(gregOfRexRM(pfx,modrm))); 13110 delta += 2+alen; 13111 } 13112 13113 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13114 13115 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 13116 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 13117 13118 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 13119 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 13120 13121 putXMMReg( gregOfRexRM(pfx,modrm), 13122 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 13123 mkexpr(leftV), mkexpr(rightV) ) ); 13124 goto decode_success; 13125 } 13126 13127 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 13128 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 13129 if (have66noF2noF3(pfx) && sz == 2 13130 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 13131 IRTemp e1 = newTemp(Ity_I64); 13132 IRTemp e0 = newTemp(Ity_I64); 13133 IRTemp g1 = newTemp(Ity_I64); 13134 IRTemp g0 = newTemp(Ity_I64); 13135 IRTemp eV = newTemp(Ity_V128); 13136 IRTemp gV = newTemp(Ity_V128); 13137 IRTemp leftV = newTemp(Ity_V128); 13138 IRTemp rightV = newTemp(Ity_V128); 13139 Bool isAdd = insn[1] == 0x7C; 13140 HChar* str = isAdd ? 
"add" : "sub"; 13141 13142 modrm = insn[2]; 13143 if (epartIsReg(modrm)) { 13144 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13145 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13146 nameXMMReg(gregOfRexRM(pfx,modrm))); 13147 delta += 2+1; 13148 } else { 13149 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13150 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13151 DIP("h%spd %s,%s\n", str, dis_buf, 13152 nameXMMReg(gregOfRexRM(pfx,modrm))); 13153 delta += 2+alen; 13154 } 13155 13156 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13157 13158 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 13159 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 13160 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 13161 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 13162 13163 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 13164 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 13165 13166 putXMMReg( gregOfRexRM(pfx,modrm), 13167 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 13168 mkexpr(leftV), mkexpr(rightV) ) ); 13169 goto decode_success; 13170 } 13171 13172 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 13173 if (haveF2no66noF3(pfx) && sz == 4 13174 && insn[0] == 0x0F && insn[1] == 0xF0) { 13175 modrm = insn[2]; 13176 if (epartIsReg(modrm)) { 13177 goto decode_failure; 13178 } else { 13179 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13180 putXMMReg( gregOfRexRM(pfx,modrm), 13181 loadLE(Ity_V128, mkexpr(addr)) ); 13182 DIP("lddqu %s,%s\n", dis_buf, 13183 nameXMMReg(gregOfRexRM(pfx,modrm))); 13184 delta += 2+alen; 13185 } 13186 goto decode_success; 13187 } 13188 13189 /* ---------------------------------------------------- */ 13190 /* --- end of the SSE3 decoder. --- */ 13191 /* ---------------------------------------------------- */ 13192 13193 /* ---------------------------------------------------- */ 13194 /* --- start of the SSSE3 decoder. 
--- */ 13195 /* ---------------------------------------------------- */ 13196 13197 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 13198 Unsigned Bytes (MMX) */ 13199 if (haveNo66noF2noF3(pfx) 13200 && sz == 4 13201 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 13202 IRTemp sV = newTemp(Ity_I64); 13203 IRTemp dV = newTemp(Ity_I64); 13204 IRTemp sVoddsSX = newTemp(Ity_I64); 13205 IRTemp sVevensSX = newTemp(Ity_I64); 13206 IRTemp dVoddsZX = newTemp(Ity_I64); 13207 IRTemp dVevensZX = newTemp(Ity_I64); 13208 13209 modrm = insn[3]; 13210 do_MMX_preamble(); 13211 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13212 13213 if (epartIsReg(modrm)) { 13214 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13215 delta += 3+1; 13216 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13217 nameMMXReg(gregLO3ofRM(modrm))); 13218 } else { 13219 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13220 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13221 delta += 3+alen; 13222 DIP("pmaddubsw %s,%s\n", dis_buf, 13223 nameMMXReg(gregLO3ofRM(modrm))); 13224 } 13225 13226 /* compute dV unsigned x sV signed */ 13227 assign( sVoddsSX, 13228 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 13229 assign( sVevensSX, 13230 binop(Iop_SarN16x4, 13231 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 13232 mkU8(8)) ); 13233 assign( dVoddsZX, 13234 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 13235 assign( dVevensZX, 13236 binop(Iop_ShrN16x4, 13237 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 13238 mkU8(8)) ); 13239 13240 putMMXReg( 13241 gregLO3ofRM(modrm), 13242 binop(Iop_QAdd16Sx4, 13243 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 13244 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 13245 ) 13246 ); 13247 goto decode_success; 13248 } 13249 13250 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 13251 Unsigned Bytes (XMM) */ 13252 if (have66noF2noF3(pfx) 13253 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13254 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 13255 IRTemp sV = newTemp(Ity_V128); 13256 IRTemp dV = newTemp(Ity_V128); 13257 IRTemp sVoddsSX = newTemp(Ity_V128); 13258 IRTemp sVevensSX = newTemp(Ity_V128); 13259 IRTemp dVoddsZX = newTemp(Ity_V128); 13260 IRTemp dVevensZX = newTemp(Ity_V128); 13261 13262 modrm = insn[3]; 13263 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13264 13265 if (epartIsReg(modrm)) { 13266 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13267 delta += 3+1; 13268 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13269 nameXMMReg(gregOfRexRM(pfx,modrm))); 13270 } else { 13271 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13272 gen_SEGV_if_not_16_aligned( addr ); 13273 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13274 delta += 3+alen; 13275 DIP("pmaddubsw %s,%s\n", dis_buf, 13276 nameXMMReg(gregOfRexRM(pfx,modrm))); 13277 } 13278 13279 /* compute dV unsigned x sV signed */ 13280 assign( sVoddsSX, 13281 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 13282 assign( sVevensSX, 13283 binop(Iop_SarN16x8, 13284 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 13285 mkU8(8)) ); 13286 assign( dVoddsZX, 13287 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 13288 assign( dVevensZX, 13289 binop(Iop_ShrN16x8, 13290 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 13291 mkU8(8)) ); 13292 13293 putXMMReg( 13294 gregOfRexRM(pfx,modrm), 13295 binop(Iop_QAdd16Sx8, 13296 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 13297 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 13298 ) 13299 ); 13300 goto decode_success; 13301 
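/* Informal reference sketch for the two PMADDUBSW cases above (an explanatory note only, not part of the translation): for each 16-bit result lane i, PMADDUBSW computes SatSigned16( ZeroExt16(dV.byte[2i]) * SignExt16(sV.byte[2i]) + ZeroExt16(dV.byte[2i+1]) * SignExt16(sV.byte[2i+1]) ); the Shl/Shr/Sar sequences above merely split dV and sV into those zero- and sign-extended even/odd 16-bit halves before the Mul16 and saturating add. */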
} 13302 13303 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 13304 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 13305 mmx) and G to G (mmx). */ 13306 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 13307 mmx) and G to G (mmx). */ 13308 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 13309 to G (mmx). */ 13310 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 13311 to G (mmx). */ 13312 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 13313 to G (mmx). */ 13314 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 13315 to G (mmx). */ 13316 13317 if (haveNo66noF2noF3(pfx) 13318 && sz == 4 13319 && insn[0] == 0x0F && insn[1] == 0x38 13320 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 13321 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 13322 HChar* str = "???"; 13323 IROp opV64 = Iop_INVALID; 13324 IROp opCatO = Iop_CatOddLanes16x4; 13325 IROp opCatE = Iop_CatEvenLanes16x4; 13326 IRTemp sV = newTemp(Ity_I64); 13327 IRTemp dV = newTemp(Ity_I64); 13328 13329 modrm = insn[3]; 13330 13331 switch (insn[2]) { 13332 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 13333 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 13334 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 13335 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 13336 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 13337 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 13338 default: vassert(0); 13339 } 13340 if (insn[2] == 0x02 || insn[2] == 0x06) { 13341 opCatO = Iop_InterleaveHI32x2; 13342 opCatE = Iop_InterleaveLO32x2; 13343 } 13344 13345 do_MMX_preamble(); 13346 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13347 13348 if (epartIsReg(modrm)) { 13349 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13350 delta += 3+1; 13351 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13352 nameMMXReg(gregLO3ofRM(modrm))); 13353 } else { 13354 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13355 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13356 delta += 3+alen; 13357 DIP("ph%s %s,%s\n", str, dis_buf, 13358 nameMMXReg(gregLO3ofRM(modrm))); 13359 } 13360 13361 putMMXReg( 13362 gregLO3ofRM(modrm), 13363 binop(opV64, 13364 binop(opCatE,mkexpr(sV),mkexpr(dV)), 13365 binop(opCatO,mkexpr(sV),mkexpr(dV)) 13366 ) 13367 ); 13368 goto decode_success; 13369 } 13370 13371 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 13372 xmm) and G to G (xmm). */ 13373 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 13374 xmm) and G to G (xmm). */ 13375 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 13376 G to G (xmm). */ 13377 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 13378 G to G (xmm). */ 13379 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 13380 G to G (xmm). */ 13381 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 13382 G to G (xmm). 
*/ 13383 13384 if (have66noF2noF3(pfx) 13385 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13386 && insn[0] == 0x0F && insn[1] == 0x38 13387 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 13388 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 13389 HChar* str = "???"; 13390 IROp opV64 = Iop_INVALID; 13391 IROp opCatO = Iop_CatOddLanes16x4; 13392 IROp opCatE = Iop_CatEvenLanes16x4; 13393 IRTemp sV = newTemp(Ity_V128); 13394 IRTemp dV = newTemp(Ity_V128); 13395 IRTemp sHi = newTemp(Ity_I64); 13396 IRTemp sLo = newTemp(Ity_I64); 13397 IRTemp dHi = newTemp(Ity_I64); 13398 IRTemp dLo = newTemp(Ity_I64); 13399 13400 modrm = insn[3]; 13401 13402 switch (insn[2]) { 13403 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 13404 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 13405 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 13406 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 13407 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 13408 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 13409 default: vassert(0); 13410 } 13411 if (insn[2] == 0x02 || insn[2] == 0x06) { 13412 opCatO = Iop_InterleaveHI32x2; 13413 opCatE = Iop_InterleaveLO32x2; 13414 } 13415 13416 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13417 13418 if (epartIsReg(modrm)) { 13419 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13420 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13421 nameXMMReg(gregOfRexRM(pfx,modrm))); 13422 delta += 3+1; 13423 } else { 13424 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13425 gen_SEGV_if_not_16_aligned( addr ); 13426 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13427 DIP("ph%s %s,%s\n", str, dis_buf, 13428 nameXMMReg(gregOfRexRM(pfx,modrm))); 13429 delta += 3+alen; 13430 } 13431 13432 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13433 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13434 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13435 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13436 13437 /* This isn't a particularly efficient way to compute the 13438 result, but at least it avoids a proliferation of IROps, 13439 hence avoids complicating all the backends.
*/ 13440 putXMMReg( 13441 gregOfRexRM(pfx,modrm), 13442 binop(Iop_64HLtoV128, 13443 binop(opV64, 13444 binop(opCatE,mkexpr(sHi),mkexpr(sLo)), 13445 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) 13446 ), 13447 binop(opV64, 13448 binop(opCatE,mkexpr(dHi),mkexpr(dLo)), 13449 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) 13450 ) 13451 ) 13452 ); 13453 goto decode_success; 13454 } 13455 13456 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale 13457 (MMX) */ 13458 if (haveNo66noF2noF3(pfx) 13459 && sz == 4 13460 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 13461 IRTemp sV = newTemp(Ity_I64); 13462 IRTemp dV = newTemp(Ity_I64); 13463 13464 modrm = insn[3]; 13465 do_MMX_preamble(); 13466 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13467 13468 if (epartIsReg(modrm)) { 13469 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13470 delta += 3+1; 13471 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13472 nameMMXReg(gregLO3ofRM(modrm))); 13473 } else { 13474 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13475 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13476 delta += 3+alen; 13477 DIP("pmulhrsw %s,%s\n", dis_buf, 13478 nameMMXReg(gregLO3ofRM(modrm))); 13479 } 13480 13481 putMMXReg( 13482 gregLO3ofRM(modrm), 13483 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) 13484 ); 13485 goto decode_success; 13486 } 13487 13488 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and 13489 Scale (XMM) */ 13490 if (have66noF2noF3(pfx) 13491 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13492 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 13493 IRTemp sV = newTemp(Ity_V128); 13494 IRTemp dV = newTemp(Ity_V128); 13495 IRTemp sHi = newTemp(Ity_I64); 13496 IRTemp sLo = newTemp(Ity_I64); 13497 IRTemp dHi = newTemp(Ity_I64); 13498 IRTemp dLo = newTemp(Ity_I64); 13499 13500 modrm = insn[3]; 13501 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13502 13503 if (epartIsReg(modrm)) { 13504 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13505 delta += 3+1; 13506 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13507 nameXMMReg(gregOfRexRM(pfx,modrm))); 13508 } else { 13509 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13510 gen_SEGV_if_not_16_aligned( addr ); 13511 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13512 delta += 3+alen; 13513 DIP("pmulhrsw %s,%s\n", dis_buf, 13514 nameXMMReg(gregOfRexRM(pfx,modrm))); 13515 } 13516 13517 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13518 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13519 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13520 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13521 13522 putXMMReg( 13523 gregOfRexRM(pfx,modrm), 13524 binop(Iop_64HLtoV128, 13525 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 13526 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 13527 ) 13528 ); 13529 goto decode_success; 13530 } 13531 13532 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ 13533 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ 13534 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ 13535 if (haveNo66noF2noF3(pfx) 13536 && sz == 4 13537 && insn[0] == 0x0F && insn[1] == 0x38 13538 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 13539 IRTemp sV = newTemp(Ity_I64); 13540 IRTemp dV = newTemp(Ity_I64); 13541 HChar* str = "???"; 13542 Int laneszB = 0; 13543 13544 switch (insn[2]) { 13545 case 0x08: laneszB = 1; str = "b"; break; 13546 case 0x09: laneszB = 2; str = "w"; break; 13547 case 0x0A: laneszB = 4; str = "d"; break; 13548 default: vassert(0); 13549 } 13550 13551 modrm =
insn[3]; 13552 do_MMX_preamble(); 13553 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13554 13555 if (epartIsReg(modrm)) { 13556 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13557 delta += 3+1; 13558 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13559 nameMMXReg(gregLO3ofRM(modrm))); 13560 } else { 13561 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13562 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13563 delta += 3+alen; 13564 DIP("psign%s %s,%s\n", str, dis_buf, 13565 nameMMXReg(gregLO3ofRM(modrm))); 13566 } 13567 13568 putMMXReg( 13569 gregLO3ofRM(modrm), 13570 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 13571 ); 13572 goto decode_success; 13573 } 13574 13575 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 13576 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 13577 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */ 13578 if (have66noF2noF3(pfx) 13579 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13580 && insn[0] == 0x0F && insn[1] == 0x38 13581 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 13582 IRTemp sV = newTemp(Ity_V128); 13583 IRTemp dV = newTemp(Ity_V128); 13584 IRTemp sHi = newTemp(Ity_I64); 13585 IRTemp sLo = newTemp(Ity_I64); 13586 IRTemp dHi = newTemp(Ity_I64); 13587 IRTemp dLo = newTemp(Ity_I64); 13588 HChar* str = "???"; 13589 Int laneszB = 0; 13590 13591 switch (insn[2]) { 13592 case 0x08: laneszB = 1; str = "b"; break; 13593 case 0x09: laneszB = 2; str = "w"; break; 13594 case 0x0A: laneszB = 4; str = "d"; break; 13595 default: vassert(0); 13596 } 13597 13598 modrm = insn[3]; 13599 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13600 13601 if (epartIsReg(modrm)) { 13602 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13603 delta += 3+1; 13604 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13605 nameXMMReg(gregOfRexRM(pfx,modrm))); 13606 } else { 13607 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13608 gen_SEGV_if_not_16_aligned( addr ); 13609 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13610 delta += 3+alen; 13611 DIP("psign%s %s,%s\n", str, dis_buf, 13612 nameXMMReg(gregOfRexRM(pfx,modrm))); 13613 } 13614 13615 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13616 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13617 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13618 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13619 13620 putXMMReg( 13621 gregOfRexRM(pfx,modrm), 13622 binop(Iop_64HLtoV128, 13623 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 13624 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 13625 ) 13626 ); 13627 goto decode_success; 13628 } 13629 13630 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 13631 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ 13632 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 13633 if (haveNo66noF2noF3(pfx) 13634 && sz == 4 13635 && insn[0] == 0x0F && insn[1] == 0x38 13636 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 13637 IRTemp sV = newTemp(Ity_I64); 13638 HChar* str = "???"; 13639 Int laneszB = 0; 13640 13641 switch (insn[2]) { 13642 case 0x1C: laneszB = 1; str = "b"; break; 13643 case 0x1D: laneszB = 2; str = "w"; break; 13644 case 0x1E: laneszB = 4; str = "d"; break; 13645 default: vassert(0); 13646 } 13647 13648 modrm = insn[3]; 13649 do_MMX_preamble(); 13650 13651 if (epartIsReg(modrm)) { 13652 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13653 delta += 3+1; 13654 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13655 nameMMXReg(gregLO3ofRM(modrm))); 13656 }
else { 13657 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13658 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13659 delta += 3+alen; 13660 DIP("pabs%s %s,%s\n", str, dis_buf, 13661 nameMMXReg(gregLO3ofRM(modrm))); 13662 } 13663 13664 putMMXReg( 13665 gregLO3ofRM(modrm), 13666 dis_PABS_helper( mkexpr(sV), laneszB ) 13667 ); 13668 goto decode_success; 13669 } 13670 13671 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ 13672 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ 13673 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ 13674 if (have66noF2noF3(pfx) 13675 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13676 && insn[0] == 0x0F && insn[1] == 0x38 13677 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 13678 IRTemp sV = newTemp(Ity_V128); 13679 IRTemp sHi = newTemp(Ity_I64); 13680 IRTemp sLo = newTemp(Ity_I64); 13681 HChar* str = "???"; 13682 Int laneszB = 0; 13683 13684 switch (insn[2]) { 13685 case 0x1C: laneszB = 1; str = "b"; break; 13686 case 0x1D: laneszB = 2; str = "w"; break; 13687 case 0x1E: laneszB = 4; str = "d"; break; 13688 default: vassert(0); 13689 } 13690 13691 modrm = insn[3]; 13692 13693 if (epartIsReg(modrm)) { 13694 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13695 delta += 3+1; 13696 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13697 nameXMMReg(gregOfRexRM(pfx,modrm))); 13698 } else { 13699 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13700 gen_SEGV_if_not_16_aligned( addr ); 13701 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13702 delta += 3+alen; 13703 DIP("pabs%s %s,%s\n", str, dis_buf, 13704 nameXMMReg(gregOfRexRM(pfx,modrm))); 13705 } 13706 13707 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13708 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13709 13710 putXMMReg( 13711 gregOfRexRM(pfx,modrm), 13712 binop(Iop_64HLtoV128, 13713 dis_PABS_helper( mkexpr(sHi), laneszB ), 13714 dis_PABS_helper( mkexpr(sLo), laneszB ) 13715 ) 13716 ); 13717 goto decode_success; 13718 } 13719 13720 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 13721 if (haveNo66noF2noF3(pfx) && sz == 4 13722 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 13723 IRTemp sV = newTemp(Ity_I64); 13724 IRTemp dV = newTemp(Ity_I64); 13725 IRTemp res = newTemp(Ity_I64); 13726 13727 modrm = insn[3]; 13728 do_MMX_preamble(); 13729 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13730 13731 if (epartIsReg(modrm)) { 13732 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13733 d64 = (Long)insn[3+1]; 13734 delta += 3+1+1; 13735 DIP("palignr $%d,%s,%s\n", (Int)d64, 13736 nameMMXReg(eregLO3ofRM(modrm)), 13737 nameMMXReg(gregLO3ofRM(modrm))); 13738 } else { 13739 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 13740 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13741 d64 = (Long)insn[3+alen]; 13742 delta += 3+alen+1; 13743 DIP("palignr $%d,%s,%s\n", (Int)d64, 13744 dis_buf, 13745 nameMMXReg(gregLO3ofRM(modrm))); 13746 } 13747 13748 if (d64 == 0) { 13749 assign( res, mkexpr(sV) ); 13750 } 13751 else if (d64 >= 1 && d64 <= 7) { 13752 assign(res, 13753 binop(Iop_Or64, 13754 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), 13755 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) 13756 ))); 13757 } 13758 else if (d64 == 8) { 13759 assign( res, mkexpr(dV) ); 13760 } 13761 else if (d64 >= 9 && d64 <= 15) { 13762 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); 13763 } 13764 else if (d64 >= 16 && d64 <= 255) { 13765 assign( res, mkU64(0) ); 13766 } 13767 else 13768 vassert(0); 13769 13770 putMMXReg(
gregLO3ofRM(modrm), mkexpr(res) ); 13771 goto decode_success; 13772 } 13773 13774 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 13775 if (have66noF2noF3(pfx) 13776 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13777 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 13778 IRTemp sV = newTemp(Ity_V128); 13779 IRTemp dV = newTemp(Ity_V128); 13780 IRTemp sHi = newTemp(Ity_I64); 13781 IRTemp sLo = newTemp(Ity_I64); 13782 IRTemp dHi = newTemp(Ity_I64); 13783 IRTemp dLo = newTemp(Ity_I64); 13784 IRTemp rHi = newTemp(Ity_I64); 13785 IRTemp rLo = newTemp(Ity_I64); 13786 13787 modrm = insn[3]; 13788 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13789 13790 if (epartIsReg(modrm)) { 13791 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13792 d64 = (Long)insn[3+1]; 13793 delta += 3+1+1; 13794 DIP("palignr $%d,%s,%s\n", (Int)d64, 13795 nameXMMReg(eregOfRexRM(pfx,modrm)), 13796 nameXMMReg(gregOfRexRM(pfx,modrm))); 13797 } else { 13798 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 13799 gen_SEGV_if_not_16_aligned( addr ); 13800 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13801 d64 = (Long)insn[3+alen]; 13802 delta += 3+alen+1; 13803 DIP("palignr $%d,%s,%s\n", (Int)d64, 13804 dis_buf, 13805 nameXMMReg(gregOfRexRM(pfx,modrm))); 13806 } 13807 13808 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13809 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13810 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13811 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13812 13813 if (d64 == 0) { 13814 assign( rHi, mkexpr(sHi) ); 13815 assign( rLo, mkexpr(sLo) ); 13816 } 13817 else if (d64 >= 1 && d64 <= 7) { 13818 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) ); 13819 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) ); 13820 } 13821 else if (d64 == 8) { 13822 assign( rHi, mkexpr(dLo) ); 13823 assign( rLo, mkexpr(sHi) ); 13824 } 13825 else if (d64 >= 9 && d64 <= 15) { 13826 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) ); 13827 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) ); 13828 } 13829 else if (d64 == 16) { 13830 assign( rHi, mkexpr(dHi) ); 13831 assign( rLo, mkexpr(dLo) ); 13832 } 13833 else if (d64 >= 17 && d64 <= 23) { 13834 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) ); 13835 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) ); 13836 } 13837 else if (d64 == 24) { 13838 assign( rHi, mkU64(0) ); 13839 assign( rLo, mkexpr(dHi) ); 13840 } 13841 else if (d64 >= 25 && d64 <= 31) { 13842 assign( rHi, mkU64(0) ); 13843 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) ); 13844 } 13845 else if (d64 >= 32 && d64 <= 255) { 13846 assign( rHi, mkU64(0) ); 13847 assign( rLo, mkU64(0) ); 13848 } 13849 else 13850 vassert(0); 13851 13852 putXMMReg( 13853 gregOfRexRM(pfx,modrm), 13854 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 13855 ); 13856 goto decode_success; 13857 } 13858 13859 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 13860 if (haveNo66noF2noF3(pfx) 13861 && sz == 4 13862 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 13863 IRTemp sV = newTemp(Ity_I64); 13864 IRTemp dV = newTemp(Ity_I64); 13865 13866 modrm = insn[3]; 13867 do_MMX_preamble(); 13868 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13869 13870 if (epartIsReg(modrm)) { 13871 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13872 delta += 3+1; 13873 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13874 nameMMXReg(gregLO3ofRM(modrm))); 13875 } else { 13876 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13877 assign( sV, loadLE(Ity_I64, 
mkexpr(addr)) ); 13878 delta += 3+alen; 13879 DIP("pshufb %s,%s\n", dis_buf, 13880 nameMMXReg(gregLO3ofRM(modrm))); 13881 } 13882 13883 putMMXReg( 13884 gregLO3ofRM(modrm), 13885 binop( 13886 Iop_And64, 13887 /* permute the lanes */ 13888 binop( 13889 Iop_Perm8x8, 13890 mkexpr(dV), 13891 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 13892 ), 13893 /* mask off lanes which have (index & 0x80) == 0x80 */ 13894 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 13895 ) 13896 ); 13897 goto decode_success; 13898 } 13899 13900 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 13901 if (have66noF2noF3(pfx) 13902 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13903 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 13904 IRTemp sV = newTemp(Ity_V128); 13905 IRTemp dV = newTemp(Ity_V128); 13906 IRTemp sHi = newTemp(Ity_I64); 13907 IRTemp sLo = newTemp(Ity_I64); 13908 IRTemp dHi = newTemp(Ity_I64); 13909 IRTemp dLo = newTemp(Ity_I64); 13910 IRTemp rHi = newTemp(Ity_I64); 13911 IRTemp rLo = newTemp(Ity_I64); 13912 IRTemp sevens = newTemp(Ity_I64); 13913 IRTemp mask0x80hi = newTemp(Ity_I64); 13914 IRTemp mask0x80lo = newTemp(Ity_I64); 13915 IRTemp maskBit3hi = newTemp(Ity_I64); 13916 IRTemp maskBit3lo = newTemp(Ity_I64); 13917 IRTemp sAnd7hi = newTemp(Ity_I64); 13918 IRTemp sAnd7lo = newTemp(Ity_I64); 13919 IRTemp permdHi = newTemp(Ity_I64); 13920 IRTemp permdLo = newTemp(Ity_I64); 13921 13922 modrm = insn[3]; 13923 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13924 13925 if (epartIsReg(modrm)) { 13926 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13927 delta += 3+1; 13928 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13929 nameXMMReg(gregOfRexRM(pfx,modrm))); 13930 } else { 13931 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13932 gen_SEGV_if_not_16_aligned( addr ); 13933 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13934 delta += 3+alen; 13935 DIP("pshufb %s,%s\n", dis_buf, 13936 nameXMMReg(gregOfRexRM(pfx,modrm))); 13937 } 13938 13939 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13940 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13941 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13942 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13943 13944 assign( sevens, mkU64(0x0707070707070707ULL) ); 13945 13946 /* 13947 mask0x80hi = Not(SarN8x8(sHi,7)) 13948 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 13949 sAnd7hi = And(sHi,sevens) 13950 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 13951 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 13952 rHi = And(permdHi,mask0x80hi) 13953 */ 13954 assign( 13955 mask0x80hi, 13956 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 13957 13958 assign( 13959 maskBit3hi, 13960 binop(Iop_SarN8x8, 13961 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 13962 mkU8(7))); 13963 13964 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 13965 13966 assign( 13967 permdHi, 13968 binop( 13969 Iop_Or64, 13970 binop(Iop_And64, 13971 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 13972 mkexpr(maskBit3hi)), 13973 binop(Iop_And64, 13974 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 13975 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 13976 13977 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 13978 13979 /* And the same for the lower half of the result. What fun. 
*/ 13980 13981 assign( 13982 mask0x80lo, 13983 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 13984 13985 assign( 13986 maskBit3lo, 13987 binop(Iop_SarN8x8, 13988 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 13989 mkU8(7))); 13990 13991 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 13992 13993 assign( 13994 permdLo, 13995 binop( 13996 Iop_Or64, 13997 binop(Iop_And64, 13998 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 13999 mkexpr(maskBit3lo)), 14000 binop(Iop_And64, 14001 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 14002 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 14003 14004 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 14005 14006 putXMMReg( 14007 gregOfRexRM(pfx,modrm), 14008 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 14009 ); 14010 goto decode_success; 14011 } 14012 14013 /* ---------------------------------------------------- */ 14014 /* --- end of the SSSE3 decoder. --- */ 14015 /* ---------------------------------------------------- */ 14016 14017 /* ---------------------------------------------------- */ 14018 /* --- start of the SSE4 decoder --- */ 14019 /* ---------------------------------------------------- */ 14020 14021 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8 14022 Blend Packed Double Precision Floating-Point Values (XMM) */ 14023 if ( have66noF2noF3( pfx ) 14024 && sz == 2 14025 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0D ) { 14026 14027 Int imm8; 14028 UShort imm8_mask_16; 14029 14030 IRTemp dst_vec = newTemp(Ity_V128); 14031 IRTemp src_vec = newTemp(Ity_V128); 14032 IRTemp imm8_mask = newTemp(Ity_V128); 14033 14034 modrm = insn[3]; 14035 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14036 14037 if ( epartIsReg( modrm ) ) { 14038 imm8 = (Int)insn[4]; 14039 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14040 delta += 3+1+1; 14041 DIP( "blendpd $%d, %s,%s\n", imm8, 14042 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14043 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14044 } else { 14045 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14046 1/* imm8 is 1 byte after the amode */ ); 14047 gen_SEGV_if_not_16_aligned( addr ); 14048 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14049 imm8 = (Int)insn[2+alen+1]; 14050 delta += 3+alen+1; 14051 DIP( "blendpd $%d, %s,%s\n", 14052 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14053 } 14054 14055 switch( imm8 & 3 ) { 14056 case 0: imm8_mask_16 = 0x0000; break; 14057 case 1: imm8_mask_16 = 0x00FF; break; 14058 case 2: imm8_mask_16 = 0xFF00; break; 14059 case 3: imm8_mask_16 = 0xFFFF; break; 14060 default: vassert(0); break; 14061 } 14062 assign( imm8_mask, mkV128( imm8_mask_16 ) ); 14063 14064 putXMMReg( gregOfRexRM(pfx, modrm), 14065 binop( Iop_OrV128, 14066 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ), 14067 binop( Iop_AndV128, mkexpr(dst_vec), 14068 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 14069 14070 goto decode_success; 14071 } 14072 14073 14074 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8 14075 Blend Packed Single Precision Floating-Point Values (XMM) */ 14076 if ( have66noF2noF3( pfx ) 14077 && sz == 2 14078 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0C ) { 14079 14080 Int imm8; 14081 IRTemp dst_vec = newTemp(Ity_V128); 14082 IRTemp src_vec = newTemp(Ity_V128); 14083 14084 modrm = insn[3]; 14085 14086 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14087 14088 if ( epartIsReg( modrm ) ) { 14089 imm8 = (Int)insn[3+1]; 14090 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 
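/* Note on the byte accounting here (an explanatory comment only, no new logic): insn[0..2] hold the 0F 3A 0C escape/opcode bytes and insn[3] the modrm byte, so for the register form the imm8 sits at insn[3+1] above, and delta advances by 3+1+1 below (opcode bytes + modrm + imm8). */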
14091 delta += 3+1+1; 14092 DIP( "blendps $%d, %s,%s\n", imm8, 14093 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14094 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14095 } else { 14096 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14097 1/* imm8 is 1 byte after the amode */ ); 14098 gen_SEGV_if_not_16_aligned( addr ); 14099 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14100 imm8 = (Int)insn[3+alen]; 14101 delta += 3+alen+1; 14102 DIP( "blendps $%d, %s,%s\n", 14103 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14104 } 14105 14106 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 0x0F0F, 14107 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 0xF0F0, 0xF0FF, 14108 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF }; 14109 IRTemp imm8_mask = newTemp(Ity_V128); 14110 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) ); 14111 14112 putXMMReg( gregOfRexRM(pfx, modrm), 14113 binop( Iop_OrV128, 14114 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ), 14115 binop( Iop_AndV128, mkexpr(dst_vec), 14116 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 14117 14118 goto decode_success; 14119 } 14120 14121 14122 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8 14123 Blend Packed Words (XMM) */ 14124 if ( have66noF2noF3( pfx ) 14125 && sz == 2 14126 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0E ) { 14127 14128 Int imm8; 14129 IRTemp dst_vec = newTemp(Ity_V128); 14130 IRTemp src_vec = newTemp(Ity_V128); 14131 14132 modrm = insn[3]; 14133 14134 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14135 14136 if ( epartIsReg( modrm ) ) { 14137 imm8 = (Int)insn[3+1]; 14138 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14139 delta += 3+1+1; 14140 DIP( "pblendw $%d, %s,%s\n", imm8, 14141 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14142 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14143 } else { 14144 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14145 1/* imm8 is 1 byte after the amode */ ); 14146 gen_SEGV_if_not_16_aligned( addr ); 14147 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14148 imm8 = (Int)insn[3+alen]; 14149 delta += 3+alen+1; 14150 DIP( "pblendw $%d, %s,%s\n", 14151 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14152 } 14153 14154 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each 14155 bit in imm8.
*/ 14156 Int i; 14157 UShort imm16 = 0; 14158 for (i = 0; i < 8; i++) { 14159 if (imm8 & (1 << i)) 14160 imm16 |= (3 << (2*i)); 14161 } 14162 IRTemp imm16_mask = newTemp(Ity_V128); 14163 assign( imm16_mask, mkV128( imm16 )); 14164 14165 putXMMReg( gregOfRexRM(pfx, modrm), 14166 binop( Iop_OrV128, 14167 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm16_mask) ), 14168 binop( Iop_AndV128, mkexpr(dst_vec), 14169 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) ); 14170 14171 goto decode_success; 14172 } 14173 14174 14175 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 14176 * Carry-less multiplication of selected XMM quadwords into XMM 14177 * registers (a.k.a multiplication of polynomials over GF(2)) 14178 */ 14179 if ( have66noF2noF3( pfx ) 14180 && sz == 2 14181 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x44 ) { 14182 14183 Int imm8; 14184 IRTemp svec = newTemp(Ity_V128); 14185 IRTemp dvec = newTemp(Ity_V128); 14186 14187 modrm = insn[3]; 14188 14189 assign( dvec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14190 14191 if ( epartIsReg( modrm ) ) { 14192 imm8 = (Int)insn[4]; 14193 assign( svec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14194 delta += 3+1+1; 14195 DIP( "pclmulqdq $%d, %s,%s\n", imm8, 14196 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14197 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14198 } else { 14199 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14200 1/* imm8 is 1 byte after the amode */ ); 14201 gen_SEGV_if_not_16_aligned( addr ); 14202 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) ); 14203 imm8 = (Int)insn[2+alen+1]; 14204 delta += 3+alen+1; 14205 DIP( "pclmulqdq $%d, %s,%s\n", 14206 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14207 } 14208 14209 t0 = newTemp(Ity_I64); 14210 t1 = newTemp(Ity_I64); 14211 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, mkexpr(dvec))); 14212 assign(t1, unop((imm8&16) ? 
Iop_V128HIto64 : Iop_V128to64, mkexpr(svec))); 14213 14214 t2 = newTemp(Ity_I64); 14215 t3 = newTemp(Ity_I64); 14216 14217 IRExpr** args; 14218 14219 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0)); 14220 assign(t2, 14221 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 14222 &amd64g_calculate_pclmul, args)); 14223 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1)); 14224 assign(t3, 14225 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 14226 &amd64g_calculate_pclmul, args)); 14227 14228 IRTemp res = newTemp(Ity_V128); 14229 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2))); 14230 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14231 14232 goto decode_success; 14233 } 14234 14235 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 14236 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 14237 if ( have66noF2noF3( pfx ) 14238 && sz == 2 14239 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x41 ) { 14240 14241 Int imm8; 14242 IRTemp src_vec = newTemp(Ity_V128); 14243 IRTemp dst_vec = newTemp(Ity_V128); 14244 IRTemp and_vec = newTemp(Ity_V128); 14245 IRTemp sum_vec = newTemp(Ity_V128); 14246 14247 modrm = insn[3]; 14248 14249 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14250 14251 if ( epartIsReg( modrm ) ) { 14252 imm8 = (Int)insn[4]; 14253 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14254 delta += 3+1+1; 14255 DIP( "dppd $%d, %s,%s\n", imm8, 14256 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14257 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14258 } else { 14259 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14260 1/* imm8 is 1 byte after the amode */ ); 14261 gen_SEGV_if_not_16_aligned( addr ); 14262 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14263 imm8 = (Int)insn[2+alen+1]; 14264 delta += 3+alen+1; 14265 DIP( "dppd $%d, %s,%s\n", 14266 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14267 } 14268 14269 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 14270 14271 assign( and_vec, binop( Iop_AndV128, 14272 binop( Iop_Mul64Fx2, 14273 mkexpr(dst_vec), mkexpr(src_vec) ), 14274 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 14275 14276 assign( sum_vec, binop( Iop_Add64F0x2, 14277 binop( Iop_InterleaveHI64x2, 14278 mkexpr(and_vec), mkexpr(and_vec) ), 14279 binop( Iop_InterleaveLO64x2, 14280 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 14281 14282 putXMMReg( gregOfRexRM( pfx, modrm ), 14283 binop( Iop_AndV128, 14284 binop( Iop_InterleaveLO64x2, 14285 mkexpr(sum_vec), mkexpr(sum_vec) ), 14286 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 14287 14288 goto decode_success; 14289 } 14290 14291 14292 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 14293 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 14294 if ( have66noF2noF3( pfx ) 14295 && sz == 2 14296 && insn[0] == 0x0F 14297 && insn[1] == 0x3A 14298 && insn[2] == 0x40 ) { 14299 14300 Int imm8; 14301 IRTemp xmm1_vec = newTemp(Ity_V128); 14302 IRTemp xmm2_vec = newTemp(Ity_V128); 14303 IRTemp tmp_prod_vec = newTemp(Ity_V128); 14304 IRTemp prod_vec = newTemp(Ity_V128); 14305 IRTemp sum_vec = newTemp(Ity_V128); 14306 IRTemp v3, v2, v1, v0; 14307 v3 = v2 = v1 = v0 = IRTemp_INVALID; 14308 14309 modrm = insn[3]; 14310 14311 assign( xmm1_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14312 14313 if ( epartIsReg( modrm ) ) { 14314 imm8 = (Int)insn[4]; 14315 assign( xmm2_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14316 delta += 3+1+1; 14317 DIP( "dpps $%d, %s,%s\n", imm8, 14318 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14319 nameXMMReg( 
gregOfRexRM(pfx, modrm) ) ); 14320 } else { 14321 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14322 1/* imm8 is 1 byte after the amode */ ); 14323 gen_SEGV_if_not_16_aligned( addr ); 14324 assign( xmm2_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14325 imm8 = (Int)insn[2+alen+1]; 14326 delta += 3+alen+1; 14327 DIP( "dpps $%d, %s,%s\n", 14328 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14329 } 14330 14331 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 14332 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 14333 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF }; 14334 14335 assign( tmp_prod_vec, 14336 binop( Iop_AndV128, 14337 binop( Iop_Mul32Fx4, mkexpr(xmm1_vec), mkexpr(xmm2_vec) ), 14338 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 14339 breakup128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 14340 assign( prod_vec, mk128from32s( v3, v1, v2, v0 ) ); 14341 14342 assign( sum_vec, binop( Iop_Add32Fx4, 14343 binop( Iop_InterleaveHI32x4, 14344 mkexpr(prod_vec), mkexpr(prod_vec) ), 14345 binop( Iop_InterleaveLO32x4, 14346 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 14347 14348 putXMMReg( gregOfRexRM(pfx, modrm), 14349 binop( Iop_AndV128, 14350 binop( Iop_Add32Fx4, 14351 binop( Iop_InterleaveHI32x4, 14352 mkexpr(sum_vec), mkexpr(sum_vec) ), 14353 binop( Iop_InterleaveLO32x4, 14354 mkexpr(sum_vec), mkexpr(sum_vec) ) ), 14355 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 14356 14357 goto decode_success; 14358 } 14359 14360 14361 /* 66 0F 3A 21 /r ib = INSERTPS xmm1, xmm2/m32, imm8 14362 Insert Packed Single Precision Floating-Point Value (XMM) */ 14363 if ( have66noF2noF3( pfx ) 14364 && sz == 2 14365 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x21 ) { 14366 14367 Int imm8; 14368 Int imm8_count_s; 14369 Int imm8_count_d; 14370 Int imm8_zmask; 14371 IRTemp dstVec = newTemp(Ity_V128); 14372 IRTemp srcDWord = newTemp(Ity_I32); 14373 14374 modrm = insn[3]; 14375 14376 assign( dstVec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14377 14378 if ( epartIsReg( modrm ) ) { 14379 IRTemp src_vec = newTemp(Ity_V128); 14380 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14381 14382 IRTemp src_lane_0 = IRTemp_INVALID; 14383 IRTemp src_lane_1 = IRTemp_INVALID; 14384 IRTemp src_lane_2 = IRTemp_INVALID; 14385 IRTemp src_lane_3 = IRTemp_INVALID; 14386 breakup128to32s( src_vec, 14387 &src_lane_3, &src_lane_2, &src_lane_1, &src_lane_0 ); 14388 14389 imm8 = (Int)insn[4]; 14390 imm8_count_s = ((imm8 >> 6) & 3); 14391 switch( imm8_count_s ) { 14392 case 0: assign( srcDWord, mkexpr(src_lane_0) ); break; 14393 case 1: assign( srcDWord, mkexpr(src_lane_1) ); break; 14394 case 2: assign( srcDWord, mkexpr(src_lane_2) ); break; 14395 case 3: assign( srcDWord, mkexpr(src_lane_3) ); break; 14396 default: vassert(0); break; 14397 } 14398 14399 delta += 3+1+1; 14400 DIP( "insertps $%d, %s,%s\n", imm8, 14401 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14402 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14403 } else { 14404 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14405 1/* const imm8 is 1 byte after the amode */ ); 14406 assign( srcDWord, loadLE( Ity_I32, mkexpr(addr) ) ); 14407 imm8 = (Int)insn[2+alen+1]; 14408 imm8_count_s = 0; 14409 delta += 3+alen+1; 14410 DIP( "insertps $%d, %s,%s\n", 14411 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14412 } 14413 14414 IRTemp dst_lane_0 = IRTemp_INVALID; 14415 IRTemp dst_lane_1 = IRTemp_INVALID; 14416 IRTemp dst_lane_2 = IRTemp_INVALID; 14417 IRTemp dst_lane_3 = IRTemp_INVALID; 14418 breakup128to32s( dstVec, 14419 &dst_lane_3, &dst_lane_2, 
&dst_lane_1, &dst_lane_0 ); 14420 14421 imm8_count_d = ((imm8 >> 4) & 3); 14422 switch( imm8_count_d ) { 14423 case 0: dst_lane_0 = srcDWord; break; 14424 case 1: dst_lane_1 = srcDWord; break; 14425 case 2: dst_lane_2 = srcDWord; break; 14426 case 3: dst_lane_3 = srcDWord; break; 14427 default: vassert(0); break; 14428 } 14429 14430 imm8_zmask = (imm8 & 15); 14431 IRTemp zero_32 = newTemp(Ity_I32); 14432 assign( zero_32, mkU32(0) ); 14433 14434 IRExpr* ire_vec_128 = mk128from32s( 14435 ((imm8_zmask & 8) == 8) ? zero_32 : dst_lane_3, 14436 ((imm8_zmask & 4) == 4) ? zero_32 : dst_lane_2, 14437 ((imm8_zmask & 2) == 2) ? zero_32 : dst_lane_1, 14438 ((imm8_zmask & 1) == 1) ? zero_32 : dst_lane_0 ); 14439 14440 putXMMReg( gregOfRexRM(pfx, modrm), ire_vec_128 ); 14441 14442 goto decode_success; 14443 } 14444 14445 14446 /* 66 0F 3A 14 /r ib = PEXTRB r/m8, xmm, imm8 14447 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */ 14448 if ( have66noF2noF3( pfx ) 14449 && sz == 2 14450 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x14 ) { 14451 14452 Int imm8; 14453 IRTemp xmm_vec = newTemp(Ity_V128); 14454 IRTemp sel_lane = newTemp(Ity_I32); 14455 IRTemp shr_lane = newTemp(Ity_I32); 14456 14457 modrm = insn[3]; 14458 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14459 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14460 14461 if ( epartIsReg( modrm ) ) { 14462 imm8 = (Int)insn[3+1]; 14463 } else { 14464 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14465 imm8 = (Int)insn[3+alen]; 14466 } 14467 switch( (imm8 >> 2) & 3 ) { 14468 case 0: assign( sel_lane, mkexpr(t0) ); break; 14469 case 1: assign( sel_lane, mkexpr(t1) ); break; 14470 case 2: assign( sel_lane, mkexpr(t2) ); break; 14471 case 3: assign( sel_lane, mkexpr(t3) ); break; 14472 default: vassert(0); 14473 } 14474 assign( shr_lane, 14475 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 14476 14477 if ( epartIsReg( modrm ) ) { 14478 putIReg64( eregOfRexRM(pfx,modrm), 14479 unop( Iop_32Uto64, 14480 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 14481 14482 delta += 3+1+1; 14483 DIP( "pextrb $%d, %s,%s\n", imm8, 14484 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14485 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14486 } else { 14487 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 14488 delta += 3+alen+1; 14489 DIP( "pextrb $%d, %s,%s\n", 14490 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14491 } 14492 14493 goto decode_success; 14494 } 14495 14496 14497 /* 66 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8 14498 Extract Doubleword int from xmm reg and store in gen.reg or mem.
(XMM) 14499 Note that this insn has the same opcodes as PEXTRQ, but 14500 here the REX.W bit is _not_ present */ 14501 if ( have66noF2noF3( pfx ) 14502 && sz == 2 /* REX.W is _not_ present */ 14503 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) { 14504 14505 Int imm8_10; 14506 IRTemp xmm_vec = newTemp(Ity_V128); 14507 IRTemp src_dword = newTemp(Ity_I32); 14508 14509 modrm = insn[3]; 14510 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14511 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14512 14513 if ( epartIsReg( modrm ) ) { 14514 imm8_10 = (Int)(insn[3+1] & 3); 14515 } else { 14516 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14517 imm8_10 = (Int)(insn[3+alen] & 3); 14518 } 14519 14520 switch ( imm8_10 ) { 14521 case 0: assign( src_dword, mkexpr(t0) ); break; 14522 case 1: assign( src_dword, mkexpr(t1) ); break; 14523 case 2: assign( src_dword, mkexpr(t2) ); break; 14524 case 3: assign( src_dword, mkexpr(t3) ); break; 14525 default: vassert(0); 14526 } 14527 14528 if ( epartIsReg( modrm ) ) { 14529 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 14530 delta += 3+1+1; 14531 DIP( "pextrd $%d, %s,%s\n", imm8_10, 14532 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14533 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 14534 } else { 14535 storeLE( mkexpr(addr), mkexpr(src_dword) ); 14536 delta += 3+alen+1; 14537 DIP( "pextrd $%d, %s,%s\n", 14538 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14539 } 14540 14541 goto decode_success; 14542 } 14543 14544 14545 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8 14546 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM) 14547 Note that this insn has the same opcodes as PEXTRD, but 14548 here the REX.W bit is present */ 14549 if ( have66noF2noF3( pfx ) 14550 && sz == 8 /* REX.W is present */ 14551 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) { 14552 14553 Int imm8_0; 14554 IRTemp xmm_vec = newTemp(Ity_V128); 14555 IRTemp src_qword = newTemp(Ity_I64); 14556 14557 modrm = insn[3]; 14558 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14559 14560 if ( epartIsReg( modrm ) ) { 14561 imm8_0 = (Int)(insn[3+1] & 1); 14562 } else { 14563 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14564 imm8_0 = (Int)(insn[3+alen] & 1); 14565 } 14566 switch ( imm8_0 ) { 14567 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); break; 14568 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); break; 14569 default: vassert(0); 14570 } 14571 14572 if ( epartIsReg( modrm ) ) { 14573 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) ); 14574 delta += 3+1+1; 14575 DIP( "pextrq $%d, %s,%s\n", imm8_0, 14576 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14577 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14578 } else { 14579 storeLE( mkexpr(addr), mkexpr(src_qword) ); 14580 delta += 3+alen+1; 14581 DIP( "pextrq $%d, %s,%s\n", 14582 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14583 } 14584 14585 goto decode_success; 14586 } 14587 14588 14589 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8 14590 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. 
(XMM) */ 14591 if ( have66noF2noF3( pfx ) 14592 && sz == 2 14593 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x15 ) { 14594 14595 Int imm8_20; 14596 IRTemp xmm_vec = newTemp(Ity_V128); 14597 IRTemp src_word = newTemp(Ity_I16); 14598 14599 modrm = insn[3]; 14600 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14601 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14602 14603 if ( epartIsReg( modrm ) ) { 14604 imm8_20 = (Int)(insn[3+1] & 7); 14605 } else { 14606 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14607 imm8_20 = (Int)(insn[3+alen] & 7); 14608 } 14609 14610 switch ( imm8_20 ) { 14611 case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break; 14612 case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break; 14613 case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break; 14614 case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break; 14615 case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break; 14616 case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break; 14617 case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break; 14618 case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break; 14619 default: vassert(0); 14620 } 14621 14622 if ( epartIsReg( modrm ) ) { 14623 putIReg64( eregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(src_word)) ); 14624 delta += 3+1+1; 14625 DIP( "pextrw $%d, %s,%s\n", imm8_20, 14626 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14627 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14628 } else { 14629 storeLE( mkexpr(addr), mkexpr(src_word) ); 14630 delta += 3+alen+1; 14631 DIP( "pextrw $%d, %s,%s\n", 14632 imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14633 } 14634 14635 goto decode_success; 14636 } 14637 14638 14639 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 14640 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 14641 if ( have66noF2noF3( pfx ) 14642 && sz == 8 /* REX.W is present */ 14643 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) { 14644 14645 Int imm8_0; 14646 IRTemp src_elems = newTemp(Ity_I64); 14647 IRTemp src_vec = newTemp(Ity_V128); 14648 14649 modrm = insn[3]; 14650 14651 if ( epartIsReg( modrm ) ) { 14652 imm8_0 = (Int)(insn[3+1] & 1); 14653 assign( src_elems, getIReg64( eregOfRexRM(pfx,modrm) ) ); 14654 delta += 3+1+1; 14655 DIP( "pinsrq $%d, %s,%s\n", imm8_0, 14656 nameIReg64( eregOfRexRM(pfx, modrm) ), 14657 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14658 } else { 14659 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14660 imm8_0 = (Int)(insn[3+alen] & 1); 14661 assign( src_elems, loadLE( Ity_I64, mkexpr(addr) ) ); 14662 delta += 3+alen+1; 14663 DIP( "pinsrq $%d, %s,%s\n", 14664 imm8_0, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14665 } 14666 14667 UShort mask = 0; 14668 if ( imm8_0 == 0 ) { 14669 mask = 0xFF00; 14670 assign( src_vec, binop( Iop_64HLtoV128, mkU64(0), mkexpr(src_elems) ) ); 14671 } else { 14672 mask = 0x00FF; 14673 assign( src_vec, binop( Iop_64HLtoV128, mkexpr(src_elems), mkU64(0) ) ); 14674 } 14675 14676 putXMMReg( gregOfRexRM(pfx, modrm), 14677 binop( Iop_OrV128, mkexpr(src_vec), 14678 binop( Iop_AndV128, 14679 getXMMReg( gregOfRexRM(pfx, modrm) ), 14680 mkV128(mask) ) ) ); 14681 14682 goto decode_success; 14683 } 14684 14685 14686 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 14687 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 14688 if ( have66noF2noF3( pfx ) 14689 && sz == 2 /* REX.W is NOT present */ 14690 && insn[0] == 0x0F && insn[1] == 0x3A && 
insn[2] == 0x22 ) { 14691 14692 Int imm8_10; 14693 IRTemp src_elems = newTemp(Ity_I32); 14694 IRTemp src_vec = newTemp(Ity_V128); 14695 IRTemp z32 = newTemp(Ity_I32); 14696 14697 modrm = insn[3]; 14698 14699 if ( epartIsReg( modrm ) ) { 14700 imm8_10 = (Int)(insn[3+1] & 3); 14701 assign( src_elems, getIReg32( eregOfRexRM(pfx,modrm) ) ); 14702 delta += 3+1+1; 14703 DIP( "pinsrd $%d, %s,%s\n", imm8_10, 14704 nameIReg32( eregOfRexRM(pfx, modrm) ), 14705 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14706 } else { 14707 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14708 imm8_10 = (Int)(insn[3+alen] & 3); 14709 assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) ); 14710 delta += 3+alen+1; 14711 DIP( "pinsrd $%d, %s,%s\n", 14712 imm8_10, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14713 } 14714 14715 assign(z32, mkU32(0)); 14716 14717 UShort mask = 0; 14718 switch (imm8_10) { 14719 case 3: mask = 0x0FFF; 14720 assign(src_vec, mk128from32s(src_elems, z32, z32, z32)); 14721 break; 14722 case 2: mask = 0xF0FF; 14723 assign(src_vec, mk128from32s(z32, src_elems, z32, z32)); 14724 break; 14725 case 1: mask = 0xFF0F; 14726 assign(src_vec, mk128from32s(z32, z32, src_elems, z32)); 14727 break; 14728 case 0: mask = 0xFFF0; 14729 assign(src_vec, mk128from32s(z32, z32, z32, src_elems)); 14730 break; 14731 default: vassert(0); 14732 } 14733 14734 putXMMReg( gregOfRexRM(pfx, modrm), 14735 binop( Iop_OrV128, mkexpr(src_vec), 14736 binop( Iop_AndV128, 14737 getXMMReg( gregOfRexRM(pfx, modrm) ), 14738 mkV128(mask) ) ) ); 14739 14740 goto decode_success; 14741 } 14742 14743 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 14744 Extract byte from r32/m8 and insert into xmm1 */ 14745 if ( have66noF2noF3( pfx ) 14746 && sz == 2 14747 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x20 ) { 14748 14749 Int imm8; 14750 IRTemp new8 = newTemp(Ity_I64); 14751 14752 modrm = insn[3]; 14753 14754 if ( epartIsReg( modrm ) ) { 14755 imm8 = (Int)(insn[3+1] & 0xF); 14756 assign( new8, binop(Iop_And64, 14757 unop(Iop_32Uto64, 14758 getIReg32(eregOfRexRM(pfx,modrm))), 14759 mkU64(0xFF))); 14760 delta += 3+1+1; 14761 DIP( "pinsrb $%d,%s,%s\n", imm8, 14762 nameIReg32( eregOfRexRM(pfx, modrm) ), 14763 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14764 } else { 14765 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14766 imm8 = (Int)(insn[3+alen] & 0xF); 14767 assign( new8, unop(Iop_8Uto64, loadLE( Ity_I8, mkexpr(addr) ))); 14768 delta += 3+alen+1; 14769 DIP( "pinsrb $%d,%s,%s\n", 14770 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14771 } 14772 14773 // Create a V128 value which has the selected byte in the 14774 // specified lane, and zeroes everywhere else. 14775 IRTemp tmp128 = newTemp(Ity_V128); 14776 IRTemp halfshift = newTemp(Ity_I64); 14777 assign(halfshift, binop(Iop_Shl64, 14778 mkexpr(new8), mkU8(8 * (imm8 & 7)))); 14779 vassert(imm8 >= 0 && imm8 <= 15); 14780 if (imm8 < 8) { 14781 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 14782 } else { 14783 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 14784 } 14785 14786 UShort mask = ~(1 << imm8); 14787 14788 putXMMReg( gregOfRexRM(pfx, modrm), 14789 binop( Iop_OrV128, 14790 mkexpr(tmp128), 14791 binop( Iop_AndV128, 14792 getXMMReg( gregOfRexRM(pfx, modrm) ), 14793 mkV128(mask) ) ) ); 14794 14795 goto decode_success; 14796 } 14797 14798 14799 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract 14800 float from xmm reg and store in gen.reg or mem. 
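      The selected lane is copied out as a raw 32-bit pattern; no
      float-to-integer conversion is involved.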
This is 14801 identical to PEXTRD, except that REX.W appears to be ignored. 14802 */ 14803 if ( have66noF2noF3( pfx ) 14804 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14805 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x17 ) { 14806 14807 Int imm8_10; 14808 IRTemp xmm_vec = newTemp(Ity_V128); 14809 IRTemp src_dword = newTemp(Ity_I32); 14810 14811 modrm = insn[3]; 14812 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14813 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14814 14815 if ( epartIsReg( modrm ) ) { 14816 imm8_10 = (Int)(insn[3+1] & 3); 14817 } else { 14818 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14819 imm8_10 = (Int)(insn[3+alen] & 3); 14820 } 14821 14822 switch ( imm8_10 ) { 14823 case 0: assign( src_dword, mkexpr(t0) ); break; 14824 case 1: assign( src_dword, mkexpr(t1) ); break; 14825 case 2: assign( src_dword, mkexpr(t2) ); break; 14826 case 3: assign( src_dword, mkexpr(t3) ); break; 14827 default: vassert(0); 14828 } 14829 14830 if ( epartIsReg( modrm ) ) { 14831 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 14832 delta += 3+1+1; 14833 DIP( "extractps $%d, %s,%s\n", imm8_10, 14834 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14835 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 14836 } else { 14837 storeLE( mkexpr(addr), mkexpr(src_dword) ); 14838 delta += 3+alen+1; 14839 DIP( "extractps $%d, %s,%s\n", 14840 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14841 } 14842 14843 goto decode_success; 14844 } 14845 14846 14847 /* 66 0F 38 37 = PCMPGTQ 14848 64x2 comparison (signed, presumably; the Intel docs don't say :-) 14849 */ 14850 if ( have66noF2noF3( pfx ) && sz == 2 14851 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x37) { 14852 /* FIXME: this needs an alignment check */ 14853 delta = dis_SSEint_E_to_G( vbi, pfx, delta+3, 14854 "pcmpgtq", Iop_CmpGT64Sx2, False ); 14855 goto decode_success; 14856 } 14857 14858 /* 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128 14859 Maximum of Packed Signed Double Word Integers (XMM) 14860 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128 14861 Minimum of Packed Signed Double Word Integers (XMM) */ 14862 if ( have66noF2noF3( pfx ) && sz == 2 14863 && insn[0] == 0x0F && insn[1] == 0x38 14864 && (insn[2] == 0x3D || insn[2] == 0x39)) { 14865 /* FIXME: this needs an alignment check */ 14866 Bool isMAX = insn[2] == 0x3D; 14867 delta = dis_SSEint_E_to_G( 14868 vbi, pfx, delta+3, 14869 isMAX ? "pmaxsd" : "pminsd", 14870 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4, 14871 False 14872 ); 14873 goto decode_success; 14874 } 14875 14876 /* 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128 14877 Maximum of Packed Unsigned Doubleword Integers (XMM) 14878 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128 14879 Minimum of Packed Unsigned Doubleword Integers (XMM) */ 14880 if ( have66noF2noF3( pfx ) && sz == 2 14881 && insn[0] == 0x0F && insn[1] == 0x38 14882 && (insn[2] == 0x3F || insn[2] == 0x3B)) { 14883 /* FIXME: this needs an alignment check */ 14884 Bool isMAX = insn[2] == 0x3F; 14885 delta = dis_SSEint_E_to_G( 14886 vbi, pfx, delta+3, 14887 isMAX ? "pmaxud" : "pminud", 14888 isMAX ? 
Iop_Max32Ux4 : Iop_Min32Ux4, 14889 False 14890 ); 14891 goto decode_success; 14892 } 14893 14894 /* 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128 14895 Maximum of Packed Unsigned Word Integers (XMM) 14896 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128 14897 Minimum of Packed Unsigned Word Integers (XMM) 14898 */ 14899 if ( have66noF2noF3( pfx ) && sz == 2 14900 && insn[0] == 0x0F && insn[1] == 0x38 14901 && (insn[2] == 0x3E || insn[2] == 0x3A)) { 14902 /* FIXME: this needs an alignment check */ 14903 Bool isMAX = insn[2] == 0x3E; 14904 delta = dis_SSEint_E_to_G( 14905 vbi, pfx, delta+3, 14906 isMAX ? "pmaxuw" : "pminuw", 14907 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8, 14908 False 14909 ); 14910 goto decode_success; 14911 } 14912 14913 /* 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 14914 8Sx16 (signed) max 14915 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 14916 8Sx16 (signed) min 14917 */ 14918 if ( have66noF2noF3( pfx ) && sz == 2 14919 && insn[0] == 0x0F && insn[1] == 0x38 14920 && (insn[2] == 0x3C || insn[2] == 0x38)) { 14921 /* FIXME: this needs an alignment check */ 14922 Bool isMAX = insn[2] == 0x3C; 14923 delta = dis_SSEint_E_to_G( 14924 vbi, pfx, delta+3, 14925 isMAX ? "pmaxsb" : "pminsb", 14926 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16, 14927 False 14928 ); 14929 goto decode_success; 14930 } 14931 14932 /* 66 0f 38 20 /r = PMOVSXBW xmm1, xmm2/m64 14933 Packed Move with Sign Extend from Byte to Word (XMM) */ 14934 if ( have66noF2noF3( pfx ) 14935 && sz == 2 14936 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x20 ) { 14937 14938 modrm = insn[3]; 14939 14940 IRTemp srcVec = newTemp(Ity_V128); 14941 14942 if ( epartIsReg( modrm ) ) { 14943 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14944 delta += 3+1; 14945 DIP( "pmovsxbw %s,%s\n", 14946 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14947 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14948 } else { 14949 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 14950 assign( srcVec, 14951 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 14952 delta += 3+alen; 14953 DIP( "pmovsxbw %s,%s\n", 14954 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14955 } 14956 14957 putXMMReg( gregOfRexRM(pfx, modrm), 14958 binop( Iop_SarN16x8, 14959 binop( Iop_ShlN16x8, 14960 binop( Iop_InterleaveLO8x16, 14961 IRExpr_Const( IRConst_V128(0) ), 14962 mkexpr(srcVec) ), 14963 mkU8(8) ), 14964 mkU8(8) ) ); 14965 14966 goto decode_success; 14967 } 14968 14969 14970 /* 66 0f 38 21 /r = PMOVSXBD xmm1, xmm2/m32 14971 Packed Move with Sign Extend from Byte to DWord (XMM) */ 14972 if ( have66noF2noF3( pfx ) 14973 && sz == 2 14974 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x21 ) { 14975 14976 modrm = insn[3]; 14977 14978 IRTemp srcVec = newTemp(Ity_V128); 14979 14980 if ( epartIsReg( modrm ) ) { 14981 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14982 delta += 3+1; 14983 DIP( "pmovsxbd %s,%s\n", 14984 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14985 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14986 } else { 14987 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 14988 assign( srcVec, 14989 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 14990 delta += 3+alen; 14991 DIP( "pmovsxbd %s,%s\n", 14992 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14993 } 14994 14995 IRTemp zeroVec = newTemp(Ity_V128); 14996 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 14997 14998 putXMMReg( gregOfRexRM(pfx, modrm), 14999 binop( Iop_SarN32x4, 15000 binop( Iop_ShlN32x4, 15001 binop( Iop_InterleaveLO8x16, 15002 mkexpr(zeroVec), 15003 binop( Iop_InterleaveLO8x16, 
15004 mkexpr(zeroVec), 15005 mkexpr(srcVec) ) ), 15006 mkU8(24) ), mkU8(24) ) ); 15007 15008 goto decode_success; 15009 } 15010 15011 15012 /* 66 0f 38 22 /r = PMOVSXBQ xmm1, xmm2/m16 15013 Packed Move with Sign Extend from Byte to QWord (XMM) */ 15014 if ( have66noF2noF3(pfx) 15015 && sz == 2 15016 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x22 ) { 15017 15018 modrm = insn[3]; 15019 15020 IRTemp srcBytes = newTemp(Ity_I16); 15021 15022 if ( epartIsReg(modrm) ) { 15023 assign( srcBytes, getXMMRegLane16( eregOfRexRM(pfx, modrm), 0 ) ); 15024 delta += 3+1; 15025 DIP( "pmovsxbq %s,%s\n", 15026 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15027 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15028 } else { 15029 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15030 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) ); 15031 delta += 3+alen; 15032 DIP( "pmovsxbq %s,%s\n", 15033 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15034 } 15035 15036 putXMMReg( gregOfRexRM( pfx, modrm ), 15037 binop( Iop_64HLtoV128, 15038 unop( Iop_8Sto64, 15039 unop( Iop_16HIto8, 15040 mkexpr(srcBytes) ) ), 15041 unop( Iop_8Sto64, 15042 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) ); 15043 15044 goto decode_success; 15045 } 15046 15047 15048 /* 66 0f 38 23 /r = PMOVSXWD xmm1, xmm2/m64 15049 Packed Move with Sign Extend from Word to DWord (XMM) */ 15050 if ( have66noF2noF3( pfx ) 15051 && sz == 2 15052 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x23 ) { 15053 15054 modrm = insn[3]; 15055 15056 IRTemp srcVec = newTemp(Ity_V128); 15057 15058 if ( epartIsReg(modrm) ) { 15059 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15060 delta += 3+1; 15061 DIP( "pmovsxwd %s,%s\n", 15062 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15063 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15064 } else { 15065 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15066 assign( srcVec, 15067 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15068 delta += 3+alen; 15069 DIP( "pmovsxwd %s,%s\n", 15070 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15071 } 15072 15073 putXMMReg( gregOfRexRM(pfx, modrm), 15074 binop( Iop_SarN32x4, 15075 binop( Iop_ShlN32x4, 15076 binop( Iop_InterleaveLO16x8, 15077 IRExpr_Const( IRConst_V128(0) ), 15078 mkexpr(srcVec) ), 15079 mkU8(16) ), 15080 mkU8(16) ) ); 15081 15082 goto decode_success; 15083 } 15084 15085 15086 /* 66 0f 38 24 /r = PMOVSXWQ xmm1, xmm2/m32 15087 Packed Move with Sign Extend from Word to QWord (XMM) */ 15088 if ( have66noF2noF3( pfx ) 15089 && sz == 2 15090 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x24 ) { 15091 15092 modrm = insn[3]; 15093 15094 IRTemp srcBytes = newTemp(Ity_I32); 15095 15096 if ( epartIsReg( modrm ) ) { 15097 assign( srcBytes, getXMMRegLane32( eregOfRexRM(pfx, modrm), 0 ) ); 15098 delta += 3+1; 15099 DIP( "pmovsxwq %s,%s\n", 15100 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15101 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15102 } else { 15103 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15104 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 15105 delta += 3+alen; 15106 DIP( "pmovsxwq %s,%s\n", 15107 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15108 } 15109 15110 putXMMReg( gregOfRexRM( pfx, modrm ), 15111 binop( Iop_64HLtoV128, 15112 unop( Iop_16Sto64, 15113 unop( Iop_32HIto16, mkexpr(srcBytes) ) ), 15114 unop( Iop_16Sto64, 15115 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) ); 15116 15117 goto decode_success; 15118 } 15119 15120 15121 /* 66 0f 38 25 /r = PMOVSXDQ xmm1, xmm2/m64 15122 Packed Move with Sign Extend from Double Word 
to Quad Word (XMM) */ 15123 if ( have66noF2noF3( pfx ) 15124 && sz == 2 15125 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x25 ) { 15126 15127 modrm = insn[3]; 15128 15129 IRTemp srcBytes = newTemp(Ity_I64); 15130 15131 if ( epartIsReg(modrm) ) { 15132 assign( srcBytes, getXMMRegLane64( eregOfRexRM(pfx, modrm), 0 ) ); 15133 delta += 3+1; 15134 DIP( "pmovsxdq %s,%s\n", 15135 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15136 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15137 } else { 15138 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15139 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) ); 15140 delta += 3+alen; 15141 DIP( "pmovsxdq %s,%s\n", 15142 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15143 } 15144 15145 putXMMReg( gregOfRexRM(pfx, modrm), 15146 binop( Iop_64HLtoV128, 15147 unop( Iop_32Sto64, 15148 unop( Iop_64HIto32, mkexpr(srcBytes) ) ), 15149 unop( Iop_32Sto64, 15150 unop( Iop_64to32, mkexpr(srcBytes) ) ) ) ); 15151 15152 goto decode_success; 15153 } 15154 15155 15156 /* 66 0f 38 30 /r = PMOVZXBW xmm1, xmm2/m64 15157 Packed Move with Zero Extend from Byte to Word (XMM) */ 15158 if ( have66noF2noF3(pfx) 15159 && sz == 2 15160 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x30 ) { 15161 15162 modrm = insn[3]; 15163 15164 IRTemp srcVec = newTemp(Ity_V128); 15165 15166 if ( epartIsReg(modrm) ) { 15167 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15168 delta += 3+1; 15169 DIP( "pmovzxbw %s,%s\n", 15170 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15171 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15172 } else { 15173 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15174 assign( srcVec, 15175 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15176 delta += 3+alen; 15177 DIP( "pmovzxbw %s,%s\n", 15178 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15179 } 15180 15181 putXMMReg( gregOfRexRM(pfx, modrm), 15182 binop( Iop_InterleaveLO8x16, 15183 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 15184 15185 goto decode_success; 15186 } 15187 15188 15189 /* 66 0f 38 31 /r = PMOVZXBD xmm1, xmm2/m32 15190 Packed Move with Zero Extend from Byte to DWord (XMM) */ 15191 if ( have66noF2noF3( pfx ) 15192 && sz == 2 15193 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x31 ) { 15194 15195 modrm = insn[3]; 15196 15197 IRTemp srcVec = newTemp(Ity_V128); 15198 15199 if ( epartIsReg(modrm) ) { 15200 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15201 delta += 3+1; 15202 DIP( "pmovzxbd %s,%s\n", 15203 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15204 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15205 } else { 15206 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15207 assign( srcVec, 15208 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 15209 delta += 3+alen; 15210 DIP( "pmovzxbd %s,%s\n", 15211 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15212 } 15213 15214 IRTemp zeroVec = newTemp(Ity_V128); 15215 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15216 15217 putXMMReg( gregOfRexRM( pfx, modrm ), 15218 binop( Iop_InterleaveLO8x16, 15219 mkexpr(zeroVec), 15220 binop( Iop_InterleaveLO8x16, 15221 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 15222 15223 goto decode_success; 15224 } 15225 15226 15227 /* 66 0f 38 32 /r = PMOVZXBQ xmm1, xmm2/m16 15228 Packed Move with Zero Extend from Byte to QWord (XMM) */ 15229 if ( have66noF2noF3( pfx ) 15230 && sz == 2 15231 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x32 ) { 15232 15233 modrm = insn[3]; 15234 15235 IRTemp srcVec = newTemp(Ity_V128); 15236 15237 if ( epartIsReg(modrm) ) { 
15238 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15239 delta += 3+1; 15240 DIP( "pmovzxbq %s,%s\n", 15241 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15242 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15243 } else { 15244 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15245 assign( srcVec, 15246 unop( Iop_32UtoV128, 15247 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ) ) ) ); 15248 delta += 3+alen; 15249 DIP( "pmovzxbq %s,%s\n", 15250 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15251 } 15252 15253 IRTemp zeroVec = newTemp(Ity_V128); 15254 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15255 15256 putXMMReg( gregOfRexRM( pfx, modrm ), 15257 binop( Iop_InterleaveLO8x16, 15258 mkexpr(zeroVec), 15259 binop( Iop_InterleaveLO8x16, 15260 mkexpr(zeroVec), 15261 binop( Iop_InterleaveLO8x16, 15262 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 15263 15264 goto decode_success; 15265 } 15266 15267 15268 /* 66 0f 38 33 /r = PMOVZXWD xmm1, xmm2/m64 15269 Packed Move with Zero Extend from Word to DWord (XMM) */ 15270 if ( have66noF2noF3( pfx ) 15271 && sz == 2 15272 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x33 ) { 15273 15274 modrm = insn[3]; 15275 15276 IRTemp srcVec = newTemp(Ity_V128); 15277 15278 if ( epartIsReg(modrm) ) { 15279 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15280 delta += 3+1; 15281 DIP( "pmovzxwd %s,%s\n", 15282 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15283 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15284 } else { 15285 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15286 assign( srcVec, 15287 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15288 delta += 3+alen; 15289 DIP( "pmovzxwd %s,%s\n", 15290 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15291 } 15292 15293 putXMMReg( gregOfRexRM(pfx, modrm), 15294 binop( Iop_InterleaveLO16x8, 15295 IRExpr_Const( IRConst_V128(0) ), 15296 mkexpr(srcVec) ) ); 15297 15298 goto decode_success; 15299 } 15300 15301 15302 /* 66 0f 38 34 /r = PMOVZXWQ xmm1, xmm2/m32 15303 Packed Move with Zero Extend from Word to QWord (XMM) */ 15304 if ( have66noF2noF3( pfx ) 15305 && sz == 2 15306 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x34 ) { 15307 15308 modrm = insn[3]; 15309 15310 IRTemp srcVec = newTemp(Ity_V128); 15311 15312 if ( epartIsReg( modrm ) ) { 15313 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15314 delta += 3+1; 15315 DIP( "pmovzxwq %s,%s\n", 15316 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15317 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15318 } else { 15319 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15320 assign( srcVec, 15321 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 15322 delta += 3+alen; 15323 DIP( "pmovzxwq %s,%s\n", 15324 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15325 } 15326 15327 IRTemp zeroVec = newTemp( Ity_V128 ); 15328 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15329 15330 putXMMReg( gregOfRexRM( pfx, modrm ), 15331 binop( Iop_InterleaveLO16x8, 15332 mkexpr(zeroVec), 15333 binop( Iop_InterleaveLO16x8, 15334 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 15335 15336 goto decode_success; 15337 } 15338 15339 15340 /* 66 0f 38 35 /r = PMOVZXDQ xmm1, xmm2/m64 15341 Packed Move with Zero Extend from DWord to QWord (XMM) */ 15342 if ( have66noF2noF3( pfx ) 15343 && sz == 2 15344 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x35 ) { 15345 15346 modrm = insn[3]; 15347 15348 IRTemp srcVec = newTemp(Ity_V128); 15349 15350 if ( epartIsReg(modrm) ) { 15351 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15352 
delta += 3+1; 15353 DIP( "pmovzxdq %s,%s\n", 15354 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15355 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15356 } else { 15357 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15358 assign( srcVec, 15359 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15360 delta += 3+alen; 15361 DIP( "pmovzxdq %s,%s\n", 15362 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15363 } 15364 15365 putXMMReg( gregOfRexRM(pfx, modrm), 15366 binop( Iop_InterleaveLO32x4, 15367 IRExpr_Const( IRConst_V128(0) ), 15368 mkexpr(srcVec) ) ); 15369 15370 goto decode_success; 15371 } 15372 15373 15374 /* 66 0f 38 40 /r = PMULLD xmm1, xmm2/m128 15375 32x4 integer multiply from xmm2/m128 to xmm1 */ 15376 if ( have66noF2noF3( pfx ) 15377 && sz == 2 15378 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x40 ) { 15379 15380 modrm = insn[3]; 15381 15382 IRTemp argL = newTemp(Ity_V128); 15383 IRTemp argR = newTemp(Ity_V128); 15384 15385 if ( epartIsReg(modrm) ) { 15386 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15387 delta += 3+1; 15388 DIP( "pmulld %s,%s\n", 15389 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15390 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15391 } else { 15392 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15393 gen_SEGV_if_not_16_aligned( addr ); 15394 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 15395 delta += 3+alen; 15396 DIP( "pmulld %s,%s\n", 15397 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15398 } 15399 15400 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 15401 15402 putXMMReg( gregOfRexRM(pfx, modrm), 15403 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) ); 15404 15405 goto decode_success; 15406 } 15407 15408 15409 /* F3 0F B8 = POPCNT{W,L,Q} 15410 Count the number of 1 bits in a register 15411 */ 15412 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 15413 && insn[0] == 0x0F && insn[1] == 0xB8) { 15414 vassert(sz == 2 || sz == 4 || sz == 8); 15415 /*IRType*/ ty = szToITy(sz); 15416 IRTemp src = newTemp(ty); 15417 modrm = insn[2]; 15418 if (epartIsReg(modrm)) { 15419 assign(src, getIRegE(sz, pfx, modrm)); 15420 delta += 2+1; 15421 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15422 nameIRegG(sz, pfx, modrm)); 15423 } else { 15424 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0); 15425 assign(src, loadLE(ty, mkexpr(addr))); 15426 delta += 2+alen; 15427 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf, 15428 nameIRegG(sz, pfx, modrm)); 15429 } 15430 15431 IRTemp result = gen_POPCOUNT(ty, src); 15432 putIRegG(sz, pfx, modrm, mkexpr(result)); 15433 15434 // Update flags. This is pretty lame .. perhaps can do better 15435 // if this turns out to be performance critical. 15436 // O S A C P are cleared. Z is set if SRC == 0. 15437 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15438 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15439 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15440 stmt( IRStmt_Put( OFFB_CC_DEP1, 15441 binop(Iop_Shl64, 15442 unop(Iop_1Uto64, 15443 binop(Iop_CmpEQ64, 15444 widenUto64(mkexpr(src)), 15445 mkU64(0))), 15446 mkU8(AMD64G_CC_SHIFT_Z)))); 15447 15448 goto decode_success; 15449 } 15450 15451 15452 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 15453 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 15454 */ 15455 if (have66noF2noF3(pfx) 15456 && sz == 2 15457 && insn[0] == 0x0F && insn[1] == 0x3A 15458 && (insn[2] == 0x0B || insn[2] == 0x0A)) { 15459 15460 Bool isD = insn[2] == 0x0B; 15461 IRTemp src = newTemp(isD ? 
Ity_F64 : Ity_F32); 15462 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 15463 Int imm = 0; 15464 15465 modrm = insn[3]; 15466 15467 if (epartIsReg(modrm)) { 15468 assign( src, 15469 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 15470 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 15471 imm = insn[3+1]; 15472 if (imm & ~15) goto decode_failure; 15473 delta += 3+1+1; 15474 DIP( "rounds%c $%d,%s,%s\n", 15475 isD ? 'd' : 's', 15476 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 15477 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15478 } else { 15479 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15480 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 15481 imm = insn[3+alen]; 15482 if (imm & ~15) goto decode_failure; 15483 delta += 3+alen+1; 15484 DIP( "rounds%c $%d,%s,%s\n", 15485 isD ? 'd' : 's', 15486 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15487 } 15488 15489 /* (imm & 3) contains an Intel-encoded rounding mode. Because 15490 that encoding is the same as the encoding for IRRoundingMode, 15491 we can use that value directly in the IR as a rounding 15492 mode. */ 15493 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 15494 (imm & 4) ? get_sse_roundingmode() 15495 : mkU32(imm & 3), 15496 mkexpr(src)) ); 15497 15498 if (isD) 15499 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 15500 else 15501 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 15502 15503 goto decode_success; 15504 } 15505 15506 15507 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */ 15508 if (have66noF2noF3(pfx) 15509 && sz == 2 15510 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x09) { 15511 15512 IRTemp src0 = newTemp(Ity_F64); 15513 IRTemp src1 = newTemp(Ity_F64); 15514 IRTemp res0 = newTemp(Ity_F64); 15515 IRTemp res1 = newTemp(Ity_F64); 15516 IRTemp rm = newTemp(Ity_I32); 15517 Int imm = 0; 15518 15519 modrm = insn[3]; 15520 15521 if (epartIsReg(modrm)) { 15522 assign( src0, 15523 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) ); 15524 assign( src1, 15525 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) ); 15526 imm = insn[3+1]; 15527 if (imm & ~15) goto decode_failure; 15528 delta += 3+1+1; 15529 DIP( "roundpd $%d,%s,%s\n", 15530 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 15531 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15532 } else { 15533 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15534 gen_SEGV_if_not_16_aligned(addr); 15535 assign( src0, loadLE(Ity_F64, 15536 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 15537 assign( src1, loadLE(Ity_F64, 15538 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 15539 imm = insn[3+alen]; 15540 if (imm & ~15) goto decode_failure; 15541 delta += 3+alen+1; 15542 DIP( "roundpd $%d,%s,%s\n", 15543 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15544 } 15545 15546 /* (imm & 3) contains an Intel-encoded rounding mode. Because 15547 that encoding is the same as the encoding for IRRoundingMode, 15548 we can use that value directly in the IR as a rounding 15549 mode. */ 15550 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 15551 15552 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) ); 15553 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) ); 15554 15555 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 15556 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 15557 15558 goto decode_success; 15559 } 15560 15561 15562 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */ 15563 if (have66noF2noF3(pfx) 15564 && sz == 2 15565 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x08) { 15566 15567 IRTemp src0 = newTemp(Ity_F32); 15568 IRTemp src1 = newTemp(Ity_F32); 15569 IRTemp src2 = newTemp(Ity_F32); 15570 IRTemp src3 = newTemp(Ity_F32); 15571 IRTemp res0 = newTemp(Ity_F32); 15572 IRTemp res1 = newTemp(Ity_F32); 15573 IRTemp res2 = newTemp(Ity_F32); 15574 IRTemp res3 = newTemp(Ity_F32); 15575 IRTemp rm = newTemp(Ity_I32); 15576 Int imm = 0; 15577 15578 modrm = insn[3]; 15579 15580 if (epartIsReg(modrm)) { 15581 assign( src0, 15582 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 15583 assign( src1, 15584 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) ); 15585 assign( src2, 15586 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) ); 15587 assign( src3, 15588 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) ); 15589 imm = insn[3+1]; 15590 if (imm & ~15) goto decode_failure; 15591 delta += 3+1+1; 15592 DIP( "roundps $%d,%s,%s\n", 15593 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 15594 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15595 } else { 15596 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15597 gen_SEGV_if_not_16_aligned(addr); 15598 assign( src0, loadLE(Ity_F32, 15599 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 15600 assign( src1, loadLE(Ity_F32, 15601 binop(Iop_Add64, mkexpr(addr), mkU64(4) ))); 15602 assign( src2, loadLE(Ity_F32, 15603 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 15604 assign( src3, loadLE(Ity_F32, 15605 binop(Iop_Add64, mkexpr(addr), mkU64(12) ))); 15606 imm = insn[3+alen]; 15607 if (imm & ~15) goto decode_failure; 15608 delta += 3+alen+1; 15609 DIP( "roundps $%d,%s,%s\n", 15610 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15611 } 15612 15613 /* (imm & 3) contains an Intel-encoded rounding mode. Because 15614 that encoding is the same as the encoding for IRRoundingMode, 15615 we can use that value directly in the IR as a rounding 15616 mode. */ 15617 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 15618 15619 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) ); 15620 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) ); 15621 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) ); 15622 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) ); 15623 15624 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 15625 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 15626 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) ); 15627 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) ); 15628 15629 goto decode_success; 15630 } 15631 15632 15633 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, 15634 which we can only decode if we're sure this is an AMD cpu that 15635 supports LZCNT, since otherwise it's BSR, which behaves 15636 differently. 
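      For example, lzcntl of 0x00010000 gives 15; a zero source gives the
      operand width (16, 32 or 64) and sets C, whereas BSR would leave the
      destination undefined and set Z instead.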
*/ 15637 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 15638 && insn[0] == 0x0F && insn[1] == 0xBD 15639 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) { 15640 vassert(sz == 2 || sz == 4 || sz == 8); 15641 /*IRType*/ ty = szToITy(sz); 15642 IRTemp src = newTemp(ty); 15643 modrm = insn[2]; 15644 if (epartIsReg(modrm)) { 15645 assign(src, getIRegE(sz, pfx, modrm)); 15646 delta += 2+1; 15647 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15648 nameIRegG(sz, pfx, modrm)); 15649 } else { 15650 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0); 15651 assign(src, loadLE(ty, mkexpr(addr))); 15652 delta += 2+alen; 15653 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 15654 nameIRegG(sz, pfx, modrm)); 15655 } 15656 15657 IRTemp res = gen_LZCNT(ty, src); 15658 putIRegG(sz, pfx, modrm, mkexpr(res)); 15659 15660 // Update flags. This is pretty lame .. perhaps can do better 15661 // if this turns out to be performance critical. 15662 // O S A P are cleared. Z is set if RESULT == 0. 15663 // C is set if SRC is zero. 15664 IRTemp src64 = newTemp(Ity_I64); 15665 IRTemp res64 = newTemp(Ity_I64); 15666 assign(src64, widenUto64(mkexpr(src))); 15667 assign(res64, widenUto64(mkexpr(res))); 15668 15669 IRTemp oszacp = newTemp(Ity_I64); 15670 assign( 15671 oszacp, 15672 binop(Iop_Or64, 15673 binop(Iop_Shl64, 15674 unop(Iop_1Uto64, 15675 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 15676 mkU8(AMD64G_CC_SHIFT_Z)), 15677 binop(Iop_Shl64, 15678 unop(Iop_1Uto64, 15679 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 15680 mkU8(AMD64G_CC_SHIFT_C)) 15681 ) 15682 ); 15683 15684 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15685 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15686 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15687 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 15688 15689 goto decode_success; 15690 } 15691 15692 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1 15693 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1 15694 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1 15695 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1 15696 (selected special cases that actually occur in glibc, 15697 not by any means a complete implementation.) 15698 */ 15699 if (have66noF2noF3(pfx) 15700 && sz == 2 15701 && insn[0] == 0x0F && insn[1] == 0x3A 15702 && (insn[2] >= 0x60 && insn[2] <= 0x63)) { 15703 15704 UInt isISTRx = insn[2] & 2; 15705 UInt isxSTRM = (insn[2] & 1) ^ 1; 15706 UInt regNoL = 0; 15707 UInt regNoR = 0; 15708 UChar imm = 0; 15709 15710 /* This is a nasty kludge. We need to pass 2 x V128 to the 15711 helper (which is clean). Since we can't do that, use a dirty 15712 helper to compute the results directly from the XMM regs in 15713 the guest state. That means for the memory case, we need to 15714 move the left operand into a pseudo-register (XMM16, let's 15715 call it). */ 15716 modrm = insn[3]; 15717 if (epartIsReg(modrm)) { 15718 regNoL = eregOfRexRM(pfx, modrm); 15719 regNoR = gregOfRexRM(pfx, modrm); 15720 imm = insn[3+1]; 15721 delta += 3+1+1; 15722 } else { 15723 regNoL = 16; /* use XMM16 as an intermediary */ 15724 regNoR = gregOfRexRM(pfx, modrm); 15725 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15726 /* No alignment check; I guess that makes sense, given that 15727 these insns are for dealing with C style strings. 
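      The Intel docs do appear to exempt the 128-bit memory operand of these
      instructions from the usual 16-byte alignment requirement, so omitting
      the check looks consistent with real hardware.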
*/ 15728 stmt( IRStmt_Put( OFFB_XMM16, loadLE(Ity_V128, mkexpr(addr)) )); 15729 imm = insn[3+alen]; 15730 delta += 3+alen+1; 15731 } 15732 15733 /* Now we know the XMM reg numbers for the operands, and the 15734 immediate byte. Is it one we can actually handle? Throw out 15735 any cases for which the helper function has not been 15736 verified. */ 15737 switch (imm) { 15738 case 0x00: 15739 case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12: 15740 case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A: 15741 break; 15742 default: 15743 goto decode_failure; 15744 } 15745 15746 /* Who ya gonna call? Presumably not Ghostbusters. */ 15747 void* fn = &amd64g_dirtyhelper_PCMPxSTRx; 15748 HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx"; 15749 15750 /* Round up the arguments. Note that this is a kludge -- the 15751 use of mkU64 rather than mkIRExpr_HWord implies the 15752 assumption that the host's word size is 64-bit. */ 15753 UInt gstOffL = regNoL == 16 ? OFFB_XMM16 : xmmGuestRegOffset(regNoL); 15754 UInt gstOffR = xmmGuestRegOffset(regNoR); 15755 15756 IRExpr* opc4_and_imm = mkU64((insn[2] << 8) | (imm & 0xFF)); 15757 IRExpr* gstOffLe = mkU64(gstOffL); 15758 IRExpr* gstOffRe = mkU64(gstOffR); 15759 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8); 15760 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8); 15761 IRExpr** args 15762 = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN ); 15763 15764 IRTemp resT = newTemp(Ity_I64); 15765 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args ); 15766 /* It's not really a dirty call, but we can't use the clean 15767 helper mechanism here for the very lame reason that we can't 15768 pass 2 x V128s by value to a helper, nor get one back. Hence 15769 this roundabout scheme. */ 15770 d->needsBBP = True; 15771 d->nFxState = 2; 15772 d->fxState[0].fx = Ifx_Read; 15773 d->fxState[0].offset = gstOffL; 15774 d->fxState[0].size = sizeof(U128); 15775 d->fxState[1].fx = Ifx_Read; 15776 d->fxState[1].offset = gstOffR; 15777 d->fxState[1].size = sizeof(U128); 15778 if (isxSTRM) { 15779 /* Declare that the helper writes XMM0. */ 15780 d->nFxState = 3; 15781 d->fxState[2].fx = Ifx_Write; 15782 d->fxState[2].offset = xmmGuestRegOffset(0); 15783 d->fxState[2].size = sizeof(U128); 15784 } 15785 15786 stmt( IRStmt_Dirty(d) ); 15787 15788 /* Now resT[15:0] holds the new OSZACP values, so the condition 15789 codes must be updated. And for a xSTRI case, resT[31:16] 15790 holds the new ECX value, so stash that too. */ 15791 if (!isxSTRM) { 15792 putIReg64(R_RCX, binop(Iop_And64, 15793 binop(Iop_Shr64, mkexpr(resT), mkU8(16)), 15794 mkU64(0xFFFF))); 15795 } 15796 15797 stmt( IRStmt_Put( 15798 OFFB_CC_DEP1, 15799 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)) 15800 )); 15801 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15802 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15803 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15804 15805 if (regNoL == 16) { 15806 DIP("pcmp%cstr%c $%x,%s,%s\n", 15807 isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 15808 (UInt)imm, dis_buf, nameXMMReg(regNoR)); 15809 } else { 15810 DIP("pcmp%cstr%c $%x,%s,%s\n", 15811 isISTRx ? 'i' : 'e', isxSTRM ? 
'm' : 'i', 15812 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR)); 15813 } 15814 15815 goto decode_success; 15816 } 15817 15818 15819 /* 66 0f 38 17 /r = PTEST xmm1, xmm2/m128 15820 Logical compare (set ZF and CF from AND/ANDN of the operands) */ 15821 if (have66noF2noF3( pfx ) 15822 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 15823 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) { 15824 modrm = insn[3]; 15825 IRTemp vecE = newTemp(Ity_V128); 15826 IRTemp vecG = newTemp(Ity_V128); 15827 15828 if ( epartIsReg(modrm) ) { 15829 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 15830 delta += 3+1; 15831 DIP( "ptest %s,%s\n", 15832 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15833 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15834 } else { 15835 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15836 gen_SEGV_if_not_16_aligned( addr ); 15837 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 15838 delta += 3+alen; 15839 DIP( "ptest %s,%s\n", 15840 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15841 } 15842 15843 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 15844 15845 /* Set Z=1 iff (vecE & vecG) == 0 15846 Set C=1 iff (vecE & not vecG) == 0 15847 */ 15848 15849 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 15850 IRTemp andV = newTemp(Ity_V128); 15851 IRTemp andnV = newTemp(Ity_V128); 15852 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG))); 15853 assign(andnV, binop(Iop_AndV128, 15854 mkexpr(vecE), 15855 binop(Iop_XorV128, mkexpr(vecG), 15856 mkV128(0xFFFF)))); 15857 15858 /* The same, but reduced to 64-bit values, by or-ing the top 15859 and bottom 64-bits together. It relies on this trick: 15860 15861 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence 15862 15863 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly 15864 InterleaveHI64x2([a,b],[a,b]) == [a,a] 15865 15866 and so the OR of the above 2 exprs produces 15867 [a OR b, a OR b], from which we simply take the lower half. 15868 */ 15869 IRTemp and64 = newTemp(Ity_I64); 15870 IRTemp andn64 = newTemp(Ity_I64); 15871 15872 assign( 15873 and64, 15874 unop(Iop_V128to64, 15875 binop(Iop_OrV128, 15876 binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)), 15877 binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV)) 15878 ) 15879 ) 15880 ); 15881 15882 assign( 15883 andn64, 15884 unop(Iop_V128to64, 15885 binop(Iop_OrV128, 15886 binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)), 15887 binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV)) 15888 ) 15889 ) 15890 ); 15891 15892 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can 15893 slice out the Z and C bits conveniently. We use the standard 15894 trick all-zeroes -> all-zeroes, anything-else -> all-ones 15895 done by "(x | -x) >>s (word-size - 1)". 15896 */ 15897 IRTemp z64 = newTemp(Ity_I64); 15898 IRTemp c64 = newTemp(Ity_I64); 15899 assign(z64, 15900 unop(Iop_Not64, 15901 binop(Iop_Sar64, 15902 binop(Iop_Or64, 15903 binop(Iop_Sub64, mkU64(0), mkexpr(and64)), 15904 mkexpr(and64) 15905 ), 15906 mkU8(63))) 15907 ); 15908 15909 assign(c64, 15910 unop(Iop_Not64, 15911 binop(Iop_Sar64, 15912 binop(Iop_Or64, 15913 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)), 15914 mkexpr(andn64) 15915 ), 15916 mkU8(63))) 15917 ); 15918 15919 /* And finally, slice out the Z and C flags and set the flags 15920 thunk to COPY for them. OSAP are set to zero. 
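      As a sanity check on the trick above: if (vecE & vecG) is all zeroes
      then and64 == 0, (0 | -0) >>s 63 == 0, and z64 is all ones, so the AND
      with AMD64G_CC_MASK_Z below leaves Z set; any nonzero and64 makes the
      shift produce all ones, z64 becomes zero, and Z ends up clear.  The C
      bit is derived from andn64 in exactly the same way.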
*/ 15921 IRTemp newOSZACP = newTemp(Ity_I64); 15922 assign(newOSZACP, 15923 binop(Iop_Or64, 15924 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)), 15925 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C)) 15926 ) 15927 ); 15928 15929 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP))); 15930 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15931 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15932 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15933 15934 goto decode_success; 15935 } 15936 15937 /* 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran) 15938 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran) 15939 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran) 15940 Blend at various granularities, with XMM0 (implicit operand) 15941 providing the controlling mask. 15942 */ 15943 if (have66noF2noF3(pfx) && sz == 2 15944 && insn[0] == 0x0F && insn[1] == 0x38 15945 && (insn[2] == 0x15 || insn[2] == 0x14 || insn[2] == 0x10)) { 15946 modrm = insn[3]; 15947 15948 HChar* nm = NULL; 15949 UInt gran = 0; 15950 IROp opSAR = Iop_INVALID; 15951 switch (insn[2]) { 15952 case 0x15: 15953 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2; 15954 break; 15955 case 0x14: 15956 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4; 15957 break; 15958 case 0x10: 15959 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16; 15960 break; 15961 } 15962 vassert(nm); 15963 15964 IRTemp vecE = newTemp(Ity_V128); 15965 IRTemp vecG = newTemp(Ity_V128); 15966 IRTemp vec0 = newTemp(Ity_V128); 15967 15968 if ( epartIsReg(modrm) ) { 15969 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 15970 delta += 3+1; 15971 DIP( "%s %s,%s\n", nm, 15972 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15973 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15974 } else { 15975 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15976 gen_SEGV_if_not_16_aligned( addr ); 15977 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 15978 delta += 3+alen; 15979 DIP( "%s %s,%s\n", nm, 15980 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15981 } 15982 15983 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 15984 assign(vec0, getXMMReg(0)); 15985 15986 /* Now the tricky bit is to convert vec0 into a suitable mask, 15987 by copying the most significant bit of each lane into all 15988 positions in the lane. */ 15989 IRTemp sh = newTemp(Ity_I8); 15990 assign(sh, mkU8(8 * gran - 1)); 15991 15992 IRTemp mask = newTemp(Ity_V128); 15993 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh))); 15994 15995 IRTemp notmask = newTemp(Ity_V128); 15996 assign(notmask, unop(Iop_NotV128, mkexpr(mask))); 15997 15998 IRExpr* res = binop(Iop_OrV128, 15999 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)), 16000 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))); 16001 putXMMReg(gregOfRexRM(pfx, modrm), res); 16002 16003 goto decode_success; 16004 } 16005 16006 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok) 16007 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32 16008 The decoding on this is a bit unusual. 16009 */ 16010 if (haveF2noF3(pfx) 16011 && insn[0] == 0x0F && insn[1] == 0x38 16012 && (insn[2] == 0xF1 16013 || (insn[2] == 0xF0 && !have66(pfx)))) { 16014 modrm = insn[3]; 16015 16016 if (insn[2] == 0xF0) 16017 sz = 1; 16018 else 16019 vassert(sz == 2 || sz == 4 || sz == 8); 16020 16021 IRType tyE = szToITy(sz); 16022 IRTemp valE = newTemp(tyE); 16023 16024 if (epartIsReg(modrm)) { 16025 assign(valE, getIRegE(sz, pfx, modrm)); 16026 delta += 3+1; 16027 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm), 16028 nameIRegG(1==getRexW(pfx) ? 
8 : 4 ,pfx, modrm));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
         assign(valE, loadLE(tyE, mkexpr(addr)));
         delta += 3+alen;
         DIP("crc32b %s,%s\n", dis_buf,
             nameIRegG(1==getRexW(pfx) ? 8 : 4 ,pfx, modrm));
      }

      /* Somewhat funny getting/putting of the crc32 value, in order
         to ensure that it turns into 64-bit gets and puts. However,
         mask off the upper 32 bits so as to not get memcheck false
         +ves around the helper call. */
      IRTemp valG0 = newTemp(Ity_I64);
      assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
                          mkU64(0xFFFFFFFF)));

      HChar* nm = NULL;
      void* fn = NULL;
      switch (sz) {
         case 1: nm = "amd64g_calc_crc32b";
                 fn = &amd64g_calc_crc32b; break;
         case 2: nm = "amd64g_calc_crc32w";
                 fn = &amd64g_calc_crc32w; break;
         case 4: nm = "amd64g_calc_crc32l";
                 fn = &amd64g_calc_crc32l; break;
         case 8: nm = "amd64g_calc_crc32q";
                 fn = &amd64g_calc_crc32q; break;
      }
      vassert(nm && fn);
      IRTemp valG1 = newTemp(Ity_I64);
      assign(valG1,
             mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
                           mkIRExprVec_2(mkexpr(valG0),
                                         widenUto64(mkexpr(valE)))));

      putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
      goto decode_success;
   }

   /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
      2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
   if ( have66noF2noF3( pfx )
        && sz == 2
        && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x2B ) {

      modrm = insn[3];

      IRTemp argL = newTemp(Ity_V128);
      IRTemp argR = newTemp(Ity_V128);

      if ( epartIsReg(modrm) ) {
         assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
         delta += 3+1;
         DIP( "packusdw %s,%s\n",
              nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      } else {
         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
         delta += 3+alen;
         DIP( "packusdw %s,%s\n",
              dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      }

      assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

      putXMMReg( gregOfRexRM(pfx, modrm),
                 binop( Iop_QNarrowBin32Sto16Ux8,
                        mkexpr(argL), mkexpr(argR)) );

      goto decode_success;
   }

   /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical. It's a copy-paste of PMULUDQ, too.
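      To make the signedness concrete: multiplying lane values 0xFFFFFFFF (-1)
      and 0x00000002 yields 0xFFFFFFFFFFFFFFFE (-2) in the corresponding
      64-bit half here, whereas the unsigned PMULUDQ form would yield
      0x00000001FFFFFFFE.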
*/ 16108 if (have66noF2noF3(pfx) && sz == 2 16109 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x28) { 16110 IRTemp sV, dV; 16111 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 16112 sV = newTemp(Ity_V128); 16113 dV = newTemp(Ity_V128); 16114 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 16115 t1 = newTemp(Ity_I64); 16116 t0 = newTemp(Ity_I64); 16117 modrm = insn[3]; 16118 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 16119 16120 if (epartIsReg(modrm)) { 16121 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 16122 delta += 3+1; 16123 DIP("pmuldq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 16124 nameXMMReg(gregOfRexRM(pfx,modrm))); 16125 } else { 16126 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 16127 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 16128 delta += 3+alen; 16129 DIP("pmuldq %s,%s\n", dis_buf, 16130 nameXMMReg(gregOfRexRM(pfx,modrm))); 16131 } 16132 16133 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 16134 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 16135 16136 assign( t0, binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ); 16137 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) ); 16138 assign( t1, binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)) ); 16139 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) ); 16140 goto decode_success; 16141 } 16142 16143 /* 66 0F 38 29 = PCMPEQQ 16144 64x2 equality comparison 16145 */ 16146 if ( have66noF2noF3( pfx ) && sz == 2 16147 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x29) { 16148 /* FIXME: this needs an alignment check */ 16149 delta = dis_SSEint_E_to_G( vbi, pfx, delta+3, 16150 "pcmpeqq", Iop_CmpEQ64x2, False ); 16151 goto decode_success; 16152 } 16153 16154 /* ---------------------------------------------------- */ 16155 /* --- end of the SSE4 decoder --- */ 16156 /* ---------------------------------------------------- */ 16157 16158 /*after_sse_decoders:*/ 16159 16160 /* Get the primary opcode. */ 16161 opc = getUChar(delta); delta++; 16162 16163 /* We get here if the current insn isn't SSE, or this CPU doesn't 16164 support SSE. */ 16165 16166 switch (opc) { 16167 16168 /* ------------------------ Control flow --------------- */ 16169 16170 case 0xC2: /* RET imm16 */ 16171 if (have66orF2orF3(pfx)) goto decode_failure; 16172 d64 = getUDisp16(delta); 16173 delta += 2; 16174 dis_ret(vbi, d64); 16175 dres.whatNext = Dis_StopHere; 16176 DIP("ret %lld\n", d64); 16177 break; 16178 16179 case 0xC3: /* RET */ 16180 if (have66orF2(pfx)) goto decode_failure; 16181 /* F3 is acceptable on AMD. */ 16182 dis_ret(vbi, 0); 16183 dres.whatNext = Dis_StopHere; 16184 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 16185 break; 16186 16187 case 0xE8: /* CALL J4 */ 16188 if (haveF2orF3(pfx)) goto decode_failure; 16189 d64 = getSDisp32(delta); delta += 4; 16190 d64 += (guest_RIP_bbstart+delta); 16191 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 16192 t1 = newTemp(Ity_I64); 16193 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 16194 putIReg64(R_RSP, mkexpr(t1)); 16195 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 16196 t2 = newTemp(Ity_I64); 16197 assign(t2, mkU64((Addr64)d64)); 16198 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 16199 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 16200 /* follow into the call target. */ 16201 dres.whatNext = Dis_ResteerU; 16202 dres.continueAt = d64; 16203 } else { 16204 jmp_lit(Ijk_Call,d64); 16205 dres.whatNext = Dis_StopHere; 16206 } 16207 DIP("call 0x%llx\n",d64); 16208 break; 16209 16210 //.. //-- case 0xC8: /* ENTER */ 16211 //.. 
//-- d32 = getUDisp16(eip); eip += 2; 16212 //.. //-- abyte = getUChar(delta); delta++; 16213 //.. //-- 16214 //.. //-- vg_assert(sz == 4); 16215 //.. //-- vg_assert(abyte == 0); 16216 //.. //-- 16217 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); 16218 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); 16219 //.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); 16220 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 16221 //.. //-- uLiteral(cb, sz); 16222 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 16223 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 16224 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); 16225 //.. //-- if (d32) { 16226 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 16227 //.. //-- uLiteral(cb, d32); 16228 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 16229 //.. //-- } 16230 //.. //-- DIP("enter 0x%x, 0x%x", d32, abyte); 16231 //.. //-- break; 16232 16233 case 0xC8: /* ENTER */ 16234 /* Same comments re operand size as for LEAVE below apply. 16235 Also, only handles the case "enter $imm16, $0"; other cases 16236 for the second operand (nesting depth) are not handled. */ 16237 if (sz != 4) 16238 goto decode_failure; 16239 d64 = getUDisp16(delta); 16240 delta += 2; 16241 vassert(d64 >= 0 && d64 <= 0xFFFF); 16242 if (getUChar(delta) != 0) 16243 goto decode_failure; 16244 delta++; 16245 /* Intel docs seem to suggest: 16246 push rbp 16247 temp = rsp 16248 rbp = temp 16249 rsp = rsp - imm16 16250 */ 16251 t1 = newTemp(Ity_I64); 16252 assign(t1, getIReg64(R_RBP)); 16253 t2 = newTemp(Ity_I64); 16254 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 16255 putIReg64(R_RSP, mkexpr(t2)); 16256 storeLE(mkexpr(t2), mkexpr(t1)); 16257 putIReg64(R_RBP, mkexpr(t2)); 16258 if (d64 > 0) { 16259 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64))); 16260 } 16261 DIP("enter $%u, $0\n", (UInt)d64); 16262 break; 16263 16264 case 0xC9: /* LEAVE */ 16265 /* In 64-bit mode this defaults to a 64-bit operand size. There 16266 is no way to encode a 32-bit variant. Hence sz==4 but we do 16267 it as if sz=8. */ 16268 if (sz != 4) 16269 goto decode_failure; 16270 t1 = newTemp(Ity_I64); 16271 t2 = newTemp(Ity_I64); 16272 assign(t1, getIReg64(R_RBP)); 16273 /* First PUT RSP looks redundant, but need it because RSP must 16274 always be up-to-date for Memcheck to work... */ 16275 putIReg64(R_RSP, mkexpr(t1)); 16276 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 16277 putIReg64(R_RBP, mkexpr(t2)); 16278 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 16279 DIP("leave\n"); 16280 break; 16281 16282 //.. //-- /* ---------------- Misc weird-ass insns --------------- */ 16283 //.. //-- 16284 //.. //-- case 0x27: /* DAA */ 16285 //.. //-- case 0x2F: /* DAS */ 16286 //.. //-- t1 = newTemp(cb); 16287 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1); 16288 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ 16289 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16290 //.. //-- uWiden(cb, 1, False); 16291 //.. //-- uInstr0(cb, CALLM_S, 0); 16292 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16293 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16294 //.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) ); 16295 //.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO); 16296 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16297 //.. //-- uInstr0(cb, CALLM_E, 0); 16298 //.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL); 16299 //.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n"); 16300 //.. 
//-- break; 16301 //.. //-- 16302 //.. //-- case 0x37: /* AAA */ 16303 //.. //-- case 0x3F: /* AAS */ 16304 //.. //-- t1 = newTemp(cb); 16305 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); 16306 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ 16307 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16308 //.. //-- uWiden(cb, 2, False); 16309 //.. //-- uInstr0(cb, CALLM_S, 0); 16310 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16311 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16312 //.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) ); 16313 //.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty); 16314 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16315 //.. //-- uInstr0(cb, CALLM_E, 0); 16316 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); 16317 //.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n"); 16318 //.. //-- break; 16319 //.. //-- 16320 //.. //-- case 0xD4: /* AAM */ 16321 //.. //-- case 0xD5: /* AAD */ 16322 //.. //-- d32 = getUChar(delta); delta++; 16323 //.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !"); 16324 //.. //-- t1 = newTemp(cb); 16325 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); 16326 //.. //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */ 16327 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16328 //.. //-- uWiden(cb, 2, False); 16329 //.. //-- uInstr0(cb, CALLM_S, 0); 16330 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16331 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16332 //.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) ); 16333 //.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty); 16334 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16335 //.. //-- uInstr0(cb, CALLM_E, 0); 16336 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); 16337 //.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n"); 16338 //.. //-- break; 16339 16340 /* ------------------------ CWD/CDQ -------------------- */ 16341 16342 case 0x98: /* CBW */ 16343 if (haveF2orF3(pfx)) goto decode_failure; 16344 if (sz == 8) { 16345 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 16346 DIP(/*"cdqe\n"*/"cltq"); 16347 break; 16348 } 16349 if (sz == 4) { 16350 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 16351 DIP("cwtl\n"); 16352 break; 16353 } 16354 if (sz == 2) { 16355 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 16356 DIP("cbw\n"); 16357 break; 16358 } 16359 goto decode_failure; 16360 16361 case 0x99: /* CWD/CDQ/CQO */ 16362 if (haveF2orF3(pfx)) goto decode_failure; 16363 vassert(sz == 2 || sz == 4 || sz == 8); 16364 ty = szToITy(sz); 16365 putIRegRDX( sz, 16366 binop(mkSizedOp(ty,Iop_Sar8), 16367 getIRegRAX(sz), 16368 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 16369 DIP(sz == 2 ? "cwd\n" 16370 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 16371 : "cqo\n")); 16372 break; 16373 16374 /* ------------------------ FPU ops -------------------- */ 16375 16376 case 0x9E: /* SAHF */ 16377 codegen_SAHF(); 16378 DIP("sahf\n"); 16379 break; 16380 16381 case 0x9F: /* LAHF */ 16382 codegen_LAHF(); 16383 DIP("lahf\n"); 16384 break; 16385 16386 case 0x9B: /* FWAIT */ 16387 /* ignore? 
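Treating it as a no-op looks acceptable: architecturally FWAIT only
        forces any pending unmasked x87 exception to be reported, and this
        translation does not raise such exceptions, so there is nothing to do.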
*/ 16388 DIP("fwait\n"); 16389 break; 16390 16391 case 0xD8: 16392 case 0xD9: 16393 case 0xDA: 16394 case 0xDB: 16395 case 0xDC: 16396 case 0xDD: 16397 case 0xDE: 16398 case 0xDF: { 16399 Bool redundantREXWok = False; 16400 16401 if (haveF2orF3(pfx)) 16402 goto decode_failure; 16403 16404 /* kludge to tolerate redundant rex.w prefixes (should do this 16405 properly one day) */ 16406 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 16407 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 16408 redundantREXWok = True; 16409 16410 if ( (sz == 4 16411 || (sz == 8 && redundantREXWok)) 16412 && haveNo66noF2noF3(pfx)) { 16413 Long delta0 = delta; 16414 Bool decode_OK = False; 16415 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 16416 if (!decode_OK) { 16417 delta = delta0; 16418 goto decode_failure; 16419 } 16420 break; 16421 } else { 16422 goto decode_failure; 16423 } 16424 } 16425 16426 /* ------------------------ INT ------------------------ */ 16427 16428 case 0xCC: /* INT 3 */ 16429 jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta); 16430 dres.whatNext = Dis_StopHere; 16431 DIP("int $0x3\n"); 16432 break; 16433 16434 case 0xCD: { /* INT imm8 */ 16435 IRJumpKind jk = Ijk_Boring; 16436 if (have66orF2orF3(pfx)) goto decode_failure; 16437 d64 = getUChar(delta); delta++; 16438 switch (d64) { 16439 case 32: jk = Ijk_Sys_int32; break; 16440 default: goto decode_failure; 16441 } 16442 guest_RIP_next_mustcheck = True; 16443 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 16444 jmp_lit(jk, guest_RIP_next_assumed); 16445 /* It's important that all ArchRegs carry their up-to-date value 16446 at this point. So we declare an end-of-block here, which 16447 forces any TempRegs caching ArchRegs to be flushed. */ 16448 dres.whatNext = Dis_StopHere; 16449 DIP("int $0x%02x\n", (UInt)d64); 16450 break; 16451 } 16452 16453 /* ------------------------ Jcond, byte offset --------- */ 16454 16455 case 0xEB: /* Jb (jump, byte offset) */ 16456 if (haveF2orF3(pfx)) goto decode_failure; 16457 if (sz != 4) 16458 goto decode_failure; /* JRS added 2004 July 11 */ 16459 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 16460 delta++; 16461 if (resteerOkFn(callback_opaque,d64)) { 16462 dres.whatNext = Dis_ResteerU; 16463 dres.continueAt = d64; 16464 } else { 16465 jmp_lit(Ijk_Boring,d64); 16466 dres.whatNext = Dis_StopHere; 16467 } 16468 DIP("jmp-8 0x%llx\n", d64); 16469 break; 16470 16471 case 0xE9: /* Jv (jump, 16/32 offset) */ 16472 if (haveF2orF3(pfx)) goto decode_failure; 16473 if (sz != 4) 16474 goto decode_failure; /* JRS added 2004 July 11 */ 16475 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); 16476 delta += sz; 16477 if (resteerOkFn(callback_opaque,d64)) { 16478 dres.whatNext = Dis_ResteerU; 16479 dres.continueAt = d64; 16480 } else { 16481 jmp_lit(Ijk_Boring,d64); 16482 dres.whatNext = Dis_StopHere; 16483 } 16484 DIP("jmp 0x%llx\n", d64); 16485 break; 16486 16487 case 0x70: 16488 case 0x71: 16489 case 0x72: /* JBb/JNAEb (jump below) */ 16490 case 0x73: /* JNBb/JAEb (jump not below) */ 16491 case 0x74: /* JZb/JEb (jump zero) */ 16492 case 0x75: /* JNZb/JNEb (jump not zero) */ 16493 case 0x76: /* JBEb/JNAb (jump below or equal) */ 16494 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 16495 case 0x78: /* JSb (jump negative) */ 16496 case 0x79: /* JSb (jump not negative) */ 16497 case 0x7A: /* JP (jump parity even) */ 16498 case 0x7B: /* JNP/JPO (jump parity odd) */ 16499 case 0x7C: /* JLb/JNGEb (jump less) */ 16500 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 16501 case 
0x7E: /* JLEb/JNGb (jump less or equal) */ 16502 case 0x7F: /* JGb/JNLEb (jump greater) */ 16503 { Long jmpDelta; 16504 HChar* comment = ""; 16505 if (haveF2orF3(pfx)) goto decode_failure; 16506 jmpDelta = getSDisp8(delta); 16507 vassert(-128 <= jmpDelta && jmpDelta < 128); 16508 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; 16509 delta++; 16510 if (resteerCisOk 16511 && vex_control.guest_chase_cond 16512 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 16513 && jmpDelta < 0 16514 && resteerOkFn( callback_opaque, d64) ) { 16515 /* Speculation: assume this backward branch is taken. So we 16516 need to emit a side-exit to the insn following this one, 16517 on the negation of the condition, and continue at the 16518 branch target address (d64). If we wind up back at the 16519 first instruction of the trace, just stop; it's better to 16520 let the IR loop unroller handle that case. */ 16521 stmt( IRStmt_Exit( 16522 mk_amd64g_calculate_condition( 16523 (AMD64Condcode)(1 ^ (opc - 0x70))), 16524 Ijk_Boring, 16525 IRConst_U64(guest_RIP_bbstart+delta) ) ); 16526 dres.whatNext = Dis_ResteerC; 16527 dres.continueAt = d64; 16528 comment = "(assumed taken)"; 16529 } 16530 else 16531 if (resteerCisOk 16532 && vex_control.guest_chase_cond 16533 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 16534 && jmpDelta >= 0 16535 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 16536 /* Speculation: assume this forward branch is not taken. So 16537 we need to emit a side-exit to d64 (the dest) and continue 16538 disassembling at the insn immediately following this 16539 one. */ 16540 stmt( IRStmt_Exit( 16541 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 16542 Ijk_Boring, 16543 IRConst_U64(d64) ) ); 16544 dres.whatNext = Dis_ResteerC; 16545 dres.continueAt = guest_RIP_bbstart+delta; 16546 comment = "(assumed not taken)"; 16547 } 16548 else { 16549 /* Conservative default translation - end the block at this 16550 point. */ 16551 jcc_01( (AMD64Condcode)(opc - 0x70), 16552 guest_RIP_bbstart+delta, 16553 d64 ); 16554 dres.whatNext = Dis_StopHere; 16555 } 16556 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); 16557 break; 16558 } 16559 16560 case 0xE3: 16561 /* JRCXZ or JECXZ, depending address size override. */ 16562 if (have66orF2orF3(pfx)) goto decode_failure; 16563 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 16564 delta++; 16565 if (haveASO(pfx)) { 16566 /* 32-bit */ 16567 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 16568 unop(Iop_32Uto64, getIReg32(R_RCX)), 16569 mkU64(0)), 16570 Ijk_Boring, 16571 IRConst_U64(d64)) 16572 ); 16573 DIP("jecxz 0x%llx\n", d64); 16574 } else { 16575 /* 64-bit */ 16576 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 16577 getIReg64(R_RCX), 16578 mkU64(0)), 16579 Ijk_Boring, 16580 IRConst_U64(d64)) 16581 ); 16582 DIP("jrcxz 0x%llx\n", d64); 16583 } 16584 break; 16585 16586 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 16587 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 16588 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 16589 { /* The docs say this uses rCX as a count depending on the 16590 address size override, not the operand one. */ 16591 IRExpr* zbit = NULL; 16592 IRExpr* count = NULL; 16593 IRExpr* cond = NULL; 16594 HChar* xtra = NULL; 16595 16596 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 16597 /* So at this point we've rejected any variants which appear to 16598 be governed by the usual operand-size modifiers. 
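(66, F2, F3 and REX.W-prefixed forms were all rejected by the test
        just above.)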
Hence only 16599 the address size prefix can have an effect. It changes the 16600 size from 64 (default) to 32. */ 16601 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 16602 delta++; 16603 if (haveASO(pfx)) { 16604 /* 64to32 of 64-bit get is merely a get-put improvement 16605 trick. */ 16606 putIReg32(R_RCX, binop(Iop_Sub32, 16607 unop(Iop_64to32, getIReg64(R_RCX)), 16608 mkU32(1))); 16609 } else { 16610 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 16611 } 16612 16613 /* This is correct, both for 32- and 64-bit versions. If we're 16614 doing a 32-bit dec and the result is zero then the default 16615 zero extension rule will cause the upper 32 bits to be zero 16616 too. Hence a 64-bit check against zero is OK. */ 16617 count = getIReg64(R_RCX); 16618 cond = binop(Iop_CmpNE64, count, mkU64(0)); 16619 switch (opc) { 16620 case 0xE2: 16621 xtra = ""; 16622 break; 16623 case 0xE1: 16624 xtra = "e"; 16625 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 16626 cond = mkAnd1(cond, zbit); 16627 break; 16628 case 0xE0: 16629 xtra = "ne"; 16630 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 16631 cond = mkAnd1(cond, zbit); 16632 break; 16633 default: 16634 vassert(0); 16635 } 16636 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) ); 16637 16638 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64); 16639 break; 16640 } 16641 16642 /* ------------------------ IMUL ----------------------- */ 16643 16644 case 0x69: /* IMUL Iv, Ev, Gv */ 16645 if (haveF2orF3(pfx)) goto decode_failure; 16646 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 16647 break; 16648 case 0x6B: /* IMUL Ib, Ev, Gv */ 16649 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 16650 break; 16651 16652 /* ------------------------ MOV ------------------------ */ 16653 16654 case 0x88: /* MOV Gb,Eb */ 16655 if (haveF2orF3(pfx)) goto decode_failure; 16656 delta = dis_mov_G_E(vbi, pfx, 1, delta); 16657 break; 16658 16659 case 0x89: /* MOV Gv,Ev */ 16660 if (haveF2orF3(pfx)) goto decode_failure; 16661 delta = dis_mov_G_E(vbi, pfx, sz, delta); 16662 break; 16663 16664 case 0x8A: /* MOV Eb,Gb */ 16665 if (haveF2orF3(pfx)) goto decode_failure; 16666 delta = dis_mov_E_G(vbi, pfx, 1, delta); 16667 break; 16668 16669 case 0x8B: /* MOV Ev,Gv */ 16670 if (haveF2orF3(pfx)) goto decode_failure; 16671 delta = dis_mov_E_G(vbi, pfx, sz, delta); 16672 break; 16673 16674 case 0x8D: /* LEA M,Gv */ 16675 if (haveF2orF3(pfx)) goto decode_failure; 16676 if (sz != 4 && sz != 8) 16677 goto decode_failure; 16678 modrm = getUChar(delta); 16679 if (epartIsReg(modrm)) 16680 goto decode_failure; 16681 /* NOTE! this is the one place where a segment override prefix 16682 has no effect on the address calculation. Therefore we clear 16683 any segment override bits in pfx. */ 16684 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 16685 delta += alen; 16686 /* This is a hack. But it isn't clear that really doing the 16687 calculation at 32 bits is really worth it. Hence for leal, 16688 do the full 64-bit calculation and then truncate it. */ 16689 putIRegG( sz, pfx, modrm, 16690 sz == 4 16691 ? unop(Iop_64to32, mkexpr(addr)) 16692 : mkexpr(addr) 16693 ); 16694 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 16695 nameIRegG(sz,pfx,modrm)); 16696 break; 16697 16698 //.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ 16699 //.. delta = dis_mov_Sw_Ew(sorb, sz, delta); 16700 //.. break; 16701 //.. 16702 //.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ 16703 //.. 
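/* Illustrative example for the LEA case above: for
   "leal (%rax,%rbx,4), %ecx" the IR computes the full 64-bit value
   rax + rbx*4 and then narrows it with Iop_64to32 before the put to ecx.
   Since only the low 32 bits of the effective address are architecturally
   visible in the 32-bit result, the truncation is safe. */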
delta = dis_mov_Ew_Sw(sorb, delta); 16704 //.. break; 16705 16706 case 0xA0: /* MOV Ob,AL */ 16707 if (have66orF2orF3(pfx)) goto decode_failure; 16708 sz = 1; 16709 /* Fall through ... */ 16710 case 0xA1: /* MOV Ov,eAX */ 16711 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 16712 goto decode_failure; 16713 d64 = getDisp64(delta); 16714 delta += 8; 16715 ty = szToITy(sz); 16716 addr = newTemp(Ity_I64); 16717 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 16718 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 16719 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 16720 segRegTxt(pfx), d64, 16721 nameIRegRAX(sz)); 16722 break; 16723 16724 case 0xA2: /* MOV AL,Ob */ 16725 if (have66orF2orF3(pfx)) goto decode_failure; 16726 sz = 1; 16727 /* Fall through ... */ 16728 case 0xA3: /* MOV eAX,Ov */ 16729 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 16730 goto decode_failure; 16731 d64 = getDisp64(delta); 16732 delta += 8; 16733 ty = szToITy(sz); 16734 addr = newTemp(Ity_I64); 16735 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 16736 storeLE( mkexpr(addr), getIRegRAX(sz) ); 16737 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 16738 segRegTxt(pfx), d64); 16739 break; 16740 16741 /* XXXX be careful here with moves to AH/BH/CH/DH */ 16742 case 0xB0: /* MOV imm,AL */ 16743 case 0xB1: /* MOV imm,CL */ 16744 case 0xB2: /* MOV imm,DL */ 16745 case 0xB3: /* MOV imm,BL */ 16746 case 0xB4: /* MOV imm,AH */ 16747 case 0xB5: /* MOV imm,CH */ 16748 case 0xB6: /* MOV imm,DH */ 16749 case 0xB7: /* MOV imm,BH */ 16750 if (haveF2orF3(pfx)) goto decode_failure; 16751 d64 = getUChar(delta); 16752 delta += 1; 16753 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 16754 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 16755 break; 16756 16757 case 0xB8: /* MOV imm,eAX */ 16758 case 0xB9: /* MOV imm,eCX */ 16759 case 0xBA: /* MOV imm,eDX */ 16760 case 0xBB: /* MOV imm,eBX */ 16761 case 0xBC: /* MOV imm,eSP */ 16762 case 0xBD: /* MOV imm,eBP */ 16763 case 0xBE: /* MOV imm,eSI */ 16764 case 0xBF: /* MOV imm,eDI */ 16765 /* This is the one-and-only place where 64-bit literals are 16766 allowed in the instruction stream. 
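For example, REX.W + B8, that is 48 B8 followed by eight immediate
        bytes, is movabsq $imm64, %rax; the non-REX.W forms handled below
        read at most four immediate bytes.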
*/ 16767 if (haveF2orF3(pfx)) goto decode_failure; 16768 if (sz == 8) { 16769 d64 = getDisp64(delta); 16770 delta += 8; 16771 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 16772 DIP("movabsq $%lld,%s\n", (Long)d64, 16773 nameIRegRexB(8,pfx,opc-0xB8)); 16774 } else { 16775 d64 = getSDisp(imin(4,sz),delta); 16776 delta += imin(4,sz); 16777 putIRegRexB(sz, pfx, opc-0xB8, 16778 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16779 DIP("mov%c $%lld,%s\n", nameISize(sz), 16780 (Long)d64, 16781 nameIRegRexB(sz,pfx,opc-0xB8)); 16782 } 16783 break; 16784 16785 case 0xC6: /* MOV Ib,Eb */ 16786 sz = 1; 16787 goto do_Mov_I_E; 16788 case 0xC7: /* MOV Iv,Ev */ 16789 goto do_Mov_I_E; 16790 16791 do_Mov_I_E: 16792 if (haveF2orF3(pfx)) goto decode_failure; 16793 modrm = getUChar(delta); 16794 if (epartIsReg(modrm)) { 16795 delta++; /* mod/rm byte */ 16796 d64 = getSDisp(imin(4,sz),delta); 16797 delta += imin(4,sz); 16798 putIRegE(sz, pfx, modrm, 16799 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16800 DIP("mov%c $%lld, %s\n", nameISize(sz), 16801 (Long)d64, 16802 nameIRegE(sz,pfx,modrm)); 16803 } else { 16804 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 16805 /*xtra*/imin(4,sz) ); 16806 delta += alen; 16807 d64 = getSDisp(imin(4,sz),delta); 16808 delta += imin(4,sz); 16809 storeLE(mkexpr(addr), 16810 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16811 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 16812 } 16813 break; 16814 16815 /* ------------------------ MOVx ------------------------ */ 16816 16817 case 0x63: /* MOVSX */ 16818 if (haveF2orF3(pfx)) goto decode_failure; 16819 if (haveREX(pfx) && 1==getRexW(pfx)) { 16820 vassert(sz == 8); 16821 /* movsx r/m32 to r64 */ 16822 modrm = getUChar(delta); 16823 if (epartIsReg(modrm)) { 16824 delta++; 16825 putIRegG(8, pfx, modrm, 16826 unop(Iop_32Sto64, 16827 getIRegE(4, pfx, modrm))); 16828 DIP("movslq %s,%s\n", 16829 nameIRegE(4, pfx, modrm), 16830 nameIRegG(8, pfx, modrm)); 16831 break; 16832 } else { 16833 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 16834 delta += alen; 16835 putIRegG(8, pfx, modrm, 16836 unop(Iop_32Sto64, 16837 loadLE(Ity_I32, mkexpr(addr)))); 16838 DIP("movslq %s,%s\n", dis_buf, 16839 nameIRegG(8, pfx, modrm)); 16840 break; 16841 } 16842 } else { 16843 goto decode_failure; 16844 } 16845 16846 /* ------------------------ opl imm, A ----------------- */ 16847 16848 case 0x04: /* ADD Ib, AL */ 16849 if (haveF2orF3(pfx)) goto decode_failure; 16850 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 16851 break; 16852 case 0x05: /* ADD Iv, eAX */ 16853 if (haveF2orF3(pfx)) goto decode_failure; 16854 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); 16855 break; 16856 16857 case 0x0C: /* OR Ib, AL */ 16858 if (haveF2orF3(pfx)) goto decode_failure; 16859 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 16860 break; 16861 case 0x0D: /* OR Iv, eAX */ 16862 if (haveF2orF3(pfx)) goto decode_failure; 16863 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 16864 break; 16865 16866 case 0x14: /* ADC Ib, AL */ 16867 if (haveF2orF3(pfx)) goto decode_failure; 16868 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 16869 break; 16870 case 0x15: /* ADC Iv, eAX */ 16871 if (haveF2orF3(pfx)) goto decode_failure; 16872 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 16873 break; 16874 16875 case 0x1C: /* SBB Ib, AL */ 16876 if (haveF2orF3(pfx)) goto decode_failure; 16877 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 16878 break; 16879 case 0x1D: /* SBB 
Iv, eAX */ 16880 if (haveF2orF3(pfx)) goto decode_failure; 16881 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 16882 break; 16883 16884 case 0x24: /* AND Ib, AL */ 16885 if (haveF2orF3(pfx)) goto decode_failure; 16886 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 16887 break; 16888 case 0x25: /* AND Iv, eAX */ 16889 if (haveF2orF3(pfx)) goto decode_failure; 16890 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 16891 break; 16892 16893 case 0x2C: /* SUB Ib, AL */ 16894 if (haveF2orF3(pfx)) goto decode_failure; 16895 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 16896 break; 16897 case 0x2D: /* SUB Iv, eAX */ 16898 if (haveF2orF3(pfx)) goto decode_failure; 16899 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 16900 break; 16901 16902 case 0x34: /* XOR Ib, AL */ 16903 if (haveF2orF3(pfx)) goto decode_failure; 16904 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 16905 break; 16906 case 0x35: /* XOR Iv, eAX */ 16907 if (haveF2orF3(pfx)) goto decode_failure; 16908 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 16909 break; 16910 16911 case 0x3C: /* CMP Ib, AL */ 16912 if (haveF2orF3(pfx)) goto decode_failure; 16913 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 16914 break; 16915 case 0x3D: /* CMP Iv, eAX */ 16916 if (haveF2orF3(pfx)) goto decode_failure; 16917 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 16918 break; 16919 16920 case 0xA8: /* TEST Ib, AL */ 16921 if (haveF2orF3(pfx)) goto decode_failure; 16922 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 16923 break; 16924 case 0xA9: /* TEST Iv, eAX */ 16925 if (haveF2orF3(pfx)) goto decode_failure; 16926 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 16927 break; 16928 16929 /* ------------------------ opl Ev, Gv ----------------- */ 16930 16931 case 0x02: /* ADD Eb,Gb */ 16932 if (haveF2orF3(pfx)) goto decode_failure; 16933 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 16934 break; 16935 case 0x03: /* ADD Ev,Gv */ 16936 if (haveF2orF3(pfx)) goto decode_failure; 16937 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 16938 break; 16939 16940 case 0x0A: /* OR Eb,Gb */ 16941 if (haveF2orF3(pfx)) goto decode_failure; 16942 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 16943 break; 16944 case 0x0B: /* OR Ev,Gv */ 16945 if (haveF2orF3(pfx)) goto decode_failure; 16946 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 16947 break; 16948 16949 case 0x12: /* ADC Eb,Gb */ 16950 if (haveF2orF3(pfx)) goto decode_failure; 16951 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 16952 break; 16953 case 0x13: /* ADC Ev,Gv */ 16954 if (haveF2orF3(pfx)) goto decode_failure; 16955 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 16956 break; 16957 16958 case 0x1A: /* SBB Eb,Gb */ 16959 if (haveF2orF3(pfx)) goto decode_failure; 16960 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 16961 break; 16962 case 0x1B: /* SBB Ev,Gv */ 16963 if (haveF2orF3(pfx)) goto decode_failure; 16964 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 16965 break; 16966 16967 case 0x22: /* AND Eb,Gb */ 16968 if (haveF2orF3(pfx)) goto decode_failure; 16969 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 16970 break; 16971 case 0x23: /* AND Ev,Gv */ 16972 if 
(haveF2orF3(pfx)) goto decode_failure; 16973 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 16974 break; 16975 16976 case 0x2A: /* SUB Eb,Gb */ 16977 if (haveF2orF3(pfx)) goto decode_failure; 16978 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 16979 break; 16980 case 0x2B: /* SUB Ev,Gv */ 16981 if (haveF2orF3(pfx)) goto decode_failure; 16982 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 16983 break; 16984 16985 case 0x32: /* XOR Eb,Gb */ 16986 if (haveF2orF3(pfx)) goto decode_failure; 16987 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 16988 break; 16989 case 0x33: /* XOR Ev,Gv */ 16990 if (haveF2orF3(pfx)) goto decode_failure; 16991 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 16992 break; 16993 16994 case 0x3A: /* CMP Eb,Gb */ 16995 if (haveF2orF3(pfx)) goto decode_failure; 16996 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 16997 break; 16998 case 0x3B: /* CMP Ev,Gv */ 16999 if (haveF2orF3(pfx)) goto decode_failure; 17000 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 17001 break; 17002 17003 case 0x84: /* TEST Eb,Gb */ 17004 if (haveF2orF3(pfx)) goto decode_failure; 17005 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 17006 break; 17007 case 0x85: /* TEST Ev,Gv */ 17008 if (haveF2orF3(pfx)) goto decode_failure; 17009 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 17010 break; 17011 17012 /* ------------------------ opl Gv, Ev ----------------- */ 17013 17014 case 0x00: /* ADD Gb,Eb */ 17015 if (haveF2orF3(pfx)) goto decode_failure; 17016 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 17017 break; 17018 case 0x01: /* ADD Gv,Ev */ 17019 if (haveF2orF3(pfx)) goto decode_failure; 17020 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 17021 break; 17022 17023 case 0x08: /* OR Gb,Eb */ 17024 if (haveF2orF3(pfx)) goto decode_failure; 17025 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 17026 break; 17027 case 0x09: /* OR Gv,Ev */ 17028 if (haveF2orF3(pfx)) goto decode_failure; 17029 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 17030 break; 17031 17032 case 0x10: /* ADC Gb,Eb */ 17033 if (haveF2orF3(pfx)) goto decode_failure; 17034 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 17035 break; 17036 case 0x11: /* ADC Gv,Ev */ 17037 if (haveF2orF3(pfx)) goto decode_failure; 17038 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 17039 break; 17040 17041 case 0x18: /* SBB Gb,Eb */ 17042 if (haveF2orF3(pfx)) goto decode_failure; 17043 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 17044 break; 17045 case 0x19: /* SBB Gv,Ev */ 17046 if (haveF2orF3(pfx)) goto decode_failure; 17047 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 17048 break; 17049 17050 case 0x20: /* AND Gb,Eb */ 17051 if (haveF2orF3(pfx)) goto decode_failure; 17052 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 17053 break; 17054 case 0x21: /* AND Gv,Ev */ 17055 if (haveF2orF3(pfx)) goto decode_failure; 17056 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 17057 break; 17058 17059 case 0x28: /* SUB Gb,Eb */ 17060 if (haveF2orF3(pfx)) goto decode_failure; 17061 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, 
delta, "sub" ); 17062 break; 17063 case 0x29: /* SUB Gv,Ev */ 17064 if (haveF2orF3(pfx)) goto decode_failure; 17065 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 17066 break; 17067 17068 case 0x30: /* XOR Gb,Eb */ 17069 if (haveF2orF3(pfx)) goto decode_failure; 17070 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 17071 break; 17072 case 0x31: /* XOR Gv,Ev */ 17073 if (haveF2orF3(pfx)) goto decode_failure; 17074 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 17075 break; 17076 17077 case 0x38: /* CMP Gb,Eb */ 17078 if (haveF2orF3(pfx)) goto decode_failure; 17079 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 17080 break; 17081 case 0x39: /* CMP Gv,Ev */ 17082 if (haveF2orF3(pfx)) goto decode_failure; 17083 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 17084 break; 17085 17086 /* ------------------------ POP ------------------------ */ 17087 17088 case 0x58: /* POP eAX */ 17089 case 0x59: /* POP eCX */ 17090 case 0x5A: /* POP eDX */ 17091 case 0x5B: /* POP eBX */ 17092 case 0x5D: /* POP eBP */ 17093 case 0x5E: /* POP eSI */ 17094 case 0x5F: /* POP eDI */ 17095 case 0x5C: /* POP eSP */ 17096 if (haveF2orF3(pfx)) goto decode_failure; 17097 vassert(sz == 2 || sz == 4 || sz == 8); 17098 if (sz == 4) 17099 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 17100 t1 = newTemp(szToITy(sz)); 17101 t2 = newTemp(Ity_I64); 17102 assign(t2, getIReg64(R_RSP)); 17103 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 17104 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 17105 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 17106 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 17107 break; 17108 17109 case 0x9D: /* POPF */ 17110 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 17111 So sz==4 actually means sz==8. */ 17112 if (haveF2orF3(pfx)) goto decode_failure; 17113 vassert(sz == 2 || sz == 4 || sz == 8); 17114 if (sz == 4) sz = 8; 17115 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 17116 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 17117 assign(t2, getIReg64(R_RSP)); 17118 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 17119 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 17120 /* t1 is the flag word. Mask out everything except OSZACP and 17121 set the flags thunk to AMD64G_CC_OP_COPY. */ 17122 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 17123 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 17124 stmt( IRStmt_Put( OFFB_CC_DEP1, 17125 binop(Iop_And64, 17126 mkexpr(t1), 17127 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 17128 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 17129 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 17130 ) 17131 ) 17132 ); 17133 17134 /* Also need to set the D flag, which is held in bit 10 of t1. 17135 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. 
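(The guest state keeps the direction flag as a ready-to-use stride of
        +1 or -1, which the string instructions use to step RSI/RDI forwards
        or backwards, rather than as a single bit.)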
*/ 17136 stmt( IRStmt_Put( 17137 OFFB_DFLAG, 17138 IRExpr_Mux0X( 17139 unop(Iop_32to8, 17140 unop(Iop_64to32, 17141 binop(Iop_And64, 17142 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 17143 mkU64(1)))), 17144 mkU64(1), 17145 mkU64(0xFFFFFFFFFFFFFFFFULL))) 17146 ); 17147 17148 /* And set the ID flag */ 17149 stmt( IRStmt_Put( 17150 OFFB_IDFLAG, 17151 IRExpr_Mux0X( 17152 unop(Iop_32to8, 17153 unop(Iop_64to32, 17154 binop(Iop_And64, 17155 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 17156 mkU64(1)))), 17157 mkU64(0), 17158 mkU64(1))) 17159 ); 17160 17161 /* And set the AC flag too */ 17162 stmt( IRStmt_Put( 17163 OFFB_ACFLAG, 17164 IRExpr_Mux0X( 17165 unop(Iop_32to8, 17166 unop(Iop_64to32, 17167 binop(Iop_And64, 17168 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 17169 mkU64(1)))), 17170 mkU64(0), 17171 mkU64(1))) 17172 ); 17173 17174 DIP("popf%c\n", nameISize(sz)); 17175 break; 17176 17177 //.. case 0x61: /* POPA */ 17178 //.. /* This is almost certainly wrong for sz==2. So ... */ 17179 //.. if (sz != 4) goto decode_failure; 17180 //.. 17181 //.. /* t5 is the old %ESP value. */ 17182 //.. t5 = newTemp(Ity_I32); 17183 //.. assign( t5, getIReg(4, R_ESP) ); 17184 //.. 17185 //.. /* Reload all the registers, except %esp. */ 17186 //.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); 17187 //.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); 17188 //.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); 17189 //.. putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); 17190 //.. /* ignore saved %ESP */ 17191 //.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); 17192 //.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); 17193 //.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); 17194 //.. 17195 //.. /* and move %ESP back up */ 17196 //.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); 17197 //.. 17198 //.. DIP("pusha%c\n", nameISize(sz)); 17199 //.. break; 17200 17201 case 0x8F: { /* POPQ m64 / POPW m16 */ 17202 Int len; 17203 UChar rm; 17204 /* There is no encoding for 32-bit pop in 64-bit mode. 17205 So sz==4 actually means sz==8. */ 17206 if (haveF2orF3(pfx)) goto decode_failure; 17207 vassert(sz == 2 || sz == 4 17208 || /* tolerate redundant REX.W, see #210481 */ sz == 8); 17209 if (sz == 4) sz = 8; 17210 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 17211 17212 rm = getUChar(delta); 17213 17214 /* make sure this instruction is correct POP */ 17215 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) 17216 goto decode_failure; 17217 /* and has correct size */ 17218 vassert(sz == 8); 17219 17220 t1 = newTemp(Ity_I64); 17221 t3 = newTemp(Ity_I64); 17222 assign( t1, getIReg64(R_RSP) ); 17223 assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); 17224 17225 /* Increase RSP; must be done before the STORE. Intel manual 17226 says: If the RSP register is used as a base register for 17227 addressing a destination operand in memory, the POP 17228 instruction computes the effective address of the operand 17229 after it increments the RSP register. */ 17230 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); 17231 17232 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 17233 storeLE( mkexpr(addr), mkexpr(t3) ); 17234 17235 DIP("popl %s\n", dis_buf); 17236 17237 delta += len; 17238 break; 17239 } 17240 17241 //.. //-- case 0x1F: /* POP %DS */ 17242 //.. //-- dis_pop_segreg( cb, R_DS, sz ); break; 17243 //.. 
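/* Illustrative note: IRExpr_Mux0X(cond, expr0, exprX) evaluates to expr0
   when the I8-typed cond is zero and to exprX otherwise.  So the POPF
   code above maps RFLAGS.DF == 0 to a stored D-flag of +1 and DF == 1
   to -1, and handles the ID (bit 21) and AC (bit 18) bits similarly. */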
//-- case 0x07: /* POP %ES */ 17244 //.. //-- dis_pop_segreg( cb, R_ES, sz ); break; 17245 //.. //-- case 0x17: /* POP %SS */ 17246 //.. //-- dis_pop_segreg( cb, R_SS, sz ); break; 17247 17248 /* ------------------------ PUSH ----------------------- */ 17249 17250 case 0x50: /* PUSH eAX */ 17251 case 0x51: /* PUSH eCX */ 17252 case 0x52: /* PUSH eDX */ 17253 case 0x53: /* PUSH eBX */ 17254 case 0x55: /* PUSH eBP */ 17255 case 0x56: /* PUSH eSI */ 17256 case 0x57: /* PUSH eDI */ 17257 case 0x54: /* PUSH eSP */ 17258 /* This is the Right Way, in that the value to be pushed is 17259 established before %rsp is changed, so that pushq %rsp 17260 correctly pushes the old value. */ 17261 if (haveF2orF3(pfx)) goto decode_failure; 17262 vassert(sz == 2 || sz == 4 || sz == 8); 17263 if (sz == 4) 17264 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ 17265 ty = sz==2 ? Ity_I16 : Ity_I64; 17266 t1 = newTemp(ty); 17267 t2 = newTemp(Ity_I64); 17268 assign(t1, getIRegRexB(sz, pfx, opc-0x50)); 17269 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); 17270 putIReg64(R_RSP, mkexpr(t2) ); 17271 storeLE(mkexpr(t2),mkexpr(t1)); 17272 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); 17273 break; 17274 17275 case 0x68: /* PUSH Iv */ 17276 if (haveF2orF3(pfx)) goto decode_failure; 17277 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 17278 if (sz == 4) sz = 8; 17279 d64 = getSDisp(imin(4,sz),delta); 17280 delta += imin(4,sz); 17281 goto do_push_I; 17282 case 0x6A: /* PUSH Ib, sign-extended to sz */ 17283 if (haveF2orF3(pfx)) goto decode_failure; 17284 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 17285 if (sz == 4) sz = 8; 17286 d64 = getSDisp8(delta); delta += 1; 17287 goto do_push_I; 17288 do_push_I: 17289 ty = szToITy(sz); 17290 t1 = newTemp(Ity_I64); 17291 t2 = newTemp(ty); 17292 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 17293 putIReg64(R_RSP, mkexpr(t1) ); 17294 /* stop mkU16 asserting if d32 is a negative 16-bit number 17295 (bug #132813) */ 17296 if (ty == Ity_I16) 17297 d64 &= 0xFFFF; 17298 storeLE( mkexpr(t1), mkU(ty,d64) ); 17299 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 17300 break; 17301 17302 case 0x9C: /* PUSHF */ { 17303 /* Note. There is no encoding for a 32-bit pushf in 64-bit 17304 mode. So sz==4 actually means sz==8. */ 17305 /* 24 July 06: has also been seen with a redundant REX prefix, 17306 so must also allow sz==8. */ 17307 if (haveF2orF3(pfx)) goto decode_failure; 17308 vassert(sz == 2 || sz == 4 || sz == 8); 17309 if (sz == 4) sz = 8; 17310 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 17311 17312 t1 = newTemp(Ity_I64); 17313 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 17314 putIReg64(R_RSP, mkexpr(t1) ); 17315 17316 t2 = newTemp(Ity_I64); 17317 assign( t2, mk_amd64g_calculate_rflags_all() ); 17318 17319 /* Patch in the D flag. This can simply be a copy of bit 10 of 17320 baseBlock[OFFB_DFLAG]. */ 17321 t3 = newTemp(Ity_I64); 17322 assign( t3, binop(Iop_Or64, 17323 mkexpr(t2), 17324 binop(Iop_And64, 17325 IRExpr_Get(OFFB_DFLAG,Ity_I64), 17326 mkU64(1<<10))) 17327 ); 17328 17329 /* And patch in the ID flag. */ 17330 t4 = newTemp(Ity_I64); 17331 assign( t4, binop(Iop_Or64, 17332 mkexpr(t3), 17333 binop(Iop_And64, 17334 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 17335 mkU8(21)), 17336 mkU64(1<<21))) 17337 ); 17338 17339 /* And patch in the AC flag too. 
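AC is bit 18 of RFLAGS, hence the shift by 18 below; the ID and D
      flags handled above live at bits 21 and 10 respectively.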
*/ 17340 t5 = newTemp(Ity_I64); 17341 assign( t5, binop(Iop_Or64, 17342 mkexpr(t4), 17343 binop(Iop_And64, 17344 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 17345 mkU8(18)), 17346 mkU64(1<<18))) 17347 ); 17348 17349 /* if sz==2, the stored value needs to be narrowed. */ 17350 if (sz == 2) 17351 storeLE( mkexpr(t1), unop(Iop_32to16, 17352 unop(Iop_64to32,mkexpr(t5))) ); 17353 else 17354 storeLE( mkexpr(t1), mkexpr(t5) ); 17355 17356 DIP("pushf%c\n", nameISize(sz)); 17357 break; 17358 } 17359 17360 //.. case 0x60: /* PUSHA */ 17361 //.. /* This is almost certainly wrong for sz==2. So ... */ 17362 //.. if (sz != 4) goto decode_failure; 17363 //.. 17364 //.. /* This is the Right Way, in that the value to be pushed is 17365 //.. established before %esp is changed, so that pusha 17366 //.. correctly pushes the old %esp value. New value of %esp is 17367 //.. pushed at start. */ 17368 //.. /* t0 is the %ESP value we're going to push. */ 17369 //.. t0 = newTemp(Ity_I32); 17370 //.. assign( t0, getIReg(4, R_ESP) ); 17371 //.. 17372 //.. /* t5 will be the new %ESP value. */ 17373 //.. t5 = newTemp(Ity_I32); 17374 //.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); 17375 //.. 17376 //.. /* Update guest state before prodding memory. */ 17377 //.. putIReg(4, R_ESP, mkexpr(t5)); 17378 //.. 17379 //.. /* Dump all the registers. */ 17380 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); 17381 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); 17382 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); 17383 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); 17384 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); 17385 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); 17386 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); 17387 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); 17388 //.. 17389 //.. DIP("pusha%c\n", nameISize(sz)); 17390 //.. break; 17391 //.. 17392 //.. 17393 //.. //-- case 0x0E: /* PUSH %CS */ 17394 //.. //-- dis_push_segreg( cb, R_CS, sz ); break; 17395 //.. //-- case 0x1E: /* PUSH %DS */ 17396 //.. //-- dis_push_segreg( cb, R_DS, sz ); break; 17397 //.. //-- case 0x06: /* PUSH %ES */ 17398 //.. //-- dis_push_segreg( cb, R_ES, sz ); break; 17399 //.. //-- case 0x16: /* PUSH %SS */ 17400 //.. //-- dis_push_segreg( cb, R_SS, sz ); break; 17401 //.. 17402 //.. /* ------------------------ SCAS et al ----------------- */ 17403 //.. 17404 //.. case 0xA4: /* MOVS, no REP prefix */ 17405 //.. case 0xA5: 17406 //.. dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); 17407 //.. break; 17408 //.. 17409 //.. case 0xA6: /* CMPSb, no REP prefix */ 17410 //.. //-- case 0xA7: 17411 //.. dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); 17412 //.. break; 17413 //.. //-- 17414 //.. //-- 17415 case 0xAC: /* LODS, no REP prefix */ 17416 case 0xAD: 17417 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx ); 17418 break; 17419 //.. 17420 //.. case 0xAE: /* SCAS, no REP prefix */ 17421 //.. case 0xAF: 17422 //.. dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb ); 17423 //.. 
break; 17424 17425 17426 case 0xFC: /* CLD */ 17427 if (haveF2orF3(pfx)) goto decode_failure; 17428 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); 17429 DIP("cld\n"); 17430 break; 17431 17432 case 0xFD: /* STD */ 17433 if (haveF2orF3(pfx)) goto decode_failure; 17434 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); 17435 DIP("std\n"); 17436 break; 17437 17438 case 0xF8: /* CLC */ 17439 case 0xF9: /* STC */ 17440 case 0xF5: /* CMC */ 17441 t0 = newTemp(Ity_I64); 17442 t1 = newTemp(Ity_I64); 17443 assign( t0, mk_amd64g_calculate_rflags_all() ); 17444 switch (opc) { 17445 case 0xF8: 17446 assign( t1, binop(Iop_And64, mkexpr(t0), 17447 mkU64(~AMD64G_CC_MASK_C))); 17448 DIP("clc\n"); 17449 break; 17450 case 0xF9: 17451 assign( t1, binop(Iop_Or64, mkexpr(t0), 17452 mkU64(AMD64G_CC_MASK_C))); 17453 DIP("stc\n"); 17454 break; 17455 case 0xF5: 17456 assign( t1, binop(Iop_Xor64, mkexpr(t0), 17457 mkU64(AMD64G_CC_MASK_C))); 17458 DIP("cmc\n"); 17459 break; 17460 default: 17461 vpanic("disInstr(x64)(clc/stc/cmc)"); 17462 } 17463 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 17464 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 17465 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); 17466 /* Set NDEP even though it isn't used. This makes redundant-PUT 17467 elimination of previous stores to this field work better. */ 17468 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 17469 break; 17470 17471 //.. /* REPNE prefix insn */ 17472 //.. case 0xF2: { 17473 //.. Addr32 eip_orig = guest_eip_bbstart + delta - 1; 17474 //.. vassert(sorb == 0); 17475 //.. abyte = getUChar(delta); delta++; 17476 //.. 17477 //.. if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; } 17478 //.. whatNext = Dis_StopHere; 17479 //.. 17480 //.. switch (abyte) { 17481 //.. /* According to the Intel manual, "repne movs" should never occur, but 17482 //.. * in practice it has happened, so allow for it here... */ 17483 //.. case 0xA4: sz = 1; /* REPNE MOVS<sz> */ 17484 //.. goto decode_failure; 17485 //.. //-- case 0xA5: 17486 //.. // dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig, 17487 //.. // guest_eip_bbstart+delta, "repne movs" ); 17488 //.. // break; 17489 //.. //-- 17490 //.. //-- case 0xA6: sz = 1; /* REPNE CMPS<sz> */ 17491 //.. //-- case 0xA7: 17492 //.. //-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" ); 17493 //.. //-- break; 17494 //.. //-- 17495 //.. case 0xAE: sz = 1; /* REPNE SCAS<sz> */ 17496 //.. case 0xAF: 17497 //.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, 17498 //.. guest_eip_bbstart+delta, "repne scas" ); 17499 //.. break; 17500 //.. 17501 //.. default: 17502 //.. goto decode_failure; 17503 //.. } 17504 //.. break; 17505 //.. 
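/* Illustrative note on CLC/STC/CMC above: the flags thunk is switched to
   AMD64G_CC_OP_COPY, so CC_DEP1 holds a literal rflags value; the new
   carry is obtained by AND/OR/XOR-ing AMD64G_CC_MASK_C into the rflags
   value computed just before, leaving all other flag bits unchanged. */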
} 17506 17507 /* ------ AE: SCAS variants ------ */ 17508 case 0xAE: 17509 case 0xAF: 17510 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 17511 if (haveF2(pfx) && !haveF3(pfx)) { 17512 if (opc == 0xAE) 17513 sz = 1; 17514 dis_REP_op ( AMD64CondNZ, dis_SCAS, sz, 17515 guest_RIP_curr_instr, 17516 guest_RIP_bbstart+delta, "repne scas", pfx ); 17517 dres.whatNext = Dis_StopHere; 17518 break; 17519 } 17520 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 17521 if (!haveF2(pfx) && haveF3(pfx)) { 17522 if (opc == 0xAE) 17523 sz = 1; 17524 dis_REP_op ( AMD64CondZ, dis_SCAS, sz, 17525 guest_RIP_curr_instr, 17526 guest_RIP_bbstart+delta, "repe scas", pfx ); 17527 dres.whatNext = Dis_StopHere; 17528 break; 17529 } 17530 /* AE/AF: scasb/scas{w,l,q} */ 17531 if (!haveF2(pfx) && !haveF3(pfx)) { 17532 if (opc == 0xAE) 17533 sz = 1; 17534 dis_string_op( dis_SCAS, sz, "scas", pfx ); 17535 break; 17536 } 17537 goto decode_failure; 17538 17539 /* ------ A6, A7: CMPS variants ------ */ 17540 case 0xA6: 17541 case 0xA7: 17542 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 17543 if (haveF3(pfx) && !haveF2(pfx)) { 17544 if (opc == 0xA6) 17545 sz = 1; 17546 dis_REP_op ( AMD64CondZ, dis_CMPS, sz, 17547 guest_RIP_curr_instr, 17548 guest_RIP_bbstart+delta, "repe cmps", pfx ); 17549 dres.whatNext = Dis_StopHere; 17550 break; 17551 } 17552 goto decode_failure; 17553 17554 /* ------ AA, AB: STOS variants ------ */ 17555 case 0xAA: 17556 case 0xAB: 17557 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 17558 if (haveF3(pfx) && !haveF2(pfx)) { 17559 if (opc == 0xAA) 17560 sz = 1; 17561 dis_REP_op ( AMD64CondAlways, dis_STOS, sz, 17562 guest_RIP_curr_instr, 17563 guest_RIP_bbstart+delta, "rep stos", pfx ); 17564 dres.whatNext = Dis_StopHere; 17565 break; 17566 } 17567 /* AA/AB: stosb/stos{w,l,q} */ 17568 if (!haveF3(pfx) && !haveF2(pfx)) { 17569 if (opc == 0xAA) 17570 sz = 1; 17571 dis_string_op( dis_STOS, sz, "stos", pfx ); 17572 break; 17573 } 17574 goto decode_failure; 17575 17576 /* ------ A4, A5: MOVS variants ------ */ 17577 case 0xA4: 17578 case 0xA5: 17579 /* F3 A4: rep movsb */ 17580 if (haveF3(pfx) && !haveF2(pfx)) { 17581 if (opc == 0xA4) 17582 sz = 1; 17583 dis_REP_op ( AMD64CondAlways, dis_MOVS, sz, 17584 guest_RIP_curr_instr, 17585 guest_RIP_bbstart+delta, "rep movs", pfx ); 17586 dres.whatNext = Dis_StopHere; 17587 break; 17588 } 17589 /* A4: movsb */ 17590 if (!haveF3(pfx) && !haveF2(pfx)) { 17591 if (opc == 0xA4) 17592 sz = 1; 17593 dis_string_op( dis_MOVS, sz, "movs", pfx ); 17594 break; 17595 } 17596 goto decode_failure; 17597 17598 17599 /* ------------------------ XCHG ----------------------- */ 17600 17601 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 17602 prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock) 17603 and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is 17604 used with an explicit LOCK prefix, we don't want to end up with 17605 two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by 17606 the generic LOCK logic at the top of disInstr. */ 17607 case 0x86: /* XCHG Gb,Eb */ 17608 sz = 1; 17609 /* Fall through ... 
*/ 17610 case 0x87: /* XCHG Gv,Ev */ 17611 if (haveF2orF3(pfx)) goto decode_failure; 17612 modrm = getUChar(delta); 17613 ty = szToITy(sz); 17614 t1 = newTemp(ty); t2 = newTemp(ty); 17615 if (epartIsReg(modrm)) { 17616 assign(t1, getIRegE(sz, pfx, modrm)); 17617 assign(t2, getIRegG(sz, pfx, modrm)); 17618 putIRegG(sz, pfx, modrm, mkexpr(t1)); 17619 putIRegE(sz, pfx, modrm, mkexpr(t2)); 17620 delta++; 17621 DIP("xchg%c %s, %s\n", 17622 nameISize(sz), nameIRegG(sz, pfx, modrm), 17623 nameIRegE(sz, pfx, modrm)); 17624 } else { 17625 *expect_CAS = True; 17626 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17627 assign( t1, loadLE(ty, mkexpr(addr)) ); 17628 assign( t2, getIRegG(sz, pfx, modrm) ); 17629 casLE( mkexpr(addr), 17630 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 17631 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 17632 delta += alen; 17633 DIP("xchg%c %s, %s\n", nameISize(sz), 17634 nameIRegG(sz, pfx, modrm), dis_buf); 17635 } 17636 break; 17637 17638 case 0x90: /* XCHG eAX,eAX */ 17639 /* detect and handle F3 90 (rep nop) specially */ 17640 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 17641 DIP("rep nop (P4 pause)\n"); 17642 /* "observe" the hint. The Vex client needs to be careful not 17643 to cause very long delays as a result, though. */ 17644 jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta); 17645 dres.whatNext = Dis_StopHere; 17646 break; 17647 } 17648 /* detect and handle NOPs specially */ 17649 if (/* F2/F3 probably change meaning completely */ 17650 !haveF2orF3(pfx) 17651 /* If REX.B is 1, we're not exchanging rAX with itself */ 17652 && getRexB(pfx)==0 ) { 17653 DIP("nop\n"); 17654 break; 17655 } 17656 /* else fall through to normal case. */ 17657 case 0x91: /* XCHG rAX,rCX */ 17658 case 0x92: /* XCHG rAX,rDX */ 17659 case 0x93: /* XCHG rAX,rBX */ 17660 case 0x94: /* XCHG rAX,rSP */ 17661 case 0x95: /* XCHG rAX,rBP */ 17662 case 0x96: /* XCHG rAX,rSI */ 17663 case 0x97: /* XCHG rAX,rDI */ 17664 17665 /* guard against mutancy */ 17666 if (haveF2orF3(pfx)) goto decode_failure; 17667 17668 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 17669 break; 17670 17671 //.. //-- /* ------------------------ XLAT ----------------------- */ 17672 //.. //-- 17673 //.. //-- case 0xD7: /* XLAT */ 17674 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); 17675 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */ 17676 //.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */ 17677 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */ 17678 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */ 17679 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2); 17680 //.. //-- uWiden(cb, 1, False); 17681 //.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */ 17682 //.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */ 17683 //.. //-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */ 17684 //.. //-- 17685 //.. //-- DIP("xlat%c [ebx]\n", nameISize(sz)); 17686 //.. 
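/* Illustrative note: the memory form of XCHG above is expressed with
   casLE rather than a plain load/store pair, comparing against the value
   just loaded and restarting the instruction if the location changed in
   between; hence *expect_CAS is set.  This reflects the fact that XCHG
   with a memory operand is locked even without an explicit LOCK prefix. */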
//-- break; 17687 17688 /* ------------------------ IN / OUT ----------------------- */ 17689 17690 case 0xE4: /* IN imm8, AL */ 17691 sz = 1; 17692 t1 = newTemp(Ity_I64); 17693 abyte = getUChar(delta); delta++; 17694 assign(t1, mkU64( abyte & 0xFF )); 17695 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 17696 goto do_IN; 17697 case 0xE5: /* IN imm8, eAX */ 17698 if (!(sz == 2 || sz == 4)) goto decode_failure; 17699 t1 = newTemp(Ity_I64); 17700 abyte = getUChar(delta); delta++; 17701 assign(t1, mkU64( abyte & 0xFF )); 17702 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 17703 goto do_IN; 17704 case 0xEC: /* IN %DX, AL */ 17705 sz = 1; 17706 t1 = newTemp(Ity_I64); 17707 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 17708 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 17709 nameIRegRAX(sz)); 17710 goto do_IN; 17711 case 0xED: /* IN %DX, eAX */ 17712 if (!(sz == 2 || sz == 4)) goto decode_failure; 17713 t1 = newTemp(Ity_I64); 17714 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 17715 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 17716 nameIRegRAX(sz)); 17717 goto do_IN; 17718 do_IN: { 17719 /* At this point, sz indicates the width, and t1 is a 64-bit 17720 value giving port number. */ 17721 IRDirty* d; 17722 if (haveF2orF3(pfx)) goto decode_failure; 17723 vassert(sz == 1 || sz == 2 || sz == 4); 17724 ty = szToITy(sz); 17725 t2 = newTemp(Ity_I64); 17726 d = unsafeIRDirty_1_N( 17727 t2, 17728 0/*regparms*/, 17729 "amd64g_dirtyhelper_IN", 17730 &amd64g_dirtyhelper_IN, 17731 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 17732 ); 17733 /* do the call, dumping the result in t2. */ 17734 stmt( IRStmt_Dirty(d) ); 17735 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 17736 break; 17737 } 17738 17739 case 0xE6: /* OUT AL, imm8 */ 17740 sz = 1; 17741 t1 = newTemp(Ity_I64); 17742 abyte = getUChar(delta); delta++; 17743 assign( t1, mkU64( abyte & 0xFF ) ); 17744 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 17745 goto do_OUT; 17746 case 0xE7: /* OUT eAX, imm8 */ 17747 if (!(sz == 2 || sz == 4)) goto decode_failure; 17748 t1 = newTemp(Ity_I64); 17749 abyte = getUChar(delta); delta++; 17750 assign( t1, mkU64( abyte & 0xFF ) ); 17751 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 17752 goto do_OUT; 17753 case 0xEE: /* OUT AL, %DX */ 17754 sz = 1; 17755 t1 = newTemp(Ity_I64); 17756 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 17757 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 17758 nameIRegRDX(2)); 17759 goto do_OUT; 17760 case 0xEF: /* OUT eAX, %DX */ 17761 if (!(sz == 2 || sz == 4)) goto decode_failure; 17762 t1 = newTemp(Ity_I64); 17763 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 17764 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 17765 nameIRegRDX(2)); 17766 goto do_OUT; 17767 do_OUT: { 17768 /* At this point, sz indicates the width, and t1 is a 64-bit 17769 value giving port number. 
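The port write itself is performed by a call to the dirty helper
      amd64g_dirtyhelper_OUT, mirroring the IN case above, since port I/O is
      a side effect that cannot be expressed in ordinary IR.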
*/ 17770 IRDirty* d; 17771 if (haveF2orF3(pfx)) goto decode_failure; 17772 vassert(sz == 1 || sz == 2 || sz == 4); 17773 ty = szToITy(sz); 17774 d = unsafeIRDirty_0_N( 17775 0/*regparms*/, 17776 "amd64g_dirtyhelper_OUT", 17777 &amd64g_dirtyhelper_OUT, 17778 mkIRExprVec_3( mkexpr(t1), 17779 widenUto64( getIRegRAX(sz) ), 17780 mkU64(sz) ) 17781 ); 17782 stmt( IRStmt_Dirty(d) ); 17783 break; 17784 } 17785 17786 /* ------------------------ (Grp1 extensions) ---------- */ 17787 17788 case 0x80: /* Grp1 Ib,Eb */ 17789 if (haveF2orF3(pfx)) goto decode_failure; 17790 modrm = getUChar(delta); 17791 am_sz = lengthAMode(pfx,delta); 17792 sz = 1; 17793 d_sz = 1; 17794 d64 = getSDisp8(delta + am_sz); 17795 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17796 break; 17797 17798 case 0x81: /* Grp1 Iv,Ev */ 17799 if (haveF2orF3(pfx)) goto decode_failure; 17800 modrm = getUChar(delta); 17801 am_sz = lengthAMode(pfx,delta); 17802 d_sz = imin(sz,4); 17803 d64 = getSDisp(d_sz, delta + am_sz); 17804 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17805 break; 17806 17807 case 0x83: /* Grp1 Ib,Ev */ 17808 if (haveF2orF3(pfx)) goto decode_failure; 17809 modrm = getUChar(delta); 17810 am_sz = lengthAMode(pfx,delta); 17811 d_sz = 1; 17812 d64 = getSDisp8(delta + am_sz); 17813 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17814 break; 17815 17816 /* ------------------------ (Grp2 extensions) ---------- */ 17817 17818 case 0xC0: { /* Grp2 Ib,Eb */ 17819 Bool decode_OK = True; 17820 if (haveF2orF3(pfx)) goto decode_failure; 17821 modrm = getUChar(delta); 17822 am_sz = lengthAMode(pfx,delta); 17823 d_sz = 1; 17824 d64 = getUChar(delta + am_sz); 17825 sz = 1; 17826 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17827 mkU8(d64 & 0xFF), NULL, &decode_OK ); 17828 if (!decode_OK) goto decode_failure; 17829 break; 17830 } 17831 case 0xC1: { /* Grp2 Ib,Ev */ 17832 Bool decode_OK = True; 17833 if (haveF2orF3(pfx)) goto decode_failure; 17834 modrm = getUChar(delta); 17835 am_sz = lengthAMode(pfx,delta); 17836 d_sz = 1; 17837 d64 = getUChar(delta + am_sz); 17838 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17839 mkU8(d64 & 0xFF), NULL, &decode_OK ); 17840 if (!decode_OK) goto decode_failure; 17841 break; 17842 } 17843 case 0xD0: { /* Grp2 1,Eb */ 17844 Bool decode_OK = True; 17845 if (haveF2orF3(pfx)) goto decode_failure; 17846 modrm = getUChar(delta); 17847 am_sz = lengthAMode(pfx,delta); 17848 d_sz = 0; 17849 d64 = 1; 17850 sz = 1; 17851 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17852 mkU8(d64), NULL, &decode_OK ); 17853 if (!decode_OK) goto decode_failure; 17854 break; 17855 } 17856 case 0xD1: { /* Grp2 1,Ev */ 17857 Bool decode_OK = True; 17858 if (haveF2orF3(pfx)) goto decode_failure; 17859 modrm = getUChar(delta); 17860 am_sz = lengthAMode(pfx,delta); 17861 d_sz = 0; 17862 d64 = 1; 17863 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17864 mkU8(d64), NULL, &decode_OK ); 17865 if (!decode_OK) goto decode_failure; 17866 break; 17867 } 17868 case 0xD2: { /* Grp2 CL,Eb */ 17869 Bool decode_OK = True; 17870 if (haveF2orF3(pfx)) goto decode_failure; 17871 modrm = getUChar(delta); 17872 am_sz = lengthAMode(pfx,delta); 17873 d_sz = 0; 17874 sz = 1; 17875 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17876 getIRegCL(), "%cl", &decode_OK ); 17877 if (!decode_OK) goto decode_failure; 17878 break; 17879 } 17880 case 0xD3: { /* Grp2 CL,Ev */ 17881 Bool decode_OK = True; 17882 if (haveF2orF3(pfx)) goto 
decode_failure; 17883 modrm = getUChar(delta); 17884 am_sz = lengthAMode(pfx,delta); 17885 d_sz = 0; 17886 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17887 getIRegCL(), "%cl", &decode_OK ); 17888 if (!decode_OK) goto decode_failure; 17889 break; 17890 } 17891 17892 /* ------------------------ (Grp3 extensions) ---------- */ 17893 17894 case 0xF6: { /* Grp3 Eb */ 17895 Bool decode_OK = True; 17896 if (haveF2orF3(pfx)) goto decode_failure; 17897 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 17898 if (!decode_OK) goto decode_failure; 17899 break; 17900 } 17901 case 0xF7: { /* Grp3 Ev */ 17902 Bool decode_OK = True; 17903 if (haveF2orF3(pfx)) goto decode_failure; 17904 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 17905 if (!decode_OK) goto decode_failure; 17906 break; 17907 } 17908 17909 /* ------------------------ (Grp4 extensions) ---------- */ 17910 17911 case 0xFE: { /* Grp4 Eb */ 17912 Bool decode_OK = True; 17913 if (haveF2orF3(pfx)) goto decode_failure; 17914 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 17915 if (!decode_OK) goto decode_failure; 17916 break; 17917 } 17918 17919 /* ------------------------ (Grp5 extensions) ---------- */ 17920 17921 case 0xFF: { /* Grp5 Ev */ 17922 Bool decode_OK = True; 17923 if (haveF2orF3(pfx)) goto decode_failure; 17924 delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK ); 17925 if (!decode_OK) goto decode_failure; 17926 break; 17927 } 17928 17929 /* ------------------------ Escapes to 2-byte opcodes -- */ 17930 17931 case 0x0F: { 17932 opc = getUChar(delta); delta++; 17933 switch (opc) { 17934 17935 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ 17936 17937 case 0xBA: { /* Grp8 Ib,Ev */ 17938 Bool decode_OK = False; 17939 if (haveF2orF3(pfx)) goto decode_failure; 17940 modrm = getUChar(delta); 17941 am_sz = lengthAMode(pfx,delta); 17942 d64 = getSDisp8(delta + am_sz); 17943 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 17944 &decode_OK ); 17945 if (!decode_OK) 17946 goto decode_failure; 17947 break; 17948 } 17949 17950 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ 17951 17952 case 0xBC: /* BSF Gv,Ev */ 17953 if (haveF2orF3(pfx)) goto decode_failure; 17954 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 17955 break; 17956 case 0xBD: /* BSR Gv,Ev */ 17957 if (haveF2orF3(pfx)) goto decode_failure; 17958 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 17959 break; 17960 17961 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ 17962 17963 case 0xC8: /* BSWAP %eax */ 17964 case 0xC9: 17965 case 0xCA: 17966 case 0xCB: 17967 case 0xCC: 17968 case 0xCD: 17969 case 0xCE: 17970 case 0xCF: /* BSWAP %edi */ 17971 if (haveF2orF3(pfx)) goto decode_failure; 17972 /* According to the AMD64 docs, this insn can have size 4 or 17973 8. 
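The 8-byte form below swaps the bytes with three mask-and-shift
      passes, first within 16-bit groups, then within 32-bit groups, then
      swapping the two 32-bit halves.  A minimal C sketch of the same
      computation, assuming an ordinary uint64_t value x, is:

         x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)  | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
         x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
         x = ((x & 0xFFFFFFFF00000000ULL) >> 32) | ((x << 32) & 0xFFFFFFFF00000000ULL);

      (illustrative only; it is not part of the decoder).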
*/ 17974 if (sz == 4) { 17975 t1 = newTemp(Ity_I32); 17976 t2 = newTemp(Ity_I32); 17977 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 17978 assign( t2, 17979 binop(Iop_Or32, 17980 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 17981 binop(Iop_Or32, 17982 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 17983 mkU32(0x00FF0000)), 17984 binop(Iop_Or32, 17985 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 17986 mkU32(0x0000FF00)), 17987 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 17988 mkU32(0x000000FF) ) 17989 ))) 17990 ); 17991 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 17992 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 17993 break; 17994 } 17995 else if (sz == 8) { 17996 IRTemp m8 = newTemp(Ity_I64); 17997 IRTemp s8 = newTemp(Ity_I64); 17998 IRTemp m16 = newTemp(Ity_I64); 17999 IRTemp s16 = newTemp(Ity_I64); 18000 IRTemp m32 = newTemp(Ity_I64); 18001 t1 = newTemp(Ity_I64); 18002 t2 = newTemp(Ity_I64); 18003 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 18004 18005 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); 18006 assign( s8, 18007 binop(Iop_Or64, 18008 binop(Iop_Shr64, 18009 binop(Iop_And64,mkexpr(t1),mkexpr(m8)), 18010 mkU8(8)), 18011 binop(Iop_And64, 18012 binop(Iop_Shl64,mkexpr(t1),mkU8(8)), 18013 mkexpr(m8)) 18014 ) 18015 ); 18016 18017 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); 18018 assign( s16, 18019 binop(Iop_Or64, 18020 binop(Iop_Shr64, 18021 binop(Iop_And64,mkexpr(s8),mkexpr(m16)), 18022 mkU8(16)), 18023 binop(Iop_And64, 18024 binop(Iop_Shl64,mkexpr(s8),mkU8(16)), 18025 mkexpr(m16)) 18026 ) 18027 ); 18028 18029 assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); 18030 assign( t2, 18031 binop(Iop_Or64, 18032 binop(Iop_Shr64, 18033 binop(Iop_And64,mkexpr(s16),mkexpr(m32)), 18034 mkU8(32)), 18035 binop(Iop_And64, 18036 binop(Iop_Shl64,mkexpr(s16),mkU8(32)), 18037 mkexpr(m32)) 18038 ) 18039 ); 18040 18041 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 18042 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 18043 break; 18044 } else { 18045 goto decode_failure; 18046 } 18047 18048 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ 18049 18050 /* All of these are possible at sizes 2, 4 and 8, but until a 18051 size 2 test case shows up, only handle sizes 4 and 8. 
*/
18052
18053 case 0xA3: /* BT Gv,Ev */
18054 if (haveF2orF3(pfx)) goto decode_failure;
18055 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
18056 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone );
18057 break;
18058 case 0xB3: /* BTR Gv,Ev */
18059 if (haveF2orF3(pfx)) goto decode_failure;
18060 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
18061 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset );
18062 break;
18063 case 0xAB: /* BTS Gv,Ev */
18064 if (haveF2orF3(pfx)) goto decode_failure;
18065 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
18066 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet );
18067 break;
18068 case 0xBB: /* BTC Gv,Ev */
18069 if (haveF2orF3(pfx)) goto decode_failure;
18070 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
18071 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp );
18072 break;
18073
18074 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
18075
18076 case 0x40:
18077 case 0x41:
18078 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
18079 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
18080 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
18081 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
18082 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
18083 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
18084 case 0x48: /* CMOVSb (cmov negative) */
18085 case 0x49: /* CMOVNSb (cmov not negative) */
18086 case 0x4A: /* CMOVP (cmov parity even) */
18087 case 0x4B: /* CMOVNP (cmov parity odd) */
18088 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
18089 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
18090 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
18091 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
18092 if (haveF2orF3(pfx)) goto decode_failure;
18093 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
18094 break;
18095
18096 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
18097
18098 case 0xB0: { /* CMPXCHG Gb,Eb */
18099 Bool ok = True;
18100 if (haveF2orF3(pfx)) goto decode_failure;
18101 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
18102 if (!ok) goto decode_failure;
18103 break;
18104 }
18105 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
18106 Bool ok = True;
18107 if (haveF2orF3(pfx)) goto decode_failure;
18108 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
18109 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
18110 if (!ok) goto decode_failure;
18111 break;
18112 }
18113
18114 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
18115 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
18116 IRTemp expdHi = newTemp(elemTy);
18117 IRTemp expdLo = newTemp(elemTy);
18118 IRTemp dataHi = newTemp(elemTy);
18119 IRTemp dataLo = newTemp(elemTy);
18120 IRTemp oldHi = newTemp(elemTy);
18121 IRTemp oldLo = newTemp(elemTy);
18122 IRTemp flags_old = newTemp(Ity_I64);
18123 IRTemp flags_new = newTemp(Ity_I64);
18124 IRTemp success = newTemp(Ity_I1);
18125 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
18126 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
18127 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
18128 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
18129 IRTemp expdHi64 = newTemp(Ity_I64);
18130 IRTemp expdLo64 = newTemp(Ity_I64);
18131
18132 /* Translate this using a DCAS, even if there is no LOCK
18133 prefix. Life is too short to bother with generating two
18134 different translations for the with/without-LOCK-prefix
18135 cases. */
18136 *expect_CAS = True;
18137
18138 /* Decode, and generate address. 
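         (An informal sketch, not taken from any one manual, of the
          architectural behaviour the IR below models; the register and
          flag names are the usual architectural ones:

             cmpxchg8b  m64  : if (EDX:EAX == m64) { m64 := ECX:EBX; ZF := 1 }
                               else                { EDX:EAX := m64;  ZF := 0 }
             cmpxchg16b m128 : the same at 128 bits, comparing RDX:RAX and
                               storing RCX:RBX, with m128 required to be
                               16-aligned -- hence the alignment check below.)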
*/ 18139 if (have66orF2orF3(pfx)) goto decode_failure; 18140 if (sz != 4 && sz != 8) goto decode_failure; 18141 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 18142 goto decode_failure; 18143 modrm = getUChar(delta); 18144 if (epartIsReg(modrm)) goto decode_failure; 18145 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 18146 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 18147 delta += alen; 18148 18149 /* cmpxchg16b requires an alignment check. */ 18150 if (sz == 8) 18151 gen_SEGV_if_not_16_aligned( addr ); 18152 18153 /* Get the expected and new values. */ 18154 assign( expdHi64, getIReg64(R_RDX) ); 18155 assign( expdLo64, getIReg64(R_RAX) ); 18156 18157 /* These are the correctly-sized expected and new values. 18158 However, we also get expdHi64/expdLo64 above as 64-bits 18159 regardless, because we will need them later in the 32-bit 18160 case (paradoxically). */ 18161 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) 18162 : mkexpr(expdHi64) ); 18163 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 18164 : mkexpr(expdLo64) ); 18165 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 18166 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) ); 18167 18168 /* Do the DCAS */ 18169 stmt( IRStmt_CAS( 18170 mkIRCAS( oldHi, oldLo, 18171 Iend_LE, mkexpr(addr), 18172 mkexpr(expdHi), mkexpr(expdLo), 18173 mkexpr(dataHi), mkexpr(dataLo) 18174 ))); 18175 18176 /* success when oldHi:oldLo == expdHi:expdLo */ 18177 assign( success, 18178 binop(opCasCmpEQ, 18179 binop(opOR, 18180 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 18181 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 18182 ), 18183 zero 18184 )); 18185 18186 /* If the DCAS is successful, that is to say oldHi:oldLo == 18187 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 18188 which is where they came from originally. Both the actual 18189 contents of these two regs, and any shadow values, are 18190 unchanged. If the DCAS fails then we're putting into 18191 RDX:RAX the value seen in memory. */ 18192 /* Now of course there's a complication in the 32-bit case 18193 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 18194 unchanged; but if we use the same scheme as in the 64-bit 18195 case, we get hit by the standard rule that a write to the 18196 bottom 32 bits of an integer register zeros the upper 32 18197 bits. And so the upper halves of RDX and RAX mysteriously 18198 become zero. So we have to stuff back in the original 18199 64-bit values which we previously stashed in 18200 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 18201 /* It's just _so_ much fun ... */ 18202 putIRegRDX( 8, 18203 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 18204 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 18205 : mkexpr(oldHi), 18206 mkexpr(expdHi64) 18207 )); 18208 putIRegRAX( 8, 18209 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 18210 sz == 4 ? 
unop(Iop_32Uto64, mkexpr(oldLo)) 18211 : mkexpr(oldLo), 18212 mkexpr(expdLo64) 18213 )); 18214 18215 /* Copy the success bit into the Z flag and leave the others 18216 unchanged */ 18217 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); 18218 assign( 18219 flags_new, 18220 binop(Iop_Or64, 18221 binop(Iop_And64, mkexpr(flags_old), 18222 mkU64(~AMD64G_CC_MASK_Z)), 18223 binop(Iop_Shl64, 18224 binop(Iop_And64, 18225 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), 18226 mkU8(AMD64G_CC_SHIFT_Z)) )); 18227 18228 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 18229 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 18230 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 18231 /* Set NDEP even though it isn't used. This makes 18232 redundant-PUT elimination of previous stores to this field 18233 work better. */ 18234 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 18235 18236 /* Sheesh. Aren't you glad it was me and not you that had to 18237 write and validate all this grunge? */ 18238 18239 DIP("cmpxchg8b %s\n", dis_buf); 18240 break; 18241 18242 } 18243 18244 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ 18245 18246 case 0xA2: { /* CPUID */ 18247 /* Uses dirty helper: 18248 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) 18249 declared to mod rax, wr rbx, rcx, rdx 18250 */ 18251 IRDirty* d = NULL; 18252 HChar* fName = NULL; 18253 void* fAddr = NULL; 18254 if (haveF2orF3(pfx)) goto decode_failure; 18255 if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3 18256 |VEX_HWCAPS_AMD64_CX16)) { 18257 fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16"; 18258 fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16; 18259 /* This is a Core-2-like machine */ 18260 //fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; 18261 //fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; 18262 /* This is a Core-i5-like machine */ 18263 } 18264 else { 18265 /* Give a CPUID for at least a baseline machine, SSE2 18266 only, and no CX16 */ 18267 fName = "amd64g_dirtyhelper_CPUID_baseline"; 18268 fAddr = &amd64g_dirtyhelper_CPUID_baseline; 18269 } 18270 18271 vassert(fName); vassert(fAddr); 18272 d = unsafeIRDirty_0_N ( 0/*regparms*/, 18273 fName, fAddr, mkIRExprVec_0() ); 18274 /* declare guest state effects */ 18275 d->needsBBP = True; 18276 d->nFxState = 4; 18277 d->fxState[0].fx = Ifx_Modify; 18278 d->fxState[0].offset = OFFB_RAX; 18279 d->fxState[0].size = 8; 18280 d->fxState[1].fx = Ifx_Write; 18281 d->fxState[1].offset = OFFB_RBX; 18282 d->fxState[1].size = 8; 18283 d->fxState[2].fx = Ifx_Modify; 18284 d->fxState[2].offset = OFFB_RCX; 18285 d->fxState[2].size = 8; 18286 d->fxState[3].fx = Ifx_Write; 18287 d->fxState[3].offset = OFFB_RDX; 18288 d->fxState[3].size = 8; 18289 /* execute the dirty call, side-effecting guest state */ 18290 stmt( IRStmt_Dirty(d) ); 18291 /* CPUID is a serialising insn. So, just in case someone is 18292 using it as a memory fence ... 
*/
18293 stmt( IRStmt_MBE(Imbe_Fence) );
18294 DIP("cpuid\n");
18295 break;
18296 }
18297
18298 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
18299
18300 case 0xB6: /* MOVZXb Eb,Gv */
18301 if (haveF2orF3(pfx)) goto decode_failure;
18302 if (sz != 2 && sz != 4 && sz != 8)
18303 goto decode_failure;
18304 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
18305 break;
18306 case 0xB7: /* MOVZXw Ew,Gv */
18307 if (haveF2orF3(pfx)) goto decode_failure;
18308 if (sz != 4 && sz != 8)
18309 goto decode_failure;
18310 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
18311 break;
18312
18313 case 0xBE: /* MOVSXb Eb,Gv */
18314 if (haveF2orF3(pfx)) goto decode_failure;
18315 if (sz != 2 && sz != 4 && sz != 8)
18316 goto decode_failure;
18317 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
18318 break;
18319 case 0xBF: /* MOVSXw Ew,Gv */
18320 if (haveF2orF3(pfx)) goto decode_failure;
18321 if (sz != 4 && sz != 8)
18322 goto decode_failure;
18323 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
18324 break;
18325
18326 //.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
18327 //.. //--
18328 //.. //-- case 0xC3: /* MOVNTI Gv,Ev */
18329 //.. //-- vg_assert(sz == 4);
18330 //.. //-- modrm = getUChar(eip);
18331 //.. //-- vg_assert(!epartIsReg(modrm));
18332 //.. //-- t1 = newTemp(cb);
18333 //.. //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
18334 //.. //-- pair = disAMode ( cb, sorb, eip, dis_buf );
18335 //.. //-- t2 = LOW24(pair);
18336 //.. //-- eip += HI8(pair);
18337 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
18338 //.. //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
18339 //.. //-- break;
18340
18341 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
18342
18343 case 0xAF: /* IMUL Ev, Gv */
18344 if (haveF2orF3(pfx)) goto decode_failure;
18345 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
18346 break;
18347
18348 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
18349
18350 case 0x1F:
18351 if (haveF2orF3(pfx)) goto decode_failure;
18352 modrm = getUChar(delta);
18353 if (epartIsReg(modrm)) goto decode_failure;
18354 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18355 delta += alen;
18356 DIP("nop%c %s\n", nameISize(sz), dis_buf);
18357 break;
18358
18359 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
18360 case 0x80:
18361 case 0x81:
18362 case 0x82: /* JBb/JNAEb (jump below) */
18363 case 0x83: /* JNBb/JAEb (jump not below) */
18364 case 0x84: /* JZb/JEb (jump zero) */
18365 case 0x85: /* JNZb/JNEb (jump not zero) */
18366 case 0x86: /* JBEb/JNAb (jump below or equal) */
18367 case 0x87: /* JNBEb/JAb (jump not below or equal) */
18368 case 0x88: /* JSb (jump negative) */
18369 case 0x89: /* JNSb (jump not negative) */
18370 case 0x8A: /* JP (jump parity even) */
18371 case 0x8B: /* JNP/JPO (jump parity odd) */
18372 case 0x8C: /* JLb/JNGEb (jump less) */
18373 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
18374 case 0x8E: /* JLEb/JNGb (jump less or equal) */
18375 case 0x8F: /* JGb/JNLEb (jump greater) */
18376 { Long jmpDelta;
18377 HChar* comment = "";
18378 if (haveF2orF3(pfx)) goto decode_failure;
18379 jmpDelta = getSDisp32(delta);
18380 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
18381 delta += 4;
18382 if (resteerCisOk
18383 && vex_control.guest_chase_cond
18384 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
18385 && jmpDelta < 0
18386 && resteerOkFn( callback_opaque, d64) ) {
18387 /* Speculation: assume this backward branch is taken. 
So 18388 we need to emit a side-exit to the insn following this 18389 one, on the negation of the condition, and continue at 18390 the branch target address (d64). If we wind up back at 18391 the first instruction of the trace, just stop; it's 18392 better to let the IR loop unroller handle that case. */ 18393 stmt( IRStmt_Exit( 18394 mk_amd64g_calculate_condition( 18395 (AMD64Condcode)(1 ^ (opc - 0x80))), 18396 Ijk_Boring, 18397 IRConst_U64(guest_RIP_bbstart+delta) ) ); 18398 dres.whatNext = Dis_ResteerC; 18399 dres.continueAt = d64; 18400 comment = "(assumed taken)"; 18401 } 18402 else 18403 if (resteerCisOk 18404 && vex_control.guest_chase_cond 18405 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 18406 && jmpDelta >= 0 18407 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 18408 /* Speculation: assume this forward branch is not taken. 18409 So we need to emit a side-exit to d64 (the dest) and 18410 continue disassembling at the insn immediately 18411 following this one. */ 18412 stmt( IRStmt_Exit( 18413 mk_amd64g_calculate_condition((AMD64Condcode) 18414 (opc - 0x80)), 18415 Ijk_Boring, 18416 IRConst_U64(d64) ) ); 18417 dres.whatNext = Dis_ResteerC; 18418 dres.continueAt = guest_RIP_bbstart+delta; 18419 comment = "(assumed not taken)"; 18420 } 18421 else { 18422 /* Conservative default translation - end the block at 18423 this point. */ 18424 jcc_01( (AMD64Condcode)(opc - 0x80), 18425 guest_RIP_bbstart+delta, 18426 d64 ); 18427 dres.whatNext = Dis_StopHere; 18428 } 18429 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment); 18430 break; 18431 } 18432 18433 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */ 18434 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ 18435 /* 0F 0D /1 -- prefetchw mem8 */ 18436 if (have66orF2orF3(pfx)) goto decode_failure; 18437 modrm = getUChar(delta); 18438 if (epartIsReg(modrm)) goto decode_failure; 18439 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 18440 goto decode_failure; 18441 18442 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 18443 delta += alen; 18444 18445 switch (gregLO3ofRM(modrm)) { 18446 case 0: DIP("prefetch %s\n", dis_buf); break; 18447 case 1: DIP("prefetchw %s\n", dis_buf); break; 18448 default: vassert(0); /*NOTREACHED*/ 18449 } 18450 break; 18451 18452 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ 18453 case 0x31: { /* RDTSC */ 18454 IRTemp val = newTemp(Ity_I64); 18455 IRExpr** args = mkIRExprVec_0(); 18456 IRDirty* d = unsafeIRDirty_1_N ( 18457 val, 18458 0/*regparms*/, 18459 "amd64g_dirtyhelper_RDTSC", 18460 &amd64g_dirtyhelper_RDTSC, 18461 args 18462 ); 18463 if (have66orF2orF3(pfx)) goto decode_failure; 18464 /* execute the dirty call, dumping the result in val. */ 18465 stmt( IRStmt_Dirty(d) ); 18466 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); 18467 putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); 18468 DIP("rdtsc\n"); 18469 break; 18470 } 18471 18472 //.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */ 18473 //.. 18474 //.. case 0xA1: /* POP %FS */ 18475 //.. dis_pop_segreg( R_FS, sz ); break; 18476 //.. case 0xA9: /* POP %GS */ 18477 //.. dis_pop_segreg( R_GS, sz ); break; 18478 //.. 18479 //.. case 0xA0: /* PUSH %FS */ 18480 //.. dis_push_segreg( R_FS, sz ); break; 18481 //.. case 0xA8: /* PUSH %GS */ 18482 //.. 
//.. dis_push_segreg( R_GS, sz ); break;
18483
18484 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
18485 case 0x90:
18486 case 0x91:
18487 case 0x92: /* set-Bb/set-NAEb (set if below) */
18488 case 0x93: /* set-NBb/set-AEb (set if not below) */
18489 case 0x94: /* set-Zb/set-Eb (set if zero) */
18490 case 0x95: /* set-NZb/set-NEb (set if not zero) */
18491 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
18492 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
18493 case 0x98: /* set-Sb (set if negative) */
18494 case 0x99: /* set-NSb (set if not negative) */
18495 case 0x9A: /* set-P (set if parity even) */
18496 case 0x9B: /* set-NP (set if parity odd) */
18497 case 0x9C: /* set-Lb/set-NGEb (set if less) */
18498 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
18499 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
18500 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
18501 if (haveF2orF3(pfx)) goto decode_failure;
18502 t1 = newTemp(Ity_I8);
18503 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
18504 modrm = getUChar(delta);
18505 if (epartIsReg(modrm)) {
18506 delta++;
18507 putIRegE(1, pfx, modrm, mkexpr(t1));
18508 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
18509 nameIRegE(1,pfx,modrm));
18510 } else {
18511 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18512 delta += alen;
18513 storeLE( mkexpr(addr), mkexpr(t1) );
18514 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
18515 }
18516 break;
18517
18518 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
18519
18520 case 0xA4: /* SHLDv imm8,Gv,Ev */
18521 modrm = getUChar(delta);
18522 d64 = delta + lengthAMode(pfx, delta);
18523 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
18524 delta = dis_SHLRD_Gv_Ev (
18525 vbi, pfx, delta, modrm, sz,
18526 mkU8(getUChar(d64)), True, /* literal */
18527 dis_buf, True /* left */ );
18528 break;
18529 case 0xA5: /* SHLDv %cl,Gv,Ev */
18530 modrm = getUChar(delta);
18531 delta = dis_SHLRD_Gv_Ev (
18532 vbi, pfx, delta, modrm, sz,
18533 getIRegCL(), False, /* not literal */
18534 "%cl", True /* left */ );
18535 break;
18536
18537 case 0xAC: /* SHRDv imm8,Gv,Ev */
18538 modrm = getUChar(delta);
18539 d64 = delta + lengthAMode(pfx, delta);
18540 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
18541 delta = dis_SHLRD_Gv_Ev (
18542 vbi, pfx, delta, modrm, sz,
18543 mkU8(getUChar(d64)), True, /* literal */
18544 dis_buf, False /* right */ );
18545 break;
18546 case 0xAD: /* SHRDv %cl,Gv,Ev */
18547 modrm = getUChar(delta);
18548 delta = dis_SHLRD_Gv_Ev (
18549 vbi, pfx, delta, modrm, sz,
18550 getIRegCL(), False, /* not literal */
18551 "%cl", False /* right */);
18552 break;
18553
18554 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
18555 case 0x05: /* SYSCALL */
18556 guest_RIP_next_mustcheck = True;
18557 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
18558 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
18559 /* It's important that all guest state is up-to-date
18560 at this point. So we declare an end-of-block here, which
18561 forces any cached guest state to be flushed. 
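            As an informal, worked illustration (the address is made up):
            if the two SYSCALL bytes 0F 05 sit at guest address 0x401000
            and carry no prefixes, delta already points just past them
            here, so the putIReg64 above leaves 0x401002 in %rcx,
            guest_RIP_next_assumed is likewise 0x401002, and
            disInstr_AMD64 later cross-checks that value against
            guest_RIP_curr_instr + dres.len.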
*/ 18562 jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed); 18563 dres.whatNext = Dis_StopHere; 18564 DIP("syscall\n"); 18565 break; 18566 18567 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ 18568 18569 case 0xC0: { /* XADD Gb,Eb */ 18570 Bool decode_OK = False; 18571 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta ); 18572 if (!decode_OK) 18573 goto decode_failure; 18574 break; 18575 } 18576 case 0xC1: { /* XADD Gv,Ev */ 18577 Bool decode_OK = False; 18578 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); 18579 if (!decode_OK) 18580 goto decode_failure; 18581 break; 18582 } 18583 18584 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */ 18585 18586 case 0x71: 18587 case 0x72: 18588 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 18589 18590 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 18591 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 18592 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 18593 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 18594 18595 case 0xFC: 18596 case 0xFD: 18597 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 18598 18599 case 0xEC: 18600 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 18601 18602 case 0xDC: 18603 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 18604 18605 case 0xF8: 18606 case 0xF9: 18607 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 18608 18609 case 0xE8: 18610 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 18611 18612 case 0xD8: 18613 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 18614 18615 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 18616 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 18617 18618 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 18619 18620 case 0x74: 18621 case 0x75: 18622 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 18623 18624 case 0x64: 18625 case 0x65: 18626 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 18627 18628 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 18629 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 18630 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 18631 18632 case 0x68: 18633 case 0x69: 18634 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 18635 18636 case 0x60: 18637 case 0x61: 18638 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 18639 18640 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 18641 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 18642 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 18643 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 18644 18645 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 18646 case 0xF2: 18647 case 0xF3: 18648 18649 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 18650 case 0xD2: 18651 case 0xD3: 18652 18653 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 18654 case 0xE2: 18655 { 18656 Long delta0 = delta-1; 18657 Bool decode_OK = False; 18658 18659 /* If sz==2 this is SSE, and we assume sse idec has 18660 already spotted those cases by now. 
*/ 18661 if (sz != 4 && sz != 8) 18662 goto decode_failure; 18663 if (have66orF2orF3(pfx)) 18664 goto decode_failure; 18665 18666 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 ); 18667 if (!decode_OK) { 18668 delta = delta0; 18669 goto decode_failure; 18670 } 18671 break; 18672 } 18673 18674 case 0x0E: /* FEMMS */ 18675 case 0x77: /* EMMS */ 18676 if (sz != 4) 18677 goto decode_failure; 18678 do_EMMS_preamble(); 18679 DIP("{f}emms\n"); 18680 break; 18681 18682 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */ 18683 case 0x01: /* 0F 01 /0 -- SGDT */ 18684 /* 0F 01 /1 -- SIDT */ 18685 { 18686 /* This is really revolting, but ... since each processor 18687 (core) only has one IDT and one GDT, just let the guest 18688 see it (pass-through semantics). I can't see any way to 18689 construct a faked-up value, so don't bother to try. */ 18690 modrm = getUChar(delta); 18691 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 18692 delta += alen; 18693 if (epartIsReg(modrm)) goto decode_failure; 18694 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 18695 goto decode_failure; 18696 switch (gregLO3ofRM(modrm)) { 18697 case 0: DIP("sgdt %s\n", dis_buf); break; 18698 case 1: DIP("sidt %s\n", dis_buf); break; 18699 default: vassert(0); /*NOTREACHED*/ 18700 } 18701 18702 IRDirty* d = unsafeIRDirty_0_N ( 18703 0/*regparms*/, 18704 "amd64g_dirtyhelper_SxDT", 18705 &amd64g_dirtyhelper_SxDT, 18706 mkIRExprVec_2( mkexpr(addr), 18707 mkU64(gregLO3ofRM(modrm)) ) 18708 ); 18709 /* declare we're writing memory */ 18710 d->mFx = Ifx_Write; 18711 d->mAddr = mkexpr(addr); 18712 d->mSize = 6; 18713 stmt( IRStmt_Dirty(d) ); 18714 break; 18715 } 18716 18717 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ 18718 18719 default: 18720 goto decode_failure; 18721 } /* switch (opc) for the 2-byte opcodes */ 18722 goto decode_success; 18723 } /* case 0x0F: of primary opcode */ 18724 18725 /* ------------------------ ??? ------------------------ */ 18726 18727 default: 18728 decode_failure: 18729 /* All decode failures end up here. */ 18730 vex_printf("vex amd64->IR: unhandled instruction bytes: " 18731 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", 18732 (Int)getUChar(delta_start+0), 18733 (Int)getUChar(delta_start+1), 18734 (Int)getUChar(delta_start+2), 18735 (Int)getUChar(delta_start+3), 18736 (Int)getUChar(delta_start+4), 18737 (Int)getUChar(delta_start+5), 18738 (Int)getUChar(delta_start+6), 18739 (Int)getUChar(delta_start+7) ); 18740 18741 /* Tell the dispatcher that this insn cannot be decoded, and so has 18742 not been executed, and (is currently) the next to be executed. 18743 RIP should be up-to-date since it made so at the start of each 18744 insn, but nevertheless be paranoid and update it again right 18745 now. */ 18746 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 18747 jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr); 18748 dres.whatNext = Dis_StopHere; 18749 dres.len = 0; 18750 /* We also need to say that a CAS is not expected now, regardless 18751 of what it might have been set to at the start of the function, 18752 since the IR that we've emitted just above (to synthesis a 18753 SIGILL) does not involve any CAS, and presumably no other IR has 18754 been emitted for this (non-decoded) insn. */ 18755 *expect_CAS = False; 18756 return dres; 18757 18758 } /* switch (opc) for the main (primary) opcode switch. */ 18759 18760 decode_success: 18761 /* All decode successes end up here. 
*/ 18762 DIP("\n"); 18763 dres.len = (Int)toUInt(delta - delta_start); 18764 return dres; 18765 } 18766 18767 #undef DIP 18768 #undef DIS 18769 18770 18771 /*------------------------------------------------------------*/ 18772 /*--- Top-level fn ---*/ 18773 /*------------------------------------------------------------*/ 18774 18775 /* Disassemble a single instruction into IR. The instruction 18776 is located in host memory at &guest_code[delta]. */ 18777 18778 DisResult disInstr_AMD64 ( IRSB* irsb_IN, 18779 Bool put_IP, 18780 Bool (*resteerOkFn) ( void*, Addr64 ), 18781 Bool resteerCisOk, 18782 void* callback_opaque, 18783 UChar* guest_code_IN, 18784 Long delta, 18785 Addr64 guest_IP, 18786 VexArch guest_arch, 18787 VexArchInfo* archinfo, 18788 VexAbiInfo* abiinfo, 18789 Bool host_bigendian_IN ) 18790 { 18791 Int i, x1, x2; 18792 Bool expect_CAS, has_CAS; 18793 DisResult dres; 18794 18795 /* Set globals (see top of this file) */ 18796 vassert(guest_arch == VexArchAMD64); 18797 guest_code = guest_code_IN; 18798 irsb = irsb_IN; 18799 host_is_bigendian = host_bigendian_IN; 18800 guest_RIP_curr_instr = guest_IP; 18801 guest_RIP_bbstart = guest_IP - delta; 18802 18803 /* We'll consult these after doing disInstr_AMD64_WRK. */ 18804 guest_RIP_next_assumed = 0; 18805 guest_RIP_next_mustcheck = False; 18806 18807 x1 = irsb_IN->stmts_used; 18808 expect_CAS = False; 18809 dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, 18810 resteerCisOk, 18811 callback_opaque, 18812 delta, archinfo, abiinfo ); 18813 x2 = irsb_IN->stmts_used; 18814 vassert(x2 >= x1); 18815 18816 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it 18817 got it right. Failure of this assertion is serious and denotes 18818 a bug in disInstr. */ 18819 if (guest_RIP_next_mustcheck 18820 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) { 18821 vex_printf("\n"); 18822 vex_printf("assumed next %%rip = 0x%llx\n", 18823 guest_RIP_next_assumed ); 18824 vex_printf(" actual next %%rip = 0x%llx\n", 18825 guest_RIP_curr_instr + dres.len ); 18826 vpanic("disInstr_AMD64: disInstr miscalculated next %rip"); 18827 } 18828 18829 /* See comment at the top of disInstr_AMD64_WRK for meaning of 18830 expect_CAS. Here, we (sanity-)check for the presence/absence of 18831 IRCAS as directed by the returned expect_CAS value. */ 18832 has_CAS = False; 18833 for (i = x1; i < x2; i++) { 18834 if (irsb_IN->stmts[i]->tag == Ist_CAS) 18835 has_CAS = True; 18836 } 18837 18838 if (expect_CAS != has_CAS) { 18839 /* inconsistency detected. re-disassemble the instruction so as 18840 to generate a useful error message; then assert. */ 18841 vex_traceflags |= VEX_TRACE_FE; 18842 dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn, 18843 resteerCisOk, 18844 callback_opaque, 18845 delta, archinfo, abiinfo ); 18846 for (i = x1; i < x2; i++) { 18847 vex_printf("\t\t"); 18848 ppIRStmt(irsb_IN->stmts[i]); 18849 vex_printf("\n"); 18850 } 18851 /* Failure of this assertion is serious and denotes a bug in 18852 disInstr. */ 18853 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling"); 18854 } 18855 18856 return dres; 18857 } 18858 18859 18860 /*------------------------------------------------------------*/ 18861 /*--- Unused stuff ---*/ 18862 /*------------------------------------------------------------*/ 18863 18864 // A potentially more Memcheck-friendly version of gen_LZCNT, if 18865 // this should ever be needed. 
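// As an informal, self-contained illustration of the same scheme on an
// ordinary C value (a sketch only -- lzcnt32_sketch is not part of this
// file; the commented-out IR version below follows the same steps):
//
//   static UInt lzcnt32_sketch ( UInt w )
//   {
//      w |= (w >> 1);    /* propagate the topmost 1-bit downwards,   */
//      w |= (w >> 2);    /* giving a word of the form 0...01...1     */
//      w |= (w >> 4);
//      w |= (w >> 8);
//      w |= (w >> 16);
//      w = ~w;           /* now of the form 1...10...0               */
//      /* the population count of w is the number of leading zeroes
//         of the original value (or 32 if that value was zero) */
//      w = w - ((w >> 1) & 0x55555555);
//      w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
//      w = (w + (w >> 4)) & 0x0F0F0F0F;
//      return (w * 0x01010101) >> 24;
//   }
//
// For example, w = 0x00400000 propagates to 0x007FFFFF, inverts to
// 0xFF800000, and the population count of that is 9 -- the number of
// leading zeroes in the original value.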
18866 // 18867 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 18868 //{ 18869 // /* Scheme is simple: propagate the most significant 1-bit into all 18870 // lower positions in the word. This gives a word of the form 18871 // 0---01---1. Now invert it, giving a word of the form 18872 // 1---10---0, then do a population-count idiom (to count the 1s, 18873 // which is the number of leading zeroes, or the word size if the 18874 // original word was 0. 18875 // */ 18876 // Int i; 18877 // IRTemp t[7]; 18878 // for (i = 0; i < 7; i++) { 18879 // t[i] = newTemp(ty); 18880 // } 18881 // if (ty == Ity_I64) { 18882 // assign(t[0], binop(Iop_Or64, mkexpr(src), 18883 // binop(Iop_Shr64, mkexpr(src), mkU8(1)))); 18884 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]), 18885 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2)))); 18886 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]), 18887 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4)))); 18888 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]), 18889 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8)))); 18890 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]), 18891 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16)))); 18892 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]), 18893 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32)))); 18894 // assign(t[6], unop(Iop_Not64, mkexpr(t[5]))); 18895 // return gen_POPCOUNT(ty, t[6]); 18896 // } 18897 // if (ty == Ity_I32) { 18898 // assign(t[0], binop(Iop_Or32, mkexpr(src), 18899 // binop(Iop_Shr32, mkexpr(src), mkU8(1)))); 18900 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]), 18901 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2)))); 18902 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]), 18903 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4)))); 18904 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]), 18905 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8)))); 18906 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]), 18907 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16)))); 18908 // assign(t[5], unop(Iop_Not32, mkexpr(t[4]))); 18909 // return gen_POPCOUNT(ty, t[5]); 18910 // } 18911 // if (ty == Ity_I16) { 18912 // assign(t[0], binop(Iop_Or16, mkexpr(src), 18913 // binop(Iop_Shr16, mkexpr(src), mkU8(1)))); 18914 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]), 18915 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2)))); 18916 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]), 18917 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4)))); 18918 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]), 18919 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8)))); 18920 // assign(t[4], unop(Iop_Not16, mkexpr(t[3]))); 18921 // return gen_POPCOUNT(ty, t[4]); 18922 // } 18923 // vassert(0); 18924 //} 18925 18926 18927 /*--------------------------------------------------------------------*/ 18928 /*--- end guest_amd64_toIR.c ---*/ 18929 /*--------------------------------------------------------------------*/ 18930