/*--------------------------------------------------------------------*/
/*--- begin                                   guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do.

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   RDTSC returns zero, always.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/

/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.
   These are:

      cmp{s,sb,sw,sd,sq}
      in{s,sb,sw,sd}
      jcxz, jecxz, jrcxz
      lod{s,sb,sw,sd,sq}
      loop{,e,bz,be,z}
      mov{s,sb,sw,sd,sq}
      out{s,sb,sw,sd}
      rep{,e,ne,nz}
      sca{s,sb,sw,sd,sq}
      sto{s,sb,sw,sd,sq}
      xlat{,b} */

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 3 is allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
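/* For concreteness, the preamble spelled out as raw bytes.  This is
   an illustrative sketch only -- the array name is hypothetical and
   nothing in this file uses it; it merely restates the encoding
   described above. */
#if 0
static const UChar special_insn_preamble[16] = {
   0x48, 0xC1, 0xC7, 0x03,   /* rolq $3,  %rdi */
   0x48, 0xC1, 0xC7, 0x0D,   /* rolq $13, %rdi */
   0x48, 0xC1, 0xC7, 0x3D,   /* rolq $61, %rdi */
   0x48, 0xC1, 0xC7, 0x33    /* rolq $51, %rdi */
};
#endif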
/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR. ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}

static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}
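/* To illustrate how these combinators compose (illustrative sketch
   only; the function name is hypothetical and nothing calls it):
   generate IR which reads a 64-bit guest register at offset
   'guestOff', adds 1, and writes it back.  This is roughly the shape
   of IR an increment of a 64-bit guest register produces at this
   level. */
#if 0
static void example_increment_guest_u64 ( Int guestOff )
{
   IRTemp t = newTemp(Ity_I64);
   assign( t, binop(Iop_Add64,
                    IRExpr_Get(guestOff, Ity_I64),
                    mkU64(1)) );
   stmt( IRStmt_Put(guestOff, mkexpr(t)) );
}
#endif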
/*------------------------------------------------------------*/
/*--- Debugging output ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state. ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)
//..
//.. #define OFFB_CS    offsetof(VexGuestX86State,guest_CS)
//.. #define OFFB_DS    offsetof(VexGuestX86State,guest_DS)
//.. #define OFFB_ES    offsetof(VexGuestX86State,guest_ES)
//.. #define OFFB_FS    offsetof(VexGuestX86State,guest_FS)
//.. #define OFFB_GS    offsetof(VexGuestX86State,guest_GS)
//.. #define OFFB_SS    offsetof(VexGuestX86State,guest_SS)
//.. #define OFFB_LDT   offsetof(VexGuestX86State,guest_LDT)
//..
//.. #define OFFB_GDT   offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestAMD64State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestAMD64State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestAMD64State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestAMD64State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestAMD64State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestAMD64State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestAMD64State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestAMD64State,guest_XMM7)
#define OFFB_XMM8      offsetof(VexGuestAMD64State,guest_XMM8)
#define OFFB_XMM9      offsetof(VexGuestAMD64State,guest_XMM9)
#define OFFB_XMM10     offsetof(VexGuestAMD64State,guest_XMM10)
#define OFFB_XMM11     offsetof(VexGuestAMD64State,guest_XMM11)
#define OFFB_XMM12     offsetof(VexGuestAMD64State,guest_XMM12)
#define OFFB_XMM13     offsetof(VexGuestAMD64State,guest_XMM13)
#define OFFB_XMM14     offsetof(VexGuestAMD64State,guest_XMM14)
#define OFFB_XMM15     offsetof(VexGuestAMD64State,guest_XMM15)
#define OFFB_XMM16     offsetof(VexGuestAMD64State,guest_XMM16)

#define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
#define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the ---*/
/*--- amd64 insn stream. ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

//.. #define R_AL (0+R_EAX)
//.. #define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((((Long)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((((Long)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((((Long)x) << 32) >> 32);
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}
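/* Worked example (illustrative): for the modRM byte 0xD9 ==
   11 011 001b, epartIsReg is True, gregLO3ofRM gives 3 (011b) and
   eregLO3ofRM gives 1 (001b).  Which architectural registers those
   3-bit fields actually denote depends further on the REX bits --
   see the prefix machinery below. */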
/* Get an 8/16-bit unsigned value out of the insn stream. */

static UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}
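/* Worked example (illustrative): if the stream holds the bytes
   78 56 34 12 at 'delta', getSDisp32(delta) assembles them
   least-significant-byte-first into 0x12345678, and sign-extension
   yields 0x0000000012345678.  The bytes 00 00 00 80 instead give
   0x80000000, which sign-extends to 0xFFFFFFFF80000000. */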
static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSizeMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- For dealing with prefixes. ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 16 bits of the prefix are 0x3141, just as a hacky way
   to ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */

#define PFX_EMPTY 0x31410000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
/* Apparently unused.
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
*/
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}

/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F2 set */
static Bool have66orF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2)) > 0);
}

/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return
      p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}
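/* Illustrative sketch only: how a raw REX byte (layout 0100WRXB,
   so 0x40..0x4F) would fold into a Prefix value under this scheme.
   The real decode loop lives in disInstr_AMD64 and is not shown
   here; the function name below is hypothetical. */
#if 0
static Prefix example_addREX ( Prefix pfx, UChar rex )
{
   vassert(rex >= 0x40 && rex <= 0x4F);
   pfx |= PFX_REX;
   if (rex & 8) pfx |= PFX_REXW;   /* REX.W: 64-bit operand size */
   if (rex & 4) pfx |= PFX_REXR;   /* REX.R: extends modRM 'g' field */
   if (rex & 2) pfx |= PFX_REXX;   /* REX.X: extends SIB index */
   if (rex & 1) pfx |= PFX_REXB;   /* REX.B: extends modRM 'e'/base */
   return pfx;
}
#endif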
/*------------------------------------------------------------*/
/*--- For dealing with integer registers ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/


/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}
/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */

static
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}

/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}
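/* Worked example (illustrative): with sz==1 and no REX present,
   encoding 4 denotes %ah, so offsetIReg(1, R_RSP, True)
   == 1 + OFFB_RAX, i.e. the second-lowest byte of the RAX slot on a
   little-endian host.  With a REX byte present, the same encoding
   instead denotes %spl, which lives at OFFB_RSP. */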
/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}


/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(!host_is_bigendian);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}


/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */
static IRExpr* getIReg32 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}


/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}


/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                 )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}
/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}


/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
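/* Worked example (illustrative): for modRM byte 0xD9 (mod=11, g=011,
   e=001) with REX.R set and REX.B clear, gregOfRexRM gives 3+8 == 11
   (i.e. %r11 for a 64-bit access) and eregOfRexRM gives 1 (%rcx). */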
/*------------------------------------------------------------*/
/*--- For dealing with XMM registers ---*/
/*------------------------------------------------------------*/

//.. static Int segmentGuestRegOffset ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return OFFB_ES;
//..       case R_CS: return OFFB_CS;
//..       case R_SS: return OFFB_SS;
//..       case R_DS: return OFFB_DS;
//..       case R_FS: return OFFB_FS;
//..       case R_GS: return OFFB_GS;
//..       default: vpanic("segmentGuestRegOffset(x86)");
//..    }
//.. }

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0:  return OFFB_XMM0;
      case 1:  return OFFB_XMM1;
      case 2:  return OFFB_XMM2;
      case 3:  return OFFB_XMM3;
      case 4:  return OFFB_XMM4;
      case 5:  return OFFB_XMM5;
      case 6:  return OFFB_XMM6;
      case 7:  return OFFB_XMM7;
      case 8:  return OFFB_XMM8;
      case 9:  return OFFB_XMM9;
      case 10: return OFFB_XMM10;
      case 11: return OFFB_XMM11;
      case 12: return OFFB_XMM12;
      case 13: return OFFB_XMM13;
      case 14: return OFFB_XMM14;
      case 15: return OFFB_XMM15;
      default: vpanic("xmmGuestRegOffset(amd64)");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}
//.. static IRExpr* getSReg ( UInt sreg )
//.. {
//..    return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
//.. }
//..
//.. static void putSReg ( UInt sreg, IRExpr* e )
//.. {
//..    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
//..    stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
//.. }

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}
/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point )
         ));
}


/*------------------------------------------------------------*/
/*--- Helpers for %rflags. ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
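/* To make the lazy flags scheme concrete (illustrative only): after,
   say, a 32-bit ADD, the decoder leaves CC_OP holding
   AMD64G_CC_OP_ADDL and CC_DEP1/CC_DEP2 holding the two operands --
   no actual flag bits are computed at that point.  Only when a later
   instruction genuinely needs the flags (jcc, setcc, adc, ...) is one
   of the three helpers above used to materialise them from the stored
   thunk. */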
/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}
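/* Usage sketch (illustrative; 'argL' and 'argR' are hypothetical
   temporaries holding the operands): a decoder for a 32-bit SUB or
   CMP would record its flags with

      setFlags_DEP1_DEP2( Iop_Sub8, argL, argR, Ity_I32 );

   the Iop_Sub8 is auto-sized up two steps (8 -> 32 bits), selecting
   AMD64G_CC_OP_SUBB + 2, i.e. the 32-bit SUB thunk operation. */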
/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                   mkU64(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                   widenUto64(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                   widenUto64(mkexpr(resUS)))) );
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
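/* Note on the guarded updates in setFlags_DEP1_DEP2_shift above:
   IRExpr_Mux0X(cond, expr0, exprX) evaluates to expr0 when the 8-bit
   condition is zero and to exprX otherwise.  So each thunk field is
   rewritten only when 'guard' (shift amount != 0) is nonzero, and is
   otherwise re-stored unchanged -- matching the x86/amd64 rule that a
   shift by zero leaves the flags alone. */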

/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
         break;
      case Ity_I64:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
         break;
      default:
         vpanic("setFlags_MUL(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the AMD encoding. */

static HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e";  /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a";  /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g";  /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}

static
AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
                                          /*OUT*/Bool*   needInvert )
{
   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}
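
/* Example: positiveIse_AMD64Condcode(AMD64CondNZ, &inv) returns
   AMD64CondZ with inv == True, since the AMD encoding pairs each
   positive condition (an even number) with its negation (the
   following odd number).  Callers then test the positive condition
   and invert the branch sense if needed. */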

/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
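
/* Note on the DEP2 encoding above: DEP2 holds ta2 ^ oldC rather than
   ta2 itself, and NDEP holds oldC.  Presumably this lets the rflags
   helper recover the real second argument as DEP2 ^ NDEP while
   keeping the thunk fields usable for definedness tracking; see the
   ADC/SBB cases on the consuming side (the flag-calculation helpers)
   for the inverse transformation. */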

/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

static HChar* nameGrp1 ( Int opc_aux )
{
   static HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

static HChar* nameGrp2 ( Int opc_aux )
{
   static HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr",
         "shl"/*entry 6 is an undocumented alias of shl*/, "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

static HChar* nameGrp4 ( Int opc_aux )
{
   static HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

static HChar* nameGrp5 ( Int opc_aux )
{
   static HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

static HChar* nameGrp8 ( Int opc_aux )
{
   static HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

//.. static HChar* nameSReg ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return "%es";
//..       case R_CS: return "%cs";
//..       case R_SS: return "%ss";
//..       case R_DS: return "%ds";
//..       case R_FS: return "%fs";
//..       case R_GS: return "%gs";
//..       default: vpanic("nameSReg(x86)");
//..    }
//.. }

static HChar* nameMMXReg ( Int mmxreg )
{
   static HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

static HChar* nameXMMReg ( Int xmmreg )
{
   static HChar* xmm_names[16]
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

static HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( IRJumpKind kind, Addr64 d64 )
{
   irsb->next     = mkU64(d64);
   irsb->jumpkind = kind;
}

static void jmp_treg( IRJumpKind kind, IRTemp t )
{
   irsb->next     = mkexpr(t);
   irsb->jumpkind = kind;
}

static
void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false) ) );
      irsb->next     = mkU64(d64_true);
      irsb->jumpkind = Ijk_Boring;
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true) ) );
      irsb->next     = mkU64(d64_false);
      irsb->jumpkind = Ijk_Boring;
   }
}
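
/* Sketch of the lowering (illustrative): for "jz 0x5000" with
   fall-through address 0x4000, jcc_01 emits
      if (cond Z holds) goto 0x5000;    -- the IRStmt_Exit
      goto 0x4000;                      -- irsb->next
   or, when the requested condition is odd-numbered (a negation),
   tests its positive twin and swaps the two targets instead. */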

/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static
void make_redzone_AbiHint ( VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;
   vassert(szB >= 0);

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);
   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
   if (szB > 0)
      stmt( IRStmt_AbiHint(
               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
               szB,
               mkexpr(nia)
            ));
}


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static
HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   return ""; /* no override */
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override
   present. */
static
IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_zero) {
         /* Note that this is a linux-kernel specific hack that relies
            on the assumption that %fs is always zero. */
         /* return virtual + guest_FS_ZERO. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_0x60) {
         /* Note that this is a darwin-kernel specific hack that relies
            on the assumption that %gs is always 0x60. */
         /* return virtual + guest_GS_0x60. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_GS_0x60, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   /* --- address size override --- */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   return virtual;
}
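
/* Example of the address-size override: with a 0x67 prefix, an
   effective address such as 0x123456789 is truncated to its low 32
   bits, i.e. 64to32 followed by 32Uto64 yields 0x23456789, matching
   the architectural rule that the top 32 bits become zero. */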

//.. {
//..    Int    sreg;
//..    IRType hWordTy;
//..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
//..
//..    if (sorb == 0)
//..       /* the common case - no override */
//..       return virtual;
//..
//..    switch (sorb) {
//..       case 0x3E: sreg = R_DS; break;
//..       case 0x26: sreg = R_ES; break;
//..       case 0x64: sreg = R_FS; break;
//..       case 0x65: sreg = R_GS; break;
//..       default: vpanic("handleAddrOverrides(x86,guest)");
//..    }
//..
//..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
//..
//..    seg_selector = newTemp(Ity_I32);
//..    ldt_ptr      = newTemp(hWordTy);
//..    gdt_ptr      = newTemp(hWordTy);
//..    r64          = newTemp(Ity_I64);
//..
//..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
//..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
//..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
//..
//..    /*
//..    Call this to do the translation and limit checks:
//..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
//..                                  UInt seg_selector, UInt virtual_addr )
//..    */
//..    assign(
//..       r64,
//..       mkIRExprCCall(
//..          Ity_I64,
//..          0/*regparms*/,
//..          "x86g_use_seg_selector",
//..          &x86g_use_seg_selector,
//..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
//..                         mkexpr(seg_selector), virtual)
//..       )
//..    );
//..
//..    /* If the high 32 of the result are non-zero, there was a
//..       failure in address translation.  In which case, make a
//..       quick exit.
//..    */
//..    stmt(
//..       IRStmt_Exit(
//..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
//..          Ijk_MapFail,
//..          IRConst_U32( guest_eip_curr_instr )
//..       )
//..    );
//..
//..    /* otherwise, here's the translated result. */
//..    return unop(Iop_64to32, mkexpr(r64));
//.. }


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
*/

static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

static
IRTemp disAMode ( /*OUT*/Int* len,
                  VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long d   = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }
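
      /* Worked example (illustrative): modrm byte 0x43 -- mod=01,
         rm=011 -- selects the d8(%rbx) form just above (d8(%r11) if
         REX.B is set): one displacement byte follows, *len is 2, and
         the returned temp holds %rbx plus the sign-extended d8, after
         any overrides. */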

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed = guest_RIP_bbstart
                                    + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
                 = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
                 = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                                         mkU8(scale)),
                        mkU64(d))));
         }

         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d8 + %base
            | %index != %RSP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp8(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d32 + %base
            | %index != %RSP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp32(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}
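
/* Worked example (illustrative): the two bytes 04 8B decode as
   mod=00 rm=100 (so a SIB byte follows), then SIB scale=10,
   index=001, base=011, i.e. (%rbx,%rcx,4) with no displacement and
   *len == 2.  Had base_r been %rbp/%r13, the same encoding would
   instead take a trailing d32 and *len would be 6. */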

/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled. */

static UInt lengthAMode ( Prefix pfx, Long delta )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         return 5;

      case 0x04: {
         /* SIB, with no displacement. */
         UChar sib    = getUChar(delta);
         UChar base_r = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         if (base_is_BPor13) {
            return 6;
         } else {
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a pointer to the modRM byte, the actual operation, and
   the data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
ULong dis_op2_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
         putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}
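
/* Example (illustrative): "addl 8(%rax),%ecx" (opcode 0x03) comes
   through here with op8 == Iop_Add8 and size 4: it loads the memory
   operand, adds it to %ecx, sets the ADD thunk via
   setFlags_DEP1_DEP2, and writes the sum back to %ecx. */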

/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a pointer to the modRM byte, the actual operation, and
   the data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpv
                       OP %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_op2_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (pfx & PFX_LOCK) {
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm), dis_buf);
      return len+delta0;
   }
}


/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a pointer to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmpv
                       PUT tmpv, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpb
                       PUT tmpb, %G
*/
static
ULong dis_mov_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegE(size,pfx,rm),
                           nameIRegG(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           dis_buf,
                           nameIRegG(size,pfx,rm));
      return delta0+len;
   }
}


/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a pointer to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G, tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_mov_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           dis_buf);
      return len+delta0;
   }
}

/* op $immediate, AL/AX/EAX/RAX. */
static
ULong dis_op_imm_A ( Int    size,
                     Bool   carrying,
                     IROp   op8,
                     Bool   keep,
                     Long   delta,
                     HChar* t_amd64opc )
{
   Int    size4 = imin(size,4);
   IRType ty    = szToITy(size);
   IRTemp dst0  = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   Long   lit   = getSDisp(size4,delta);
   assign(dst0, getIRegRAX(size));
   assign(src,  mkU(ty,lit & mkSizeMask(size)));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(amd64,guest)");

   if (keep)
      putIRegRAX(size, mkexpr(dst1));

   DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
                           lit, nameIRegRAX(size));
   return delta+size4;
}


/* Sign- and Zero-extending moves. */
static
ULong dis_movx_E_G ( VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, Int szs, Int szd, Bool sign_extend )
{
   UChar rm = getUChar(delta);
   if (epartIsReg(rm)) {
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       getIRegE(szs,pfx,rm)));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               nameIRegE(szs,pfx,rm),
                               nameIRegG(szd,pfx,rm));
      return 1+delta;
   }

   /* E refers to memory */
   {
      Int len;
      HChar dis_buf[50];
      IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       loadLE(szToITy(szs),mkexpr(addr))));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               dis_buf,
                               nameIRegG(szd,pfx,rm));
      return len+delta;
   }
}

/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case */
   if (sz == 8) {
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      IROp   op    = signed_divide ? Iop_DivModS64to32
                                   : Iop_DivModU64to32;
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp dst64 = newTemp(Ity_I64);
      switch (sz) {
         case 4:
            assign( src64,
                    binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
            assign( dst64,
                    binop(op, mkexpr(src64), mkexpr(t)) );
            putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
            putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
            break;
         case 2: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
            assign( src64, unop(widen3264,
                                binop(Iop_16HLto32,
                                      getIRegRDX(2),
                                      getIRegRAX(2))) );
            assign( dst64, binop(op, mkexpr(src64),
                                     unop(widen1632,mkexpr(t))) );
            putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
            putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
            break;
         }
         case 1: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
            IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
            assign( src64, unop(widen3264,
                                unop(widen1632, getIRegRAX(2))) );
            assign( dst64,
                    binop(op, mkexpr(src64),
                              unop(widen1632, unop(widen816, mkexpr(t)))) );
            putIRegRAX( 1, unop(Iop_16to8,
                                unop(Iop_32to16,
                                     unop(Iop_64to32,mkexpr(dst64)))) );
            putIRegAH( unop(Iop_16to8,
                            unop(Iop_32to16,
                                 unop(Iop_64HIto32,mkexpr(dst64)))) );
            break;
         }
         default:
            vpanic("codegen_div(amd64)");
      }
   }
}
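
/* Example (illustrative): for "divl %ecx" (sz == 4), src64 is
   EDX:EAX, dst64 = DivModU64to32(src64, %ecx), and EAX then receives
   the low half (the quotient) while EDX receives the high half (the
   remainder). */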

static
ULong dis_Grp1 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, Long d64 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   ULong   mask = mkSizeMask(sz);

   switch (gregLO3ofRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1(amd64): unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIRegE(sz,pfx,modrm));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      if (gregLO3ofRM(modrm) < 7)
         putIRegE(sz, pfx, modrm, mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
          nameIRegE(sz,pfx,modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (gregLO3ofRM(modrm) < 7) {
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
          d64, dis_buf);
   }
   return delta;
}
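
/* Example (illustrative): "addl $4, (%rax)" arrives here as Grp1
   opcode 0x83, with the reg field of the modrm byte selecting "add"
   (0); d64 is the sign-extended immediate 4, and the memory variant
   above performs load / add / store, or a casLE restart-on-failure
   sequence when LOCK-prefixed. */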

/* Group 2 extended opcodes.  shift_expr must be an 8-bit typed
   expression. */

static
ULong dis_Grp2 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                 HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty   = szToITy(sz);
   IRTemp dst0 = newTemp(ty);
   IRTemp dst1 = newTemp(ty);
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIRegE(sz, pfx, modrm));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   isShift = False;
   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; }

   isRotate = False;
   switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (gregLO3ofRM(modrm) == 6) {
      *decode_OK = False;
      return delta;
   }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(amd64)");
   }

   if (isRotateC) {
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
         Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
      Bool     left = toBool(gregLO3ofRM(modrm) == 2);
      IRExpr** argsVALUE;
      IRExpr** argsRFLAGS;

      IRTemp new_value  = newTemp(Ity_I64);
      IRTemp new_rflags = newTemp(Ity_I64);
      IRTemp old_rflags = newTemp(Ity_I64);

      assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );

      argsVALUE
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(sz) );
      assign( new_value,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsVALUE
              )
            );

      argsRFLAGS
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(-sz) );
      assign( new_rflags,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsRFLAGS
              )
            );

      assign( dst1, narrowTo(ty, mkexpr(new_value)) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   }

   else
   if (isShift) {

      IRTemp pre64     = newTemp(Ity_I64);
      IRTemp res64     = newTemp(Ity_I64);
      IRTemp res64ss   = newTemp(Ity_I64);
      IRTemp shift_amt = newTemp(Ity_I8);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);
      IROp   op64;

      switch (gregLO3ofRM(modrm)) {
         case 4: op64 = Iop_Shl64; break;
         case 5: op64 = Iop_Shr64; break;
         case 7: op64 = Iop_Sar64; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
      /* shift_amt = shift_expr & MASK, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );

      /* suitably widen the value to be shifted to 64 bits. */
      assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
                                     : widenUto64(mkexpr(dst0)) );

      /* res64 = pre64 `shift` shift_amt */
      assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );

      /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
      assign( res64ss,
              binop(op64,
                    mkexpr(pre64),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(mask))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res64)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
                                        : (ty==Ity_I32 ? 2 : 3));
      Bool   left      = toBool(gregLO3ofRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      IRTemp rot_amt64 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I64);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));

      if (ty == Ity_I64)
         assign(rot_amt, mkexpr(rot_amt64));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_amd64g_calculate_rflags_all());

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                      mkU64(ccOp))) );
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                      widenUto64(mkexpr(dst1)))) );
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                      mkU64(0))) );
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
                                      mkexpr(oldFlags))) );
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
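
/* Illustrative note on the shift thunk above: for "shlb $1, %al"
   with %al == 0x80, res64 is 0x00 while res64ss (the undershifted
   value) is 0x80, so the flags helper can still read the last bit
   shifted out (here CF == 1) from DEP2.  A zero count leaves the
   whole thunk untouched via the Mux0X guards. */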

/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, UChar modrm,
                     Int am_sz, Int sz, ULong src_val,
                     Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I64);
   IRTemp t2m    = newTemp(Ity_I64);
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   ULong  mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2:  src_val &= 15; break;
      case 4:  src_val &= 31; break;
      case 8:  src_val &= 63; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */  mask = 0;                  break;
      case 5: /* BTS */ mask = 1ULL << src_val;    break;
      case 6: /* BTR */ mask = ~(1ULL << src_val); break;
      case 7: /* BTC */ mask = 1ULL << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  However,
            the Intel docs do not indicate any use for 0 .. 3, so we
            don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      64-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, nameIRegE(sz,pfx,modrm));
   } else {
      Int len;
      t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
      delta  += (len+1);
      assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT. */
   if (gregLO3ofRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
      } else {
         if (pfx & PFX_LOCK) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_RIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
                  mkU64(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}
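
/* Example (illustrative): "btsl $3, %eax" sets mask = 1 << 3, ORs it
   into the fetched value, writes the result back to %eax, and leaves
   the old bit 3 in the carry flag via the COPY thunk. */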
3634 */ 3635 static void codegen_mulL_A_D ( Int sz, Bool syned, 3636 IRTemp tmp, HChar* tmp_txt ) 3637 { 3638 IRType ty = szToITy(sz); 3639 IRTemp t1 = newTemp(ty); 3640 3641 assign( t1, getIRegRAX(sz) ); 3642 3643 switch (ty) { 3644 case Ity_I64: { 3645 IRTemp res128 = newTemp(Ity_I128); 3646 IRTemp resHi = newTemp(Ity_I64); 3647 IRTemp resLo = newTemp(Ity_I64); 3648 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3649 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3650 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3651 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3652 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3653 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3654 putIReg64(R_RDX, mkexpr(resHi)); 3655 putIReg64(R_RAX, mkexpr(resLo)); 3656 break; 3657 } 3658 case Ity_I32: { 3659 IRTemp res64 = newTemp(Ity_I64); 3660 IRTemp resHi = newTemp(Ity_I32); 3661 IRTemp resLo = newTemp(Ity_I32); 3662 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3663 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3664 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3665 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3666 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3667 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3668 putIRegRDX(4, mkexpr(resHi)); 3669 putIRegRAX(4, mkexpr(resLo)); 3670 break; 3671 } 3672 case Ity_I16: { 3673 IRTemp res32 = newTemp(Ity_I32); 3674 IRTemp resHi = newTemp(Ity_I16); 3675 IRTemp resLo = newTemp(Ity_I16); 3676 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3677 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3678 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3679 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3680 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3681 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3682 putIRegRDX(2, mkexpr(resHi)); 3683 putIRegRAX(2, mkexpr(resLo)); 3684 break; 3685 } 3686 case Ity_I8: { 3687 IRTemp res16 = newTemp(Ity_I16); 3688 IRTemp resHi = newTemp(Ity_I8); 3689 IRTemp resLo = newTemp(Ity_I8); 3690 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3691 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3692 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3693 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3694 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3695 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3696 putIRegRAX(2, mkexpr(res16)); 3697 break; 3698 } 3699 default: 3700 ppIRType(ty); 3701 vpanic("codegen_mulL_A_D(amd64)"); 3702 } 3703 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3704 } 3705 3706 3707 /* Group 3 extended opcodes. 
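   (Opcodes 0xF6/0xF7.  The reg field of the modRM byte selects the
   operation: /0 TEST, /2 NOT, /3 NEG, /4 MUL, /5 IMUL, /6 DIV,
   /7 IDIV.  /1 has no architecturally defined meaning and is
   rejected below.)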
*/ 3708 static 3709 ULong dis_Grp3 ( VexAbiInfo* vbi, 3710 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3711 { 3712 Long d64; 3713 UChar modrm; 3714 HChar dis_buf[50]; 3715 Int len; 3716 IRTemp addr; 3717 IRType ty = szToITy(sz); 3718 IRTemp t1 = newTemp(ty); 3719 IRTemp dst1, src, dst0; 3720 *decode_OK = True; 3721 modrm = getUChar(delta); 3722 if (epartIsReg(modrm)) { 3723 switch (gregLO3ofRM(modrm)) { 3724 case 0: { /* TEST */ 3725 delta++; 3726 d64 = getSDisp(imin(4,sz), delta); 3727 delta += imin(4,sz); 3728 dst1 = newTemp(ty); 3729 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3730 getIRegE(sz,pfx,modrm), 3731 mkU(ty, d64 & mkSizeMask(sz)))); 3732 setFlags_DEP1( Iop_And8, dst1, ty ); 3733 DIP("test%c $%lld, %s\n", 3734 nameISize(sz), d64, 3735 nameIRegE(sz, pfx, modrm)); 3736 break; 3737 } 3738 case 1: 3739 *decode_OK = False; 3740 return delta; 3741 case 2: /* NOT */ 3742 delta++; 3743 putIRegE(sz, pfx, modrm, 3744 unop(mkSizedOp(ty,Iop_Not8), 3745 getIRegE(sz, pfx, modrm))); 3746 DIP("not%c %s\n", nameISize(sz), 3747 nameIRegE(sz, pfx, modrm)); 3748 break; 3749 case 3: /* NEG */ 3750 delta++; 3751 dst0 = newTemp(ty); 3752 src = newTemp(ty); 3753 dst1 = newTemp(ty); 3754 assign(dst0, mkU(ty,0)); 3755 assign(src, getIRegE(sz, pfx, modrm)); 3756 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3757 mkexpr(src))); 3758 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3759 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3760 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 3761 break; 3762 case 4: /* MUL (unsigned widening) */ 3763 delta++; 3764 src = newTemp(ty); 3765 assign(src, getIRegE(sz,pfx,modrm)); 3766 codegen_mulL_A_D ( sz, False, src, 3767 nameIRegE(sz,pfx,modrm) ); 3768 break; 3769 case 5: /* IMUL (signed widening) */ 3770 delta++; 3771 src = newTemp(ty); 3772 assign(src, getIRegE(sz,pfx,modrm)); 3773 codegen_mulL_A_D ( sz, True, src, 3774 nameIRegE(sz,pfx,modrm) ); 3775 break; 3776 case 6: /* DIV */ 3777 delta++; 3778 assign( t1, getIRegE(sz, pfx, modrm) ); 3779 codegen_div ( sz, t1, False ); 3780 DIP("div%c %s\n", nameISize(sz), 3781 nameIRegE(sz, pfx, modrm)); 3782 break; 3783 case 7: /* IDIV */ 3784 delta++; 3785 assign( t1, getIRegE(sz, pfx, modrm) ); 3786 codegen_div ( sz, t1, True ); 3787 DIP("idiv%c %s\n", nameISize(sz), 3788 nameIRegE(sz, pfx, modrm)); 3789 break; 3790 default: 3791 /*NOTREACHED*/ 3792 vpanic("Grp3(amd64,R)"); 3793 } 3794 } else { 3795 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 3796 /* we have to inform disAMode of any immediate 3797 bytes used */ 3798 gregLO3ofRM(modrm)==0/*TEST*/ 3799 ? 
imin(4,sz) 3800 : 0 3801 ); 3802 t1 = newTemp(ty); 3803 delta += len; 3804 assign(t1, loadLE(ty,mkexpr(addr))); 3805 switch (gregLO3ofRM(modrm)) { 3806 case 0: { /* TEST */ 3807 d64 = getSDisp(imin(4,sz), delta); 3808 delta += imin(4,sz); 3809 dst1 = newTemp(ty); 3810 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3811 mkexpr(t1), 3812 mkU(ty, d64 & mkSizeMask(sz)))); 3813 setFlags_DEP1( Iop_And8, dst1, ty ); 3814 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 3815 break; 3816 } 3817 case 1: 3818 *decode_OK = False; 3819 return delta; 3820 case 2: /* NOT */ 3821 dst1 = newTemp(ty); 3822 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 3823 if (pfx & PFX_LOCK) { 3824 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3825 guest_RIP_curr_instr ); 3826 } else { 3827 storeLE( mkexpr(addr), mkexpr(dst1) ); 3828 } 3829 DIP("not%c %s\n", nameISize(sz), dis_buf); 3830 break; 3831 case 3: /* NEG */ 3832 dst0 = newTemp(ty); 3833 src = newTemp(ty); 3834 dst1 = newTemp(ty); 3835 assign(dst0, mkU(ty,0)); 3836 assign(src, mkexpr(t1)); 3837 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3838 mkexpr(src))); 3839 if (pfx & PFX_LOCK) { 3840 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3841 guest_RIP_curr_instr ); 3842 } else { 3843 storeLE( mkexpr(addr), mkexpr(dst1) ); 3844 } 3845 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3846 DIP("neg%c %s\n", nameISize(sz), dis_buf); 3847 break; 3848 case 4: /* MUL (unsigned widening) */ 3849 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 3850 break; 3851 case 5: /* IMUL */ 3852 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 3853 break; 3854 case 6: /* DIV */ 3855 codegen_div ( sz, t1, False ); 3856 DIP("div%c %s\n", nameISize(sz), dis_buf); 3857 break; 3858 case 7: /* IDIV */ 3859 codegen_div ( sz, t1, True ); 3860 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 3861 break; 3862 default: 3863 /*NOTREACHED*/ 3864 vpanic("Grp3(amd64,M)"); 3865 } 3866 } 3867 return delta; 3868 } 3869 3870 3871 /* Group 4 extended opcodes. 
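   (Opcode 0xFE.  Only /0 INC Eb and /1 DEC Eb are defined, and both
   operate on a byte, hence ty is pinned to Ity_I8 below.)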
*/ 3872 static 3873 ULong dis_Grp4 ( VexAbiInfo* vbi, 3874 Prefix pfx, Long delta, Bool* decode_OK ) 3875 { 3876 Int alen; 3877 UChar modrm; 3878 HChar dis_buf[50]; 3879 IRType ty = Ity_I8; 3880 IRTemp t1 = newTemp(ty); 3881 IRTemp t2 = newTemp(ty); 3882 3883 *decode_OK = True; 3884 3885 modrm = getUChar(delta); 3886 if (epartIsReg(modrm)) { 3887 assign(t1, getIRegE(1, pfx, modrm)); 3888 switch (gregLO3ofRM(modrm)) { 3889 case 0: /* INC */ 3890 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3891 putIRegE(1, pfx, modrm, mkexpr(t2)); 3892 setFlags_INC_DEC( True, t2, ty ); 3893 break; 3894 case 1: /* DEC */ 3895 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3896 putIRegE(1, pfx, modrm, mkexpr(t2)); 3897 setFlags_INC_DEC( False, t2, ty ); 3898 break; 3899 default: 3900 *decode_OK = False; 3901 return delta; 3902 } 3903 delta++; 3904 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 3905 nameIRegE(1, pfx, modrm)); 3906 } else { 3907 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 3908 assign( t1, loadLE(ty, mkexpr(addr)) ); 3909 switch (gregLO3ofRM(modrm)) { 3910 case 0: /* INC */ 3911 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3912 if (pfx & PFX_LOCK) { 3913 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3914 guest_RIP_curr_instr ); 3915 } else { 3916 storeLE( mkexpr(addr), mkexpr(t2) ); 3917 } 3918 setFlags_INC_DEC( True, t2, ty ); 3919 break; 3920 case 1: /* DEC */ 3921 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3922 if (pfx & PFX_LOCK) { 3923 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3924 guest_RIP_curr_instr ); 3925 } else { 3926 storeLE( mkexpr(addr), mkexpr(t2) ); 3927 } 3928 setFlags_INC_DEC( False, t2, ty ); 3929 break; 3930 default: 3931 *decode_OK = False; 3932 return delta; 3933 } 3934 delta += alen; 3935 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 3936 } 3937 return delta; 3938 } 3939 3940 3941 /* Group 5 extended opcodes. */ 3942 static 3943 ULong dis_Grp5 ( VexAbiInfo* vbi, 3944 Prefix pfx, Int sz, Long delta, 3945 DisResult* dres, Bool* decode_OK ) 3946 { 3947 Int len; 3948 UChar modrm; 3949 HChar dis_buf[50]; 3950 IRTemp addr = IRTemp_INVALID; 3951 IRType ty = szToITy(sz); 3952 IRTemp t1 = newTemp(ty); 3953 IRTemp t2 = IRTemp_INVALID; 3954 IRTemp t3 = IRTemp_INVALID; 3955 Bool showSz = True; 3956 3957 *decode_OK = True; 3958 3959 modrm = getUChar(delta); 3960 if (epartIsReg(modrm)) { 3961 assign(t1, getIRegE(sz,pfx,modrm)); 3962 switch (gregLO3ofRM(modrm)) { 3963 case 0: /* INC */ 3964 t2 = newTemp(ty); 3965 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3966 mkexpr(t1), mkU(ty,1))); 3967 setFlags_INC_DEC( True, t2, ty ); 3968 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3969 break; 3970 case 1: /* DEC */ 3971 t2 = newTemp(ty); 3972 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3973 mkexpr(t1), mkU(ty,1))); 3974 setFlags_INC_DEC( False, t2, ty ); 3975 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3976 break; 3977 case 2: /* call Ev */ 3978 /* Ignore any sz value and operate as if sz==8. 
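               In 64-bit mode a near CALL or JMP through a register or
               memory operand always takes a 64-bit target; there is no
               way to encode a 32-bit form.  The call then expands to:
               RSP -= 8; store the fall-through RIP at [RSP]; jump to
               the target.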
*/ 3979 if (!(sz == 4 || sz == 8)) goto unhandled; 3980 sz = 8; 3981 t3 = newTemp(Ity_I64); 3982 assign(t3, getIRegE(sz,pfx,modrm)); 3983 t2 = newTemp(Ity_I64); 3984 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 3985 putIReg64(R_RSP, mkexpr(t2)); 3986 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 3987 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 3988 jmp_treg(Ijk_Call,t3); 3989 dres->whatNext = Dis_StopHere; 3990 showSz = False; 3991 break; 3992 case 4: /* jmp Ev */ 3993 /* Ignore any sz value and operate as if sz==8. */ 3994 if (!(sz == 4 || sz == 8)) goto unhandled; 3995 sz = 8; 3996 t3 = newTemp(Ity_I64); 3997 assign(t3, getIRegE(sz,pfx,modrm)); 3998 jmp_treg(Ijk_Boring,t3); 3999 dres->whatNext = Dis_StopHere; 4000 showSz = False; 4001 break; 4002 default: 4003 *decode_OK = False; 4004 return delta; 4005 } 4006 delta++; 4007 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4008 showSz ? nameISize(sz) : ' ', 4009 nameIRegE(sz, pfx, modrm)); 4010 } else { 4011 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4012 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4013 && gregLO3ofRM(modrm) != 6) { 4014 assign(t1, loadLE(ty,mkexpr(addr))); 4015 } 4016 switch (gregLO3ofRM(modrm)) { 4017 case 0: /* INC */ 4018 t2 = newTemp(ty); 4019 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4020 mkexpr(t1), mkU(ty,1))); 4021 if (pfx & PFX_LOCK) { 4022 casLE( mkexpr(addr), 4023 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4024 } else { 4025 storeLE(mkexpr(addr),mkexpr(t2)); 4026 } 4027 setFlags_INC_DEC( True, t2, ty ); 4028 break; 4029 case 1: /* DEC */ 4030 t2 = newTemp(ty); 4031 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4032 mkexpr(t1), mkU(ty,1))); 4033 if (pfx & PFX_LOCK) { 4034 casLE( mkexpr(addr), 4035 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4036 } else { 4037 storeLE(mkexpr(addr),mkexpr(t2)); 4038 } 4039 setFlags_INC_DEC( False, t2, ty ); 4040 break; 4041 case 2: /* call Ev */ 4042 /* Ignore any sz value and operate as if sz==8. */ 4043 if (!(sz == 4 || sz == 8)) goto unhandled; 4044 sz = 8; 4045 t3 = newTemp(Ity_I64); 4046 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4047 t2 = newTemp(Ity_I64); 4048 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4049 putIReg64(R_RSP, mkexpr(t2)); 4050 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4051 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4052 jmp_treg(Ijk_Call,t3); 4053 dres->whatNext = Dis_StopHere; 4054 showSz = False; 4055 break; 4056 case 4: /* JMP Ev */ 4057 /* Ignore any sz value and operate as if sz==8. */ 4058 if (!(sz == 4 || sz == 8)) goto unhandled; 4059 sz = 8; 4060 t3 = newTemp(Ity_I64); 4061 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4062 jmp_treg(Ijk_Boring,t3); 4063 dres->whatNext = Dis_StopHere; 4064 showSz = False; 4065 break; 4066 case 6: /* PUSH Ev */ 4067 /* There is no encoding for 32-bit operand size; hence ... */ 4068 if (sz == 4) sz = 8; 4069 if (!(sz == 8 || sz == 2)) goto unhandled; 4070 if (sz == 8) { 4071 t3 = newTemp(Ity_I64); 4072 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4073 t2 = newTemp(Ity_I64); 4074 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4075 putIReg64(R_RSP, mkexpr(t2) ); 4076 storeLE( mkexpr(t2), mkexpr(t3) ); 4077 break; 4078 } else { 4079 goto unhandled; /* awaiting test case */ 4080 } 4081 default: 4082 unhandled: 4083 *decode_OK = False; 4084 return delta; 4085 } 4086 delta += len; 4087 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4088 showSz ? 
nameISize(sz) : ' ', 4089 dis_buf); 4090 } 4091 return delta; 4092 } 4093 4094 4095 /*------------------------------------------------------------*/ 4096 /*--- Disassembling string ops (including REP prefixes) ---*/ 4097 /*------------------------------------------------------------*/ 4098 4099 /* Code shared by all the string ops */ 4100 static 4101 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4102 { 4103 UChar logSz; 4104 if (sz == 8 || sz == 4 || sz == 2) { 4105 logSz = 1; 4106 if (sz == 4) logSz = 2; 4107 if (sz == 8) logSz = 3; 4108 assign( t_inc, 4109 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4110 mkU8(logSz) ) ); 4111 } else { 4112 assign( t_inc, 4113 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4114 } 4115 } 4116 4117 static 4118 void dis_string_op( void (*dis_OP)( Int, IRTemp ), 4119 Int sz, HChar* name, Prefix pfx ) 4120 { 4121 IRTemp t_inc = newTemp(Ity_I64); 4122 /* Really we ought to inspect the override prefixes, but we don't. 4123 The following assertion catches any resulting sillyness. */ 4124 vassert(pfx == clearSegBits(pfx)); 4125 dis_string_op_increment(sz, t_inc); 4126 dis_OP( sz, t_inc ); 4127 DIP("%s%c\n", name, nameISize(sz)); 4128 } 4129 4130 static 4131 void dis_MOVS ( Int sz, IRTemp t_inc ) 4132 { 4133 IRType ty = szToITy(sz); 4134 IRTemp td = newTemp(Ity_I64); /* RDI */ 4135 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4136 4137 assign( td, getIReg64(R_RDI) ); 4138 assign( ts, getIReg64(R_RSI) ); 4139 4140 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4141 4142 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) ); 4143 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) ); 4144 } 4145 4146 static 4147 void dis_LODS ( Int sz, IRTemp t_inc ) 4148 { 4149 IRType ty = szToITy(sz); 4150 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4151 4152 assign( ts, getIReg64(R_RSI) ); 4153 4154 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4155 4156 putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) ); 4157 } 4158 4159 static 4160 void dis_STOS ( Int sz, IRTemp t_inc ) 4161 { 4162 IRType ty = szToITy(sz); 4163 IRTemp ta = newTemp(ty); /* rAX */ 4164 IRTemp td = newTemp(Ity_I64); /* RDI */ 4165 4166 assign( ta, getIRegRAX(sz) ); 4167 4168 assign( td, getIReg64(R_RDI) ); 4169 4170 storeLE( mkexpr(td), mkexpr(ta) ); 4171 4172 putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) ); 4173 } 4174 4175 static 4176 void dis_CMPS ( Int sz, IRTemp t_inc ) 4177 { 4178 IRType ty = szToITy(sz); 4179 IRTemp tdv = newTemp(ty); /* (RDI) */ 4180 IRTemp tsv = newTemp(ty); /* (RSI) */ 4181 IRTemp td = newTemp(Ity_I64); /* RDI */ 4182 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4183 4184 assign( td, getIReg64(R_RDI) ); 4185 4186 assign( ts, getIReg64(R_RSI) ); 4187 4188 assign( tdv, loadLE(ty,mkexpr(td)) ); 4189 4190 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4191 4192 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4193 4194 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) ); 4195 4196 putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) ); 4197 } 4198 4199 static 4200 void dis_SCAS ( Int sz, IRTemp t_inc ) 4201 { 4202 IRType ty = szToITy(sz); 4203 IRTemp ta = newTemp(ty); /* rAX */ 4204 IRTemp td = newTemp(Ity_I64); /* RDI */ 4205 IRTemp tdv = newTemp(ty); /* (RDI) */ 4206 4207 assign( ta, getIRegRAX(sz) ); 4208 4209 assign( td, getIReg64(R_RDI) ); 4210 4211 assign( tdv, loadLE(ty,mkexpr(td)) ); 4212 4213 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4214 4215 putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) ); 4216 } 4217 4218 4219 /* Wrap 
the appropriate string op inside a REP/REPE/REPNE. We assume 4220 the insn is the last one in the basic block, and so emit a jump to 4221 the next insn, rather than just falling through. */ 4222 static 4223 void dis_REP_op ( AMD64Condcode cond, 4224 void (*dis_OP)(Int, IRTemp), 4225 Int sz, Addr64 rip, Addr64 rip_next, HChar* name, 4226 Prefix pfx ) 4227 { 4228 IRTemp t_inc = newTemp(Ity_I64); 4229 IRTemp tc = newTemp(Ity_I64); /* RCX */ 4230 4231 /* Really we ought to inspect the override prefixes, but we don't. 4232 The following assertion catches any resulting sillyness. */ 4233 vassert(pfx == clearSegBits(pfx)); 4234 4235 assign( tc, getIReg64(R_RCX) ); 4236 4237 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)), 4238 Ijk_Boring, 4239 IRConst_U64(rip_next) ) ); 4240 4241 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4242 4243 dis_string_op_increment(sz, t_inc); 4244 dis_OP (sz, t_inc); 4245 4246 if (cond == AMD64CondAlways) { 4247 jmp_lit(Ijk_Boring,rip); 4248 } else { 4249 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4250 Ijk_Boring, 4251 IRConst_U64(rip) ) ); 4252 jmp_lit(Ijk_Boring,rip_next); 4253 } 4254 DIP("%s%c\n", name, nameISize(sz)); 4255 } 4256 4257 4258 /*------------------------------------------------------------*/ 4259 /*--- Arithmetic, etc. ---*/ 4260 /*------------------------------------------------------------*/ 4261 4262 /* IMUL E, G. Supplied eip points to the modR/M byte. */ 4263 static 4264 ULong dis_mul_E_G ( VexAbiInfo* vbi, 4265 Prefix pfx, 4266 Int size, 4267 Long delta0 ) 4268 { 4269 Int alen; 4270 HChar dis_buf[50]; 4271 UChar rm = getUChar(delta0); 4272 IRType ty = szToITy(size); 4273 IRTemp te = newTemp(ty); 4274 IRTemp tg = newTemp(ty); 4275 IRTemp resLo = newTemp(ty); 4276 4277 assign( tg, getIRegG(size, pfx, rm) ); 4278 if (epartIsReg(rm)) { 4279 assign( te, getIRegE(size, pfx, rm) ); 4280 } else { 4281 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4282 assign( te, loadLE(ty,mkexpr(addr)) ); 4283 } 4284 4285 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4286 4287 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4288 4289 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4290 4291 if (epartIsReg(rm)) { 4292 DIP("imul%c %s, %s\n", nameISize(size), 4293 nameIRegE(size,pfx,rm), 4294 nameIRegG(size,pfx,rm)); 4295 return 1+delta0; 4296 } else { 4297 DIP("imul%c %s, %s\n", nameISize(size), 4298 dis_buf, 4299 nameIRegG(size,pfx,rm)); 4300 return alen+delta0; 4301 } 4302 } 4303 4304 4305 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. 
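   The immediate occupies at most 4 bytes (imin(4,litsize) below) and
   is sign-extended to the operand size, so for example
      imulq $-2, (%rdi), %rax
   carries only a 32-bit immediate even though the multiply itself is
   64 bits wide.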
*/ 4306 static 4307 ULong dis_imul_I_E_G ( VexAbiInfo* vbi, 4308 Prefix pfx, 4309 Int size, 4310 Long delta, 4311 Int litsize ) 4312 { 4313 Long d64; 4314 Int alen; 4315 HChar dis_buf[50]; 4316 UChar rm = getUChar(delta); 4317 IRType ty = szToITy(size); 4318 IRTemp te = newTemp(ty); 4319 IRTemp tl = newTemp(ty); 4320 IRTemp resLo = newTemp(ty); 4321 4322 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4323 4324 if (epartIsReg(rm)) { 4325 assign(te, getIRegE(size, pfx, rm)); 4326 delta++; 4327 } else { 4328 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4329 imin(4,litsize) ); 4330 assign(te, loadLE(ty, mkexpr(addr))); 4331 delta += alen; 4332 } 4333 d64 = getSDisp(imin(4,litsize),delta); 4334 delta += imin(4,litsize); 4335 4336 d64 &= mkSizeMask(size); 4337 assign(tl, mkU(ty,d64)); 4338 4339 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4340 4341 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4342 4343 putIRegG(size, pfx, rm, mkexpr(resLo)); 4344 4345 DIP("imul%c $%lld, %s, %s\n", 4346 nameISize(size), d64, 4347 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4348 nameIRegG(size,pfx,rm) ); 4349 return delta; 4350 } 4351 4352 4353 /* Generate an IR sequence to do a popcount operation on the supplied 4354 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4355 Ity_I16, Ity_I32 or Ity_I64 only. */ 4356 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4357 { 4358 Int i; 4359 if (ty == Ity_I16) { 4360 IRTemp old = IRTemp_INVALID; 4361 IRTemp nyu = IRTemp_INVALID; 4362 IRTemp mask[4], shift[4]; 4363 for (i = 0; i < 4; i++) { 4364 mask[i] = newTemp(ty); 4365 shift[i] = 1 << i; 4366 } 4367 assign(mask[0], mkU16(0x5555)); 4368 assign(mask[1], mkU16(0x3333)); 4369 assign(mask[2], mkU16(0x0F0F)); 4370 assign(mask[3], mkU16(0x00FF)); 4371 old = src; 4372 for (i = 0; i < 4; i++) { 4373 nyu = newTemp(ty); 4374 assign(nyu, 4375 binop(Iop_Add16, 4376 binop(Iop_And16, 4377 mkexpr(old), 4378 mkexpr(mask[i])), 4379 binop(Iop_And16, 4380 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4381 mkexpr(mask[i])))); 4382 old = nyu; 4383 } 4384 return nyu; 4385 } 4386 if (ty == Ity_I32) { 4387 IRTemp old = IRTemp_INVALID; 4388 IRTemp nyu = IRTemp_INVALID; 4389 IRTemp mask[5], shift[5]; 4390 for (i = 0; i < 5; i++) { 4391 mask[i] = newTemp(ty); 4392 shift[i] = 1 << i; 4393 } 4394 assign(mask[0], mkU32(0x55555555)); 4395 assign(mask[1], mkU32(0x33333333)); 4396 assign(mask[2], mkU32(0x0F0F0F0F)); 4397 assign(mask[3], mkU32(0x00FF00FF)); 4398 assign(mask[4], mkU32(0x0000FFFF)); 4399 old = src; 4400 for (i = 0; i < 5; i++) { 4401 nyu = newTemp(ty); 4402 assign(nyu, 4403 binop(Iop_Add32, 4404 binop(Iop_And32, 4405 mkexpr(old), 4406 mkexpr(mask[i])), 4407 binop(Iop_And32, 4408 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4409 mkexpr(mask[i])))); 4410 old = nyu; 4411 } 4412 return nyu; 4413 } 4414 if (ty == Ity_I64) { 4415 IRTemp old = IRTemp_INVALID; 4416 IRTemp nyu = IRTemp_INVALID; 4417 IRTemp mask[6], shift[6]; 4418 for (i = 0; i < 6; i++) { 4419 mask[i] = newTemp(ty); 4420 shift[i] = 1 << i; 4421 } 4422 assign(mask[0], mkU64(0x5555555555555555ULL)); 4423 assign(mask[1], mkU64(0x3333333333333333ULL)); 4424 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4425 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4426 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4427 assign(mask[5], mkU64(0x00000000FFFFFFFFULL)); 4428 old = src; 4429 for (i = 0; i < 6; i++) { 4430 nyu = newTemp(ty); 4431 assign(nyu, 4432 binop(Iop_Add64, 4433 binop(Iop_And64, 4434 
mkexpr(old), 4435 mkexpr(mask[i])), 4436 binop(Iop_And64, 4437 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4438 mkexpr(mask[i])))); 4439 old = nyu; 4440 } 4441 return nyu; 4442 } 4443 /*NOTREACHED*/ 4444 vassert(0); 4445 } 4446 4447 4448 /* Generate an IR sequence to do a count-leading-zeroes operation on 4449 the supplied IRTemp, and return a new IRTemp holding the result. 4450 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4451 the argument is zero, return the number of bits in the word (the 4452 natural semantics). */ 4453 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4454 { 4455 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4456 4457 IRTemp src64 = newTemp(Ity_I64); 4458 assign(src64, widenUto64( mkexpr(src) )); 4459 4460 IRTemp src64x = newTemp(Ity_I64); 4461 assign(src64x, 4462 binop(Iop_Shl64, mkexpr(src64), 4463 mkU8(64 - 8 * sizeofIRType(ty)))); 4464 4465 // Clz64 has undefined semantics when its input is zero, so 4466 // special-case around that. 4467 IRTemp res64 = newTemp(Ity_I64); 4468 assign(res64, 4469 IRExpr_Mux0X( 4470 unop(Iop_1Uto8, 4471 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))), 4472 unop(Iop_Clz64, mkexpr(src64x)), 4473 mkU64(8 * sizeofIRType(ty)) 4474 )); 4475 4476 IRTemp res = newTemp(ty); 4477 assign(res, narrowTo(ty, mkexpr(res64))); 4478 return res; 4479 } 4480 4481 4482 /*------------------------------------------------------------*/ 4483 /*--- ---*/ 4484 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4485 /*--- ---*/ 4486 /*------------------------------------------------------------*/ 4487 4488 /* --- Helper functions for dealing with the register stack. --- */ 4489 4490 /* --- Set the emulation-warning pseudo-register. --- */ 4491 4492 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4493 { 4494 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4495 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 4496 } 4497 4498 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4499 4500 static IRExpr* mkQNaN64 ( void ) 4501 { 4502 /* QNaN is 0 2047 1 0(51times) 4503 == 0b 11111111111b 1 0(51times) 4504 == 0x7FF8 0000 0000 0000 4505 */ 4506 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4507 } 4508 4509 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4510 4511 static IRExpr* get_ftop ( void ) 4512 { 4513 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4514 } 4515 4516 static void put_ftop ( IRExpr* e ) 4517 { 4518 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4519 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4520 } 4521 4522 /* --------- Get/put the C3210 bits. --------- */ 4523 4524 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4525 { 4526 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4527 } 4528 4529 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4530 { 4531 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4532 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4533 } 4534 4535 /* --------- Get/put the FPU rounding mode. --------- */ 4536 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4537 { 4538 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4539 } 4540 4541 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4542 { 4543 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4544 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4545 } 4546 4547 4548 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4549 /* Produces a value in 0 .. 3, which is encoded as per the type 4550 IRRoundingMode. 
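   (0 = round to nearest, 1 = round towards -infinity, 2 = round
   towards +infinity, 3 = round towards zero -- the same encoding as
   the RC field of the x87 control word.)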
Since the guest_FPROUND value is also encoded as 4551 per IRRoundingMode, we merely need to get it and mask it for 4552 safety. 4553 */ 4554 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4555 { 4556 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4557 } 4558 4559 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4560 { 4561 return mkU32(Irrm_NEAREST); 4562 } 4563 4564 4565 /* --------- Get/set FP register tag bytes. --------- */ 4566 4567 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 4568 4569 static void put_ST_TAG ( Int i, IRExpr* value ) 4570 { 4571 IRRegArray* descr; 4572 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 4573 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4574 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 4575 } 4576 4577 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 4578 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 4579 4580 static IRExpr* get_ST_TAG ( Int i ) 4581 { 4582 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 4583 return IRExpr_GetI( descr, get_ftop(), i ); 4584 } 4585 4586 4587 /* --------- Get/set FP registers. --------- */ 4588 4589 /* Given i, and some expression e, emit 'ST(i) = e' and set the 4590 register's tag to indicate the register is full. The previous 4591 state of the register is not checked. */ 4592 4593 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 4594 { 4595 IRRegArray* descr; 4596 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 4597 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4598 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 4599 /* Mark the register as in-use. */ 4600 put_ST_TAG(i, mkU8(1)); 4601 } 4602 4603 /* Given i, and some expression e, emit 4604 ST(i) = is_full(i) ? NaN : e 4605 and set the tag accordingly. 4606 */ 4607 4608 static void put_ST ( Int i, IRExpr* value ) 4609 { 4610 put_ST_UNCHECKED( i, 4611 IRExpr_Mux0X( get_ST_TAG(i), 4612 /* 0 means empty */ 4613 value, 4614 /* non-0 means full */ 4615 mkQNaN64() 4616 ) 4617 ); 4618 } 4619 4620 4621 /* Given i, generate an expression yielding 'ST(i)'. */ 4622 4623 static IRExpr* get_ST_UNCHECKED ( Int i ) 4624 { 4625 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 4626 return IRExpr_GetI( descr, get_ftop(), i ); 4627 } 4628 4629 4630 /* Given i, generate an expression yielding 4631 is_full(i) ? ST(i) : NaN 4632 */ 4633 4634 static IRExpr* get_ST ( Int i ) 4635 { 4636 return 4637 IRExpr_Mux0X( get_ST_TAG(i), 4638 /* 0 means empty */ 4639 mkQNaN64(), 4640 /* non-0 means full */ 4641 get_ST_UNCHECKED(i)); 4642 } 4643 4644 4645 /* Adjust FTOP downwards by one register. */ 4646 4647 static void fp_push ( void ) 4648 { 4649 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 4650 } 4651 4652 /* Adjust FTOP upwards by one register, and mark the vacated register 4653 as empty. */ 4654 4655 static void fp_pop ( void ) 4656 { 4657 put_ST_TAG(0, mkU8(0)); 4658 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4659 } 4660 4661 /* Clear the C2 bit of the FPU status register, for 4662 sin/cos/tan/sincos. 
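   Assuming AMD64G_FC_MASK_C2 is (1 << 10) -- C2's position in the
   x87 status word -- this amounts to FC3210 &= ~0x400.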
*/ 4663 4664 static void clear_C2 ( void ) 4665 { 4666 put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); 4667 } 4668 4669 /* Invent a plausible-looking FPU status word value: 4670 ((ftop & 7) << 11) | (c3210 & 0x4700) 4671 */ 4672 static IRExpr* get_FPU_sw ( void ) 4673 { 4674 return 4675 unop(Iop_32to16, 4676 binop(Iop_Or32, 4677 binop(Iop_Shl32, 4678 binop(Iop_And32, get_ftop(), mkU32(7)), 4679 mkU8(11)), 4680 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 4681 mkU32(0x4700)) 4682 )); 4683 } 4684 4685 4686 /* ------------------------------------------------------- */ 4687 /* Given all that stack-mangling junk, we can now go ahead 4688 and describe FP instructions. 4689 */ 4690 4691 /* ST(0) = ST(0) `op` mem64/32(addr) 4692 Need to check ST(0)'s tag on read, but not on write. 4693 */ 4694 static 4695 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4696 IROp op, Bool dbl ) 4697 { 4698 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4699 if (dbl) { 4700 put_ST_UNCHECKED(0, 4701 triop( op, 4702 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4703 get_ST(0), 4704 loadLE(Ity_F64,mkexpr(addr)) 4705 )); 4706 } else { 4707 put_ST_UNCHECKED(0, 4708 triop( op, 4709 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4710 get_ST(0), 4711 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 4712 )); 4713 } 4714 } 4715 4716 4717 /* ST(0) = mem64/32(addr) `op` ST(0) 4718 Need to check ST(0)'s tag on read, but not on write. 4719 */ 4720 static 4721 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4722 IROp op, Bool dbl ) 4723 { 4724 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4725 if (dbl) { 4726 put_ST_UNCHECKED(0, 4727 triop( op, 4728 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4729 loadLE(Ity_F64,mkexpr(addr)), 4730 get_ST(0) 4731 )); 4732 } else { 4733 put_ST_UNCHECKED(0, 4734 triop( op, 4735 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4736 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 4737 get_ST(0) 4738 )); 4739 } 4740 } 4741 4742 4743 /* ST(dst) = ST(dst) `op` ST(src). 4744 Check dst and src tags when reading but not on write. 4745 */ 4746 static 4747 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4748 Bool pop_after ) 4749 { 4750 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4751 put_ST_UNCHECKED( 4752 st_dst, 4753 triop( op, 4754 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4755 get_ST(st_dst), 4756 get_ST(st_src) ) 4757 ); 4758 if (pop_after) 4759 fp_pop(); 4760 } 4761 4762 /* ST(dst) = ST(src) `op` ST(dst). 4763 Check dst and src tags when reading but not on write. 4764 */ 4765 static 4766 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4767 Bool pop_after ) 4768 { 4769 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4770 put_ST_UNCHECKED( 4771 st_dst, 4772 triop( op, 4773 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4774 get_ST(st_src), 4775 get_ST(st_dst) ) 4776 ); 4777 if (pop_after) 4778 fp_pop(); 4779 } 4780 4781 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 4782 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 4783 { 4784 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 4785 /* This is a bit of a hack (and isn't really right). It sets 4786 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 4787 documentation implies A and S are unchanged. 4788 */ 4789 /* It's also fishy in that it is used both for COMIP and 4790 UCOMIP, and they aren't the same (although similar). 
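      The 0x45 mask below is not arbitrary: Iop_CmpF64 yields 0x40 for
      EQ, 0x01 for LT, 0x00 for GT and 0x45 for unordered, values
      chosen to coincide with the ZF (0x40), PF (0x04) and CF (0x01)
      bit positions of rflags.  So after masking, CC_DEP1 holds exactly
      the Z/P/C triple that UCOMI is defined to produce.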
*/ 4791 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 4792 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 4793 stmt( IRStmt_Put( 4794 OFFB_CC_DEP1, 4795 binop( Iop_And64, 4796 unop( Iop_32Uto64, 4797 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 4798 mkU64(0x45) 4799 ))); 4800 if (pop_after) 4801 fp_pop(); 4802 } 4803 4804 4805 /* returns 4806 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 4807 */ 4808 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 4809 { 4810 IRTemp t32 = newTemp(Ity_I32); 4811 assign( t32, e32 ); 4812 return 4813 IRExpr_Mux0X( 4814 unop(Iop_1Uto8, 4815 binop(Iop_CmpLT64U, 4816 unop(Iop_32Uto64, 4817 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 4818 mkU64(65536))), 4819 mkU16( 0x8000 ), 4820 unop(Iop_32to16, mkexpr(t32))); 4821 } 4822 4823 4824 static 4825 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 4826 VexAbiInfo* vbi, Prefix pfx, Long delta ) 4827 { 4828 Int len; 4829 UInt r_src, r_dst; 4830 HChar dis_buf[50]; 4831 IRTemp t1, t2; 4832 4833 /* On entry, delta points at the second byte of the insn (the modrm 4834 byte).*/ 4835 UChar first_opcode = getUChar(delta-1); 4836 UChar modrm = getUChar(delta+0); 4837 4838 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 4839 4840 if (first_opcode == 0xD8) { 4841 if (modrm < 0xC0) { 4842 4843 /* bits 5,4,3 are an opcode extension, and the modRM also 4844 specifies an address. */ 4845 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 4846 delta += len; 4847 4848 switch (gregLO3ofRM(modrm)) { 4849 4850 case 0: /* FADD single-real */ 4851 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 4852 break; 4853 4854 case 1: /* FMUL single-real */ 4855 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 4856 break; 4857 4858 //.. case 2: /* FCOM single-real */ 4859 //.. DIP("fcoms %s\n", dis_buf); 4860 //.. /* This forces C1 to zero, which isn't right. */ 4861 //.. put_C3210( 4862 //.. binop( Iop_And32, 4863 //.. binop(Iop_Shl32, 4864 //.. binop(Iop_CmpF64, 4865 //.. get_ST(0), 4866 //.. unop(Iop_F32toF64, 4867 //.. loadLE(Ity_F32,mkexpr(addr)))), 4868 //.. mkU8(8)), 4869 //.. mkU32(0x4500) 4870 //.. )); 4871 //.. break; 4872 //.. 4873 //.. case 3: /* FCOMP single-real */ 4874 //.. DIP("fcomps %s\n", dis_buf); 4875 //.. /* This forces C1 to zero, which isn't right. */ 4876 //.. put_C3210( 4877 //.. binop( Iop_And32, 4878 //.. binop(Iop_Shl32, 4879 //.. binop(Iop_CmpF64, 4880 //.. get_ST(0), 4881 //.. unop(Iop_F32toF64, 4882 //.. loadLE(Ity_F32,mkexpr(addr)))), 4883 //.. mkU8(8)), 4884 //.. mkU32(0x4500) 4885 //.. )); 4886 //.. fp_pop(); 4887 //.. break; 4888 4889 case 4: /* FSUB single-real */ 4890 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 4891 break; 4892 4893 case 5: /* FSUBR single-real */ 4894 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 4895 break; 4896 4897 case 6: /* FDIV single-real */ 4898 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 4899 break; 4900 4901 case 7: /* FDIVR single-real */ 4902 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 4903 break; 4904 4905 default: 4906 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 4907 vex_printf("first_opcode == 0xD8\n"); 4908 goto decode_fail; 4909 } 4910 } else { 4911 delta++; 4912 switch (modrm) { 4913 4914 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 4915 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 4916 break; 4917 4918 case 0xC8 ... 
0xCF: /* FMUL %st(?),%st(0) */ 4919 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 4920 break; 4921 4922 /* Dunno if this is right */ 4923 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 4924 r_dst = (UInt)modrm - 0xD0; 4925 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 4926 /* This forces C1 to zero, which isn't right. */ 4927 put_C3210( 4928 unop(Iop_32Uto64, 4929 binop( Iop_And32, 4930 binop(Iop_Shl32, 4931 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 4932 mkU8(8)), 4933 mkU32(0x4500) 4934 ))); 4935 break; 4936 4937 /* Dunno if this is right */ 4938 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 4939 r_dst = (UInt)modrm - 0xD8; 4940 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 4941 /* This forces C1 to zero, which isn't right. */ 4942 put_C3210( 4943 unop(Iop_32Uto64, 4944 binop( Iop_And32, 4945 binop(Iop_Shl32, 4946 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 4947 mkU8(8)), 4948 mkU32(0x4500) 4949 ))); 4950 fp_pop(); 4951 break; 4952 4953 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 4954 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 4955 break; 4956 4957 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 4958 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 4959 break; 4960 4961 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 4962 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 4963 break; 4964 4965 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 4966 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 4967 break; 4968 4969 default: 4970 goto decode_fail; 4971 } 4972 } 4973 } 4974 4975 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 4976 else 4977 if (first_opcode == 0xD9) { 4978 if (modrm < 0xC0) { 4979 4980 /* bits 5,4,3 are an opcode extension, and the modRM also 4981 specifies an address. 
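            For example, the byte sequence D9 45 08 has modrm 0x45,
            i.e. mod=01 reg=000 rm=101, which is /0 (FLD single-real)
            with effective address 8(%rbp): flds 0x8(%rbp).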
*/
4982          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
4983          delta += len;
4984 
4985          switch (gregLO3ofRM(modrm)) {
4986 
4987             case 0: /* FLD single-real */
4988                DIP("flds %s\n", dis_buf);
4989                fp_push();
4990                put_ST(0, unop(Iop_F32toF64,
4991                               loadLE(Ity_F32, mkexpr(addr))));
4992                break;
4993 
4994             case 2: /* FST single-real */
4995                DIP("fsts %s\n", dis_buf);
4996                storeLE(mkexpr(addr),
4997                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
4998                break;
4999 
5000             case 3: /* FSTP single-real */
5001                DIP("fstps %s\n", dis_buf);
5002                storeLE(mkexpr(addr),
5003                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5004                fp_pop();
5005                break;
5006 
5007             case 4: { /* FLDENV m28 */
5008                /* Uses dirty helper:
5009                      VexEmWarn amd64g_do_FLDENV ( VexGuestAMD64State*, HWord ) */
5010                IRTemp   ew = newTemp(Ity_I32);
5011                IRTemp  w64 = newTemp(Ity_I64);
5012                IRDirty* d  = unsafeIRDirty_0_N (
5013                                 0/*regparms*/,
5014                                 "amd64g_dirtyhelper_FLDENV",
5015                                 &amd64g_dirtyhelper_FLDENV,
5016                                 mkIRExprVec_1( mkexpr(addr) )
5017                              );
5018                d->needsBBP = True;
5019                d->tmp      = w64;
5020                /* declare we're reading memory */
5021                d->mFx   = Ifx_Read;
5022                d->mAddr = mkexpr(addr);
5023                d->mSize = 28;
5024 
5025                /* declare we're writing guest state */
5026                d->nFxState = 4;
5027 
5028                d->fxState[0].fx     = Ifx_Write;
5029                d->fxState[0].offset = OFFB_FTOP;
5030                d->fxState[0].size   = sizeof(UInt);
5031 
5032                d->fxState[1].fx     = Ifx_Write;
5033                d->fxState[1].offset = OFFB_FPTAGS;
5034                d->fxState[1].size   = 8 * sizeof(UChar);
5035 
5036                d->fxState[2].fx     = Ifx_Write;
5037                d->fxState[2].offset = OFFB_FPROUND;
5038                d->fxState[2].size   = sizeof(ULong);
5039 
5040                d->fxState[3].fx     = Ifx_Write;
5041                d->fxState[3].offset = OFFB_FC3210;
5042                d->fxState[3].size   = sizeof(ULong);
5043 
5044                stmt( IRStmt_Dirty(d) );
5045 
5046                /* ew contains any emulation warning we may need to
5047                   issue.  If needed, side-exit to the next insn,
5048                   reporting the warning, so that Valgrind's dispatcher
5049                   sees the warning. */
5050                assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5051                put_emwarn( mkexpr(ew) );
5052                stmt(
5053                   IRStmt_Exit(
5054                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5055                      Ijk_EmWarn,
5056                      IRConst_U64( guest_RIP_bbstart+delta )
5057                   )
5058                );
5059 
5060                DIP("fldenv %s\n", dis_buf);
5061                break;
5062             }
5063 
5064             case 5: { /* FLDCW */
5065                /* The only thing we observe in the control word is the
5066                   rounding mode.  Therefore, pass the 16-bit value
5067                   (x87 native-format control word) to a clean helper,
5068                   getting back a 64-bit value, the lower half of which
5069                   is the FPROUND value to store, and the upper half of
5070                   which is the emulation-warning token which may be
5071                   generated.
5072                */
5073                /* ULong amd64g_check_fldcw ( ULong ); */
5074                IRTemp t64 = newTemp(Ity_I64);
5075                IRTemp ew  = newTemp(Ity_I32);
5076                DIP("fldcw %s\n", dis_buf);
5077                assign( t64, mkIRExprCCall(
5078                                Ity_I64, 0/*regparms*/,
5079                                "amd64g_check_fldcw",
5080                                &amd64g_check_fldcw,
5081                                mkIRExprVec_1(
5082                                   unop( Iop_16Uto64,
5083                                         loadLE(Ity_I16, mkexpr(addr)))
5084                                )
5085                             )
5086                      );
5087 
5088                put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5089                assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5090                put_emwarn( mkexpr(ew) );
5091                /* Finally, if an emulation warning was reported,
5092                   side-exit to the next insn, reporting the warning,
5093                   so that Valgrind's dispatcher sees the warning.
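                  (Typical triggers are control words which unmask x87
                  exceptions or select a non-default precision, neither
                  of which this simulation implements exactly.)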
*/ 5094 stmt( 5095 IRStmt_Exit( 5096 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5097 Ijk_EmWarn, 5098 IRConst_U64( guest_RIP_bbstart+delta ) 5099 ) 5100 ); 5101 break; 5102 } 5103 5104 case 6: { /* FNSTENV m28 */ 5105 /* Uses dirty helper: 5106 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5107 IRDirty* d = unsafeIRDirty_0_N ( 5108 0/*regparms*/, 5109 "amd64g_dirtyhelper_FSTENV", 5110 &amd64g_dirtyhelper_FSTENV, 5111 mkIRExprVec_1( mkexpr(addr) ) 5112 ); 5113 d->needsBBP = True; 5114 /* declare we're writing memory */ 5115 d->mFx = Ifx_Write; 5116 d->mAddr = mkexpr(addr); 5117 d->mSize = 28; 5118 5119 /* declare we're reading guest state */ 5120 d->nFxState = 4; 5121 5122 d->fxState[0].fx = Ifx_Read; 5123 d->fxState[0].offset = OFFB_FTOP; 5124 d->fxState[0].size = sizeof(UInt); 5125 5126 d->fxState[1].fx = Ifx_Read; 5127 d->fxState[1].offset = OFFB_FPTAGS; 5128 d->fxState[1].size = 8 * sizeof(UChar); 5129 5130 d->fxState[2].fx = Ifx_Read; 5131 d->fxState[2].offset = OFFB_FPROUND; 5132 d->fxState[2].size = sizeof(ULong); 5133 5134 d->fxState[3].fx = Ifx_Read; 5135 d->fxState[3].offset = OFFB_FC3210; 5136 d->fxState[3].size = sizeof(ULong); 5137 5138 stmt( IRStmt_Dirty(d) ); 5139 5140 DIP("fnstenv %s\n", dis_buf); 5141 break; 5142 } 5143 5144 case 7: /* FNSTCW */ 5145 /* Fake up a native x87 FPU control word. The only 5146 thing it depends on is FPROUND[1:0], so call a clean 5147 helper to cook it up. */ 5148 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5149 DIP("fnstcw %s\n", dis_buf); 5150 storeLE( 5151 mkexpr(addr), 5152 unop( Iop_64to16, 5153 mkIRExprCCall( 5154 Ity_I64, 0/*regp*/, 5155 "amd64g_create_fpucw", &amd64g_create_fpucw, 5156 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5157 ) 5158 ) 5159 ); 5160 break; 5161 5162 default: 5163 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5164 vex_printf("first_opcode == 0xD9\n"); 5165 goto decode_fail; 5166 } 5167 5168 } else { 5169 delta++; 5170 switch (modrm) { 5171 5172 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5173 r_src = (UInt)modrm - 0xC0; 5174 DIP("fld %%st(%u)\n", r_src); 5175 t1 = newTemp(Ity_F64); 5176 assign(t1, get_ST(r_src)); 5177 fp_push(); 5178 put_ST(0, mkexpr(t1)); 5179 break; 5180 5181 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5182 r_src = (UInt)modrm - 0xC8; 5183 DIP("fxch %%st(%u)\n", r_src); 5184 t1 = newTemp(Ity_F64); 5185 t2 = newTemp(Ity_F64); 5186 assign(t1, get_ST(0)); 5187 assign(t2, get_ST(r_src)); 5188 put_ST_UNCHECKED(0, mkexpr(t2)); 5189 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5190 break; 5191 5192 case 0xE0: /* FCHS */ 5193 DIP("fchs\n"); 5194 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5195 break; 5196 5197 case 0xE1: /* FABS */ 5198 DIP("fabs\n"); 5199 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5200 break; 5201 5202 case 0xE5: { /* FXAM */ 5203 /* This is an interesting one. It examines %st(0), 5204 regardless of whether the tag says it's empty or not. 5205 Here, just pass both the tag (in our format) and the 5206 value (as a double, actually a ULong) to a helper 5207 function. 
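               The helper computes FXAM's C3:C2:C0 classification --
               per the usual x87 table: 101 = empty, 100 = zero,
               110 = denormal, 010 = normal finite, 011 = infinity,
               001 = NaN.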
*/
5208                IRExpr** args
5209                   = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5210                                    unop(Iop_ReinterpF64asI64,
5211                                         get_ST_UNCHECKED(0)) );
5212                put_C3210(mkIRExprCCall(
5213                             Ity_I64,
5214                             0/*regparm*/,
5215                             "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5216                             args
5217                         ));
5218                DIP("fxam\n");
5219                break;
5220             }
5221 
5222             case 0xE8: /* FLD1 */
5223                DIP("fld1\n");
5224                fp_push();
5225                /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5226                put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5227                break;
5228 
5229             case 0xE9: /* FLDL2T */
5230                DIP("fldl2t\n");
5231                fp_push();
5232                /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5233                put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5234                break;
5235 
5236             case 0xEA: /* FLDL2E */
5237                DIP("fldl2e\n");
5238                fp_push();
5239                /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5240                put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5241                break;
5242 
5243             case 0xEB: /* FLDPI */
5244                DIP("fldpi\n");
5245                fp_push();
5246                /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5247                put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5248                break;
5249 
5250             case 0xEC: /* FLDLG2 */
5251                DIP("fldlg2\n");
5252                fp_push();
5253                /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5254                put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5255                break;
5256 
5257             case 0xED: /* FLDLN2 */
5258                DIP("fldln2\n");
5259                fp_push();
5260                /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5261                put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5262                break;
5263 
5264             case 0xEE: /* FLDZ */
5265                DIP("fldz\n");
5266                fp_push();
5267                /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5268                put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5269                break;
5270 
5271             case 0xF0: /* F2XM1 */
5272                DIP("f2xm1\n");
5273                put_ST_UNCHECKED(0,
5274                   binop(Iop_2xm1F64,
5275                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5276                         get_ST(0)));
5277                break;
5278 
5279             case 0xF1: /* FYL2X */
5280                DIP("fyl2x\n");
5281                put_ST_UNCHECKED(1,
5282                   triop(Iop_Yl2xF64,
5283                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5284                         get_ST(1),
5285                         get_ST(0)));
5286                fp_pop();
5287                break;
5288 
5289             case 0xF2: /* FPTAN */
5290                DIP("fptan\n");
5291                put_ST_UNCHECKED(0,
5292                   binop(Iop_TanF64,
5293                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5294                         get_ST(0)));
5295                fp_push();
5296                put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
5297                clear_C2(); /* HACK */
5298                break;
5299 
5300             case 0xF3: /* FPATAN */
5301                DIP("fpatan\n");
5302                put_ST_UNCHECKED(1,
5303                   triop(Iop_AtanF64,
5304                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5305                         get_ST(1),
5306                         get_ST(0)));
5307                fp_pop();
5308                break;
5309 
5310             case 0xF4: { /* FXTRACT */
5311                IRTemp argF = newTemp(Ity_F64);
5312                IRTemp sigF = newTemp(Ity_F64);
5313                IRTemp expF = newTemp(Ity_F64);
5314                IRTemp argI = newTemp(Ity_I64);
5315                IRTemp sigI = newTemp(Ity_I64);
5316                IRTemp expI = newTemp(Ity_I64);
5317                DIP("fxtract\n");
5318                assign( argF, get_ST(0) );
5319                assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5320                assign( sigI,
5321                        mkIRExprCCall(
5322                           Ity_I64, 0/*regparms*/,
5323                           "x86amd64g_calculate_FXTRACT",
5324                           &x86amd64g_calculate_FXTRACT,
5325                           mkIRExprVec_2( mkexpr(argI),
5326                                          mkIRExpr_HWord(0)/*sig*/ ))
5327                );
5328                assign( expI,
5329                        mkIRExprCCall(
5330                           Ity_I64, 0/*regparms*/,
5331                           "x86amd64g_calculate_FXTRACT",
5332                           &x86amd64g_calculate_FXTRACT,
5333                           mkIRExprVec_2( mkexpr(argI),
5334                                          mkIRExpr_HWord(1)/*exp*/ ))
5335                );
5336                assign( sigF,
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5337 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5338 /* exponent */ 5339 put_ST_UNCHECKED(0, mkexpr(expF) ); 5340 fp_push(); 5341 /* significand */ 5342 put_ST(0, mkexpr(sigF) ); 5343 break; 5344 } 5345 5346 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5347 IRTemp a1 = newTemp(Ity_F64); 5348 IRTemp a2 = newTemp(Ity_F64); 5349 DIP("fprem1\n"); 5350 /* Do FPREM1 twice, once to get the remainder, and once 5351 to get the C3210 flag values. */ 5352 assign( a1, get_ST(0) ); 5353 assign( a2, get_ST(1) ); 5354 put_ST_UNCHECKED(0, 5355 triop(Iop_PRem1F64, 5356 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5357 mkexpr(a1), 5358 mkexpr(a2))); 5359 put_C3210( 5360 unop(Iop_32Uto64, 5361 triop(Iop_PRem1C3210F64, 5362 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5363 mkexpr(a1), 5364 mkexpr(a2)) )); 5365 break; 5366 } 5367 5368 case 0xF7: /* FINCSTP */ 5369 DIP("fincstp\n"); 5370 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5371 break; 5372 5373 case 0xF8: { /* FPREM -- not IEEE compliant */ 5374 IRTemp a1 = newTemp(Ity_F64); 5375 IRTemp a2 = newTemp(Ity_F64); 5376 DIP("fprem\n"); 5377 /* Do FPREM twice, once to get the remainder, and once 5378 to get the C3210 flag values. */ 5379 assign( a1, get_ST(0) ); 5380 assign( a2, get_ST(1) ); 5381 put_ST_UNCHECKED(0, 5382 triop(Iop_PRemF64, 5383 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5384 mkexpr(a1), 5385 mkexpr(a2))); 5386 put_C3210( 5387 unop(Iop_32Uto64, 5388 triop(Iop_PRemC3210F64, 5389 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5390 mkexpr(a1), 5391 mkexpr(a2)) )); 5392 break; 5393 } 5394 5395 case 0xF9: /* FYL2XP1 */ 5396 DIP("fyl2xp1\n"); 5397 put_ST_UNCHECKED(1, 5398 triop(Iop_Yl2xp1F64, 5399 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5400 get_ST(1), 5401 get_ST(0))); 5402 fp_pop(); 5403 break; 5404 5405 case 0xFA: /* FSQRT */ 5406 DIP("fsqrt\n"); 5407 put_ST_UNCHECKED(0, 5408 binop(Iop_SqrtF64, 5409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5410 get_ST(0))); 5411 break; 5412 5413 case 0xFB: { /* FSINCOS */ 5414 IRTemp a1 = newTemp(Ity_F64); 5415 assign( a1, get_ST(0) ); 5416 DIP("fsincos\n"); 5417 put_ST_UNCHECKED(0, 5418 binop(Iop_SinF64, 5419 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5420 mkexpr(a1))); 5421 fp_push(); 5422 put_ST(0, 5423 binop(Iop_CosF64, 5424 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5425 mkexpr(a1))); 5426 clear_C2(); /* HACK */ 5427 break; 5428 } 5429 5430 case 0xFC: /* FRNDINT */ 5431 DIP("frndint\n"); 5432 put_ST_UNCHECKED(0, 5433 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5434 break; 5435 5436 case 0xFD: /* FSCALE */ 5437 DIP("fscale\n"); 5438 put_ST_UNCHECKED(0, 5439 triop(Iop_ScaleF64, 5440 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5441 get_ST(0), 5442 get_ST(1))); 5443 break; 5444 5445 case 0xFE: /* FSIN */ 5446 DIP("fsin\n"); 5447 put_ST_UNCHECKED(0, 5448 binop(Iop_SinF64, 5449 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5450 get_ST(0))); 5451 clear_C2(); /* HACK */ 5452 break; 5453 5454 case 0xFF: /* FCOS */ 5455 DIP("fcos\n"); 5456 put_ST_UNCHECKED(0, 5457 binop(Iop_CosF64, 5458 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5459 get_ST(0))); 5460 clear_C2(); /* HACK */ 5461 break; 5462 5463 default: 5464 goto decode_fail; 5465 } 5466 } 5467 } 5468 5469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 5470 else 5471 if (first_opcode == 0xDA) { 5472 5473 if (modrm < 0xC0) { 5474 5475 /* bits 5,4,3 are an opcode extension, and the modRM also 5476 specifies an address. 
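            These are the m32int forms: each loads a 32-bit signed
            integer, converts it to double via Iop_I32StoF64, and
            combines it with ST(0).  For example, fiaddl (%rax)
            computes ST(0) + (double) *(Int*)RAX.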
*/
5477          IROp   fop;
5478          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5479          delta += len;
5480          switch (gregLO3ofRM(modrm)) {
5481 
5482             case 0: /* FIADD m32int */ /* ST(0) += m32int */
5483                DIP("fiaddl %s\n", dis_buf);
5484                fop = Iop_AddF64;
5485                goto do_fop_m32;
5486 
5487             case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
5488                DIP("fimull %s\n", dis_buf);
5489                fop = Iop_MulF64;
5490                goto do_fop_m32;
5491 
5492             case 4: /* FISUB m32int */ /* ST(0) -= m32int */
5493                DIP("fisubl %s\n", dis_buf);
5494                fop = Iop_SubF64;
5495                goto do_fop_m32;
5496 
5497             case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
5498                DIP("fisubrl %s\n", dis_buf);
5499                fop = Iop_SubF64;
5500                goto do_foprev_m32;
5501 
5502             case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
5503                DIP("fidivl %s\n", dis_buf);
5504                fop = Iop_DivF64;
5505                goto do_fop_m32;
5506 
5507             case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
5508                DIP("fidivrl %s\n", dis_buf);
5509                fop = Iop_DivF64;
5510                goto do_foprev_m32;
5511 
5512             do_fop_m32:
5513                put_ST_UNCHECKED(0,
5514                   triop(fop,
5515                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5516                         get_ST(0),
5517                         unop(Iop_I32StoF64,
5518                              loadLE(Ity_I32, mkexpr(addr)))));
5519                break;
5520 
5521             do_foprev_m32:
5522                put_ST_UNCHECKED(0,
5523                   triop(fop,
5524                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5525                         unop(Iop_I32StoF64,
5526                              loadLE(Ity_I32, mkexpr(addr))),
5527                         get_ST(0)));
5528                break;
5529 
5530             default:
5531                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5532                vex_printf("first_opcode == 0xDA\n");
5533                goto decode_fail;
5534          }
5535 
5536       } else {
5537 
5538          delta++;
5539          switch (modrm) {
5540 
5541             case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
5542                r_src = (UInt)modrm - 0xC0;
5543                DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
5544                put_ST_UNCHECKED(0,
5545                                 IRExpr_Mux0X(
5546                                    unop(Iop_1Uto8,
5547                                         mk_amd64g_calculate_condition(AMD64CondB)),
5548                                    get_ST(0), get_ST(r_src)) );
5549                break;
5550 
5551             case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
5552                r_src = (UInt)modrm - 0xC8;
5553                DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
5554                put_ST_UNCHECKED(0,
5555                                 IRExpr_Mux0X(
5556                                    unop(Iop_1Uto8,
5557                                         mk_amd64g_calculate_condition(AMD64CondZ)),
5558                                    get_ST(0), get_ST(r_src)) );
5559                break;
5560 
5561             case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
5562                r_src = (UInt)modrm - 0xD0;
5563                DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
5564                put_ST_UNCHECKED(0,
5565                                 IRExpr_Mux0X(
5566                                    unop(Iop_1Uto8,
5567                                         mk_amd64g_calculate_condition(AMD64CondBE)),
5568                                    get_ST(0), get_ST(r_src)) );
5569                break;
5570 
5571             case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
5572                r_src = (UInt)modrm - 0xD8;
5573                DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
5574                put_ST_UNCHECKED(0,
5575                                 IRExpr_Mux0X(
5576                                    unop(Iop_1Uto8,
5577                                         mk_amd64g_calculate_condition(AMD64CondP)),
5578                                    get_ST(0), get_ST(r_src)) );
5579                break;
5580 
5581             case 0xE9: /* FUCOMPP %st(0),%st(1) */
5582                DIP("fucompp %%st(0),%%st(1)\n");
5583                /* This forces C1 to zero, which isn't right. */
5584                put_C3210(
5585                   unop(Iop_32Uto64,
5586                        binop( Iop_And32,
5587                               binop(Iop_Shl32,
5588                                     binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5589                                     mkU8(8)),
5590                               mkU32(0x4500)
5591                   )));
5592                fp_pop();
5593                fp_pop();
5594                break;
5595 
5596             default:
5597                goto decode_fail;
5598          }
5599 
5600       }
5601    }
5602 
5603    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
5604    else
5605    if (first_opcode == 0xDB) {
5606       if (modrm < 0xC0) {
5607 
5608          /* bits 5,4,3 are an opcode extension, and the modRM also
5609             specifies an address.
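            Here /0../3 are the 32-bit integer load/store forms (FILD,
            FISTTP, FIST, FISTP), while /5 and /7 move 80-bit extended
            reals, which have to go through dirty helpers since IR has
            no 80-bit FP type.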
*/
5610          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5611          delta += len;
5612 
5613          switch (gregLO3ofRM(modrm)) {
5614 
5615             case 0: /* FILD m32int */
5616                DIP("fildl %s\n", dis_buf);
5617                fp_push();
5618                put_ST(0, unop(Iop_I32StoF64,
5619                               loadLE(Ity_I32, mkexpr(addr))));
5620                break;
5621 
5622             case 1: /* FISTTPL m32 (SSE3) */
5623                DIP("fisttpl %s\n", dis_buf);
5624                storeLE( mkexpr(addr),
5625                         binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
5626                fp_pop();
5627                break;
5628 
5629             case 2: /* FIST m32 */
5630                DIP("fistl %s\n", dis_buf);
5631                storeLE( mkexpr(addr),
5632                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5633                break;
5634 
5635             case 3: /* FISTP m32 */
5636                DIP("fistpl %s\n", dis_buf);
5637                storeLE( mkexpr(addr),
5638                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
5639                fp_pop();
5640                break;
5641 
5642             case 5: { /* FLD extended-real */
5643                /* Uses dirty helper:
5644                      ULong amd64g_loadF80le ( ULong )
5645                   addr holds the address.  First, do a dirty call to
5646                   get hold of the data. */
5647                IRTemp   val  = newTemp(Ity_I64);
5648                IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
5649 
5650                IRDirty* d = unsafeIRDirty_1_N (
5651                                val,
5652                                0/*regparms*/,
5653                                "amd64g_dirtyhelper_loadF80le",
5654                                &amd64g_dirtyhelper_loadF80le,
5655                                args
5656                             );
5657                /* declare that we're reading memory */
5658                d->mFx   = Ifx_Read;
5659                d->mAddr = mkexpr(addr);
5660                d->mSize = 10;
5661 
5662                /* execute the dirty call, dumping the result in val. */
5663                stmt( IRStmt_Dirty(d) );
5664                fp_push();
5665                put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
5666 
5667                DIP("fldt %s\n", dis_buf);
5668                break;
5669             }
5670 
5671             case 7: { /* FSTP extended-real */
5672                /* Uses dirty helper:
5673                      void amd64g_storeF80le ( ULong addr, ULong data )
5674                */
5675                IRExpr** args
5676                   = mkIRExprVec_2( mkexpr(addr),
5677                                    unop(Iop_ReinterpF64asI64, get_ST(0)) );
5678 
5679                IRDirty* d = unsafeIRDirty_0_N (
5680                                0/*regparms*/,
5681                                "amd64g_dirtyhelper_storeF80le",
5682                                &amd64g_dirtyhelper_storeF80le,
5683                                args
5684                             );
5685                /* declare we're writing memory */
5686                d->mFx   = Ifx_Write;
5687                d->mAddr = mkexpr(addr);
5688                d->mSize = 10;
5689 
5690                /* execute the dirty call. */
5691                stmt( IRStmt_Dirty(d) );
5692                fp_pop();
5693 
5694                DIP("fstpt %s\n", dis_buf);
5695                break;
5696             }
5697 
5698             default:
5699                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
5700                vex_printf("first_opcode == 0xDB\n");
5701                goto decode_fail;
5702          }
5703 
5704       } else {
5705 
5706          delta++;
5707          switch (modrm) {
5708 
5709             case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
5710                r_src = (UInt)modrm - 0xC0;
5711                DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
5712                put_ST_UNCHECKED(0,
5713                                 IRExpr_Mux0X(
5714                                    unop(Iop_1Uto8,
5715                                         mk_amd64g_calculate_condition(AMD64CondNB)),
5716                                    get_ST(0), get_ST(r_src)) );
5717                break;
5718 
5719             case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
5720                r_src = (UInt)modrm - 0xC8;
5721                DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
5722                put_ST_UNCHECKED(
5723                   0,
5724                   IRExpr_Mux0X(
5725                      unop(Iop_1Uto8,
5726                           mk_amd64g_calculate_condition(AMD64CondNZ)),
5727                      get_ST(0),
5728                      get_ST(r_src)
5729                   )
5730                );
5731                break;
5732 
5733             case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
5734                r_src = (UInt)modrm - 0xD0;
5735                DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
5736                put_ST_UNCHECKED(
5737                   0,
5738                   IRExpr_Mux0X(
5739                      unop(Iop_1Uto8,
5740                           mk_amd64g_calculate_condition(AMD64CondNBE)),
5741                      get_ST(0),
5742                      get_ST(r_src)
5743                   )
5744                );
5745                break;
5746 
5747             case 0xD8 ...
0xDF: /* FCMOVNU ST(i), ST(0) */ 5748 r_src = (UInt)modrm - 0xD8; 5749 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 5750 put_ST_UNCHECKED( 5751 0, 5752 IRExpr_Mux0X( 5753 unop(Iop_1Uto8, 5754 mk_amd64g_calculate_condition(AMD64CondNP)), 5755 get_ST(0), 5756 get_ST(r_src) 5757 ) 5758 ); 5759 break; 5760 5761 case 0xE2: 5762 DIP("fnclex\n"); 5763 break; 5764 5765 case 0xE3: { 5766 /* Uses dirty helper: 5767 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 5768 IRDirty* d = unsafeIRDirty_0_N ( 5769 0/*regparms*/, 5770 "amd64g_dirtyhelper_FINIT", 5771 &amd64g_dirtyhelper_FINIT, 5772 mkIRExprVec_0() 5773 ); 5774 d->needsBBP = True; 5775 5776 /* declare we're writing guest state */ 5777 d->nFxState = 5; 5778 5779 d->fxState[0].fx = Ifx_Write; 5780 d->fxState[0].offset = OFFB_FTOP; 5781 d->fxState[0].size = sizeof(UInt); 5782 5783 d->fxState[1].fx = Ifx_Write; 5784 d->fxState[1].offset = OFFB_FPREGS; 5785 d->fxState[1].size = 8 * sizeof(ULong); 5786 5787 d->fxState[2].fx = Ifx_Write; 5788 d->fxState[2].offset = OFFB_FPTAGS; 5789 d->fxState[2].size = 8 * sizeof(UChar); 5790 5791 d->fxState[3].fx = Ifx_Write; 5792 d->fxState[3].offset = OFFB_FPROUND; 5793 d->fxState[3].size = sizeof(ULong); 5794 5795 d->fxState[4].fx = Ifx_Write; 5796 d->fxState[4].offset = OFFB_FC3210; 5797 d->fxState[4].size = sizeof(ULong); 5798 5799 stmt( IRStmt_Dirty(d) ); 5800 5801 DIP("fninit\n"); 5802 break; 5803 } 5804 5805 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 5806 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 5807 break; 5808 5809 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 5810 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 5811 break; 5812 5813 default: 5814 goto decode_fail; 5815 } 5816 } 5817 } 5818 5819 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 5820 else 5821 if (first_opcode == 0xDC) { 5822 if (modrm < 0xC0) { 5823 5824 /* bits 5,4,3 are an opcode extension, and the modRM also 5825 specifies an address. */ 5826 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5827 delta += len; 5828 5829 switch (gregLO3ofRM(modrm)) { 5830 5831 case 0: /* FADD double-real */ 5832 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 5833 break; 5834 5835 case 1: /* FMUL double-real */ 5836 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 5837 break; 5838 5839 //.. case 2: /* FCOM double-real */ 5840 //.. DIP("fcoml %s\n", dis_buf); 5841 //.. /* This forces C1 to zero, which isn't right. */ 5842 //.. put_C3210( 5843 //.. binop( Iop_And32, 5844 //.. binop(Iop_Shl32, 5845 //.. binop(Iop_CmpF64, 5846 //.. get_ST(0), 5847 //.. loadLE(Ity_F64,mkexpr(addr))), 5848 //.. mkU8(8)), 5849 //.. mkU32(0x4500) 5850 //.. )); 5851 //.. break; 5852 5853 case 3: /* FCOMP double-real */ 5854 DIP("fcompl %s\n", dis_buf); 5855 /* This forces C1 to zero, which isn't right. 
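            The CmpF64 result is shifted left by 8 and masked with
            0x4500, so only the C3/C2/C0 positions (status-word bits
            14, 10 and 8) are written; C1 (bit 9) always reads as
            zero.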
*/
               put_C3210(
                   unop(Iop_32Uto64,
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64,
                                      get_ST(0),
                                      loadLE(Ity_F64,mkexpr(addr))),
                                mkU8(8)),
                          mkU32(0x4500)
                   )));
               fp_pop();
               break;

            case 4: /* FSUB double-real */
               fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
               break;

            case 5: /* FSUBR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
               break;

            case 6: /* FDIV double-real */
               fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
               break;

            case 7: /* FDIVR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDC\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
               break;

            case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
               break;

            case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
               break;

            case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
               break;

            case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
               break;

            case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
               break;

            default:
               goto decode_fail;
         }

      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDD) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
         delta += len;

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FLD double-real */
               DIP("fldl %s\n", dis_buf);
               fp_push();
               put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
               break;

            case 1: /* FISTTPQ m64 (SSE3) */
               DIP("fisttpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
               fp_pop();
               break;

            case 2: /* FST double-real */
               DIP("fstl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               break;

            case 3: /* FSTP double-real */
               DIP("fstpl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
               fp_pop();
               break;

            //.. case 4: { /* FRSTOR m108 */
            //..    /* Uses dirty helper:
            //..          VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
            //..    IRTemp   ew = newTemp(Ity_I32);
            //..    IRDirty* d  = unsafeIRDirty_0_N (
            //..                     0/*regparms*/,
            //..                     "x86g_dirtyhelper_FRSTOR",
            //..                     &x86g_dirtyhelper_FRSTOR,
            //..                     mkIRExprVec_1( mkexpr(addr) )
            //..                  );
            //..    d->needsBBP = True;
            //..    d->tmp      = ew;
            //..    /* declare we're reading memory */
            //..    d->mFx   = Ifx_Read;
            //..    d->mAddr = mkexpr(addr);
            //..    d->mSize = 108;
            //..
            //..    /* declare we're writing guest state */
            //..    d->nFxState = 5;
            //..
            //..    d->fxState[0].fx     = Ifx_Write;
            //..    d->fxState[0].offset = OFFB_FTOP;
            //..    d->fxState[0].size   = sizeof(UInt);
            //..
            //..    d->fxState[1].fx     = Ifx_Write;
            //..    d->fxState[1].offset = OFFB_FPREGS;
            //..    d->fxState[1].size   = 8 * sizeof(ULong);
            //..
            //..    d->fxState[2].fx     = Ifx_Write;
            //..    d->fxState[2].offset = OFFB_FPTAGS;
            //..    d->fxState[2].size   = 8 * sizeof(UChar);
            //..
            //..    d->fxState[3].fx     = Ifx_Write;
            //..    d->fxState[3].offset = OFFB_FPROUND;
            //..    d->fxState[3].size   = sizeof(UInt);
            //..
            //..    d->fxState[4].fx     = Ifx_Write;
            //..    d->fxState[4].offset = OFFB_FC3210;
            //..    d->fxState[4].size   = sizeof(UInt);
            //..
            //..    stmt( IRStmt_Dirty(d) );
            //..
            //..    /* ew contains any emulation warning we may need to
            //..       issue.  If needed, side-exit to the next insn,
            //..       reporting the warning, so that Valgrind's dispatcher
            //..       sees the warning. */
            //..    put_emwarn( mkexpr(ew) );
            //..    stmt(
            //..       IRStmt_Exit(
            //..          binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
            //..          Ijk_EmWarn,
            //..          IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
            //..       )
            //..    );
            //..
            //..    DIP("frstor %s\n", dis_buf);
            //..    break;
            //.. }
            //..
            //.. case 6: { /* FNSAVE m108 */
            //..    /* Uses dirty helper:
            //..          void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
            //..    IRDirty* d = unsafeIRDirty_0_N (
            //..                    0/*regparms*/,
            //..                    "x86g_dirtyhelper_FSAVE",
            //..                    &x86g_dirtyhelper_FSAVE,
            //..                    mkIRExprVec_1( mkexpr(addr) )
            //..                 );
            //..    d->needsBBP = True;
            //..    /* declare we're writing memory */
            //..    d->mFx   = Ifx_Write;
            //..    d->mAddr = mkexpr(addr);
            //..    d->mSize = 108;
            //..
            //..    /* declare we're reading guest state */
            //..    d->nFxState = 5;
            //..
            //..    d->fxState[0].fx     = Ifx_Read;
            //..    d->fxState[0].offset = OFFB_FTOP;
            //..    d->fxState[0].size   = sizeof(UInt);
            //..
            //..    d->fxState[1].fx     = Ifx_Read;
            //..    d->fxState[1].offset = OFFB_FPREGS;
            //..    d->fxState[1].size   = 8 * sizeof(ULong);
            //..
            //..    d->fxState[2].fx     = Ifx_Read;
            //..    d->fxState[2].offset = OFFB_FPTAGS;
            //..    d->fxState[2].size   = 8 * sizeof(UChar);
            //..
            //..    d->fxState[3].fx     = Ifx_Read;
            //..    d->fxState[3].offset = OFFB_FPROUND;
            //..    d->fxState[3].size   = sizeof(UInt);
            //..
            //..    d->fxState[4].fx     = Ifx_Read;
            //..    d->fxState[4].offset = OFFB_FC3210;
            //..    d->fxState[4].size   = sizeof(UInt);
            //..
            //..    stmt( IRStmt_Dirty(d) );
            //..
            //..    DIP("fnsave %s\n", dis_buf);
            //..    break;
            //.. }

            case 7: { /* FNSTSW m16 */
               IRExpr* sw = get_FPU_sw();
               vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
               storeLE( mkexpr(addr), sw );
               DIP("fnstsw %s\n", dis_buf);
               break;
            }

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDD\n");
               goto decode_fail;
         }
      } else {
         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FFREE %st(?) */
               r_dst = (UInt)modrm - 0xC0;
               DIP("ffree %%st(%u)\n", r_dst);
               put_ST_TAG ( r_dst, mkU8(0) );
               break;

            case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fst %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               break;

            case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fstp %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));
               fp_pop();
               break;

            case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE0;
               DIP("fucom %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   unop(Iop_32Uto64,
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   )));
               break;

            case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE8;
               DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
               put_C3210(
                   unop(Iop_32Uto64,
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
                                mkU8(8)),
                          mkU32(0x4500)
                   )));
               fp_pop();
               break;

            default:
               goto decode_fail;
         }
      }
   }

   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
   else
   if (first_opcode == 0xDE) {

      if (modrm < 0xC0) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IROp   fop;
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
         delta += len;

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FIADD m16int */ /* ST(0) += m16int */
               DIP("fiaddw %s\n", dis_buf);
               fop = Iop_AddF64;
               goto do_fop_m16;

            case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
               DIP("fimulw %s\n", dis_buf);
               fop = Iop_MulF64;
               goto do_fop_m16;

            case 4: /* FISUB m16int */ /* ST(0) -= m16int */
               DIP("fisubw %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_fop_m16;

            case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
               DIP("fisubrw %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_foprev_m16;

            case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
               DIP("fidivw %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_fop_m16;

            case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
               DIP("fidivrw %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_foprev_m16;

            do_fop_m16:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        unop(Iop_I32StoF64,
                             unop(Iop_16Sto32,
                                  loadLE(Ity_I16, mkexpr(addr))))));
               break;

            do_foprev_m16:
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        unop(Iop_I32StoF64,
                             unop(Iop_16Sto32,
                                  loadLE(Ity_I16, mkexpr(addr)))),
                        get_ST(0)));
               break;

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDE\n");
               goto decode_fail;
         }

      } else {

         delta++;
         switch (modrm) {

            case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
               break;

            case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?)
*/ 6224 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6225 break; 6226 6227 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6228 DIP("fcompp %%st(0),%%st(1)\n"); 6229 /* This forces C1 to zero, which isn't right. */ 6230 put_C3210( 6231 unop(Iop_32Uto64, 6232 binop( Iop_And32, 6233 binop(Iop_Shl32, 6234 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6235 mkU8(8)), 6236 mkU32(0x4500) 6237 ))); 6238 fp_pop(); 6239 fp_pop(); 6240 break; 6241 6242 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6243 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6244 break; 6245 6246 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6247 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6248 break; 6249 6250 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6251 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6252 break; 6253 6254 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6255 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6256 break; 6257 6258 default: 6259 goto decode_fail; 6260 } 6261 6262 } 6263 } 6264 6265 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6266 else 6267 if (first_opcode == 0xDF) { 6268 6269 if (modrm < 0xC0) { 6270 6271 /* bits 5,4,3 are an opcode extension, and the modRM also 6272 specifies an address. */ 6273 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6274 delta += len; 6275 6276 switch (gregLO3ofRM(modrm)) { 6277 6278 case 0: /* FILD m16int */ 6279 DIP("fildw %s\n", dis_buf); 6280 fp_push(); 6281 put_ST(0, unop(Iop_I32StoF64, 6282 unop(Iop_16Sto32, 6283 loadLE(Ity_I16, mkexpr(addr))))); 6284 break; 6285 6286 case 1: /* FISTTPS m16 (SSE3) */ 6287 DIP("fisttps %s\n", dis_buf); 6288 storeLE( mkexpr(addr), 6289 x87ishly_qnarrow_32_to_16( 6290 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6291 fp_pop(); 6292 break; 6293 6294 case 2: /* FIST m16 */ 6295 DIP("fists %s\n", dis_buf); 6296 storeLE( mkexpr(addr), 6297 x87ishly_qnarrow_32_to_16( 6298 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6299 break; 6300 6301 case 3: /* FISTP m16 */ 6302 DIP("fistps %s\n", dis_buf); 6303 storeLE( mkexpr(addr), 6304 x87ishly_qnarrow_32_to_16( 6305 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6306 fp_pop(); 6307 break; 6308 6309 case 5: /* FILD m64 */ 6310 DIP("fildll %s\n", dis_buf); 6311 fp_push(); 6312 put_ST(0, binop(Iop_I64StoF64, 6313 get_roundingmode(), 6314 loadLE(Ity_I64, mkexpr(addr)))); 6315 break; 6316 6317 case 7: /* FISTP m64 */ 6318 DIP("fistpll %s\n", dis_buf); 6319 storeLE( mkexpr(addr), 6320 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6321 fp_pop(); 6322 break; 6323 6324 default: 6325 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6326 vex_printf("first_opcode == 0xDF\n"); 6327 goto decode_fail; 6328 } 6329 6330 } else { 6331 6332 delta++; 6333 switch (modrm) { 6334 6335 case 0xC0: /* FFREEP %st(0) */ 6336 DIP("ffreep %%st(%d)\n", 0); 6337 put_ST_TAG ( 0, mkU8(0) ); 6338 fp_pop(); 6339 break; 6340 6341 case 0xE0: /* FNSTSW %ax */ 6342 DIP("fnstsw %%ax\n"); 6343 /* Invent a plausible-looking FPU status word value and 6344 dump it in %AX: 6345 ((ftop & 7) << 11) | (c3210 & 0x4700) 6346 */ 6347 putIRegRAX( 6348 2, 6349 unop(Iop_32to16, 6350 binop(Iop_Or32, 6351 binop(Iop_Shl32, 6352 binop(Iop_And32, get_ftop(), mkU32(7)), 6353 mkU8(11)), 6354 binop(Iop_And32, 6355 unop(Iop_64to32, get_C3210()), 6356 mkU32(0x4700)) 6357 ))); 6358 break; 6359 6360 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) 
*/ 6361 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 6362 break; 6363 6364 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */ 6365 /* not really right since COMIP != UCOMIP */ 6366 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 6367 break; 6368 6369 default: 6370 goto decode_fail; 6371 } 6372 } 6373 6374 } 6375 6376 else 6377 goto decode_fail; 6378 6379 *decode_ok = True; 6380 return delta; 6381 6382 decode_fail: 6383 *decode_ok = False; 6384 return delta; 6385 } 6386 6387 6388 /*------------------------------------------------------------*/ 6389 /*--- ---*/ 6390 /*--- MMX INSTRUCTIONS ---*/ 6391 /*--- ---*/ 6392 /*------------------------------------------------------------*/ 6393 6394 /* Effect of MMX insns on x87 FPU state (table 11-2 of 6395 IA32 arch manual, volume 3): 6396 6397 Read from, or write to MMX register (viz, any insn except EMMS): 6398 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 6399 * FP stack pointer set to zero 6400 6401 EMMS: 6402 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 6403 * FP stack pointer set to zero 6404 */ 6405 6406 static void do_MMX_preamble ( void ) 6407 { 6408 Int i; 6409 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 6410 IRExpr* zero = mkU32(0); 6411 IRExpr* tag1 = mkU8(1); 6412 put_ftop(zero); 6413 for (i = 0; i < 8; i++) 6414 stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); 6415 } 6416 6417 static void do_EMMS_preamble ( void ) 6418 { 6419 Int i; 6420 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 6421 IRExpr* zero = mkU32(0); 6422 IRExpr* tag0 = mkU8(0); 6423 put_ftop(zero); 6424 for (i = 0; i < 8; i++) 6425 stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); 6426 } 6427 6428 6429 static IRExpr* getMMXReg ( UInt archreg ) 6430 { 6431 vassert(archreg < 8); 6432 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 6433 } 6434 6435 6436 static void putMMXReg ( UInt archreg, IRExpr* e ) 6437 { 6438 vassert(archreg < 8); 6439 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 6440 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 6441 } 6442 6443 6444 /* Helper for non-shift MMX insns. Note this is incomplete in the 6445 sense that it does not first call do_MMX_preamble() -- that is the 6446 responsibility of its caller. 
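   For example, dis_MMX below calls do_MMX_preamble() exactly once per
   insn, before dispatching opcodes such as PADD (0xFC..0xFE) to this
   helper, so the FTOP/FPTAGS fixup is not repeated per operand.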
*/ 6447 6448 static 6449 ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi, 6450 Prefix pfx, 6451 Long delta, 6452 UChar opc, 6453 HChar* name, 6454 Bool show_granularity ) 6455 { 6456 HChar dis_buf[50]; 6457 UChar modrm = getUChar(delta); 6458 Bool isReg = epartIsReg(modrm); 6459 IRExpr* argL = NULL; 6460 IRExpr* argR = NULL; 6461 IRExpr* argG = NULL; 6462 IRExpr* argE = NULL; 6463 IRTemp res = newTemp(Ity_I64); 6464 6465 Bool invG = False; 6466 IROp op = Iop_INVALID; 6467 void* hAddr = NULL; 6468 HChar* hName = NULL; 6469 Bool eLeft = False; 6470 6471 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 6472 6473 switch (opc) { 6474 /* Original MMX ones */ 6475 case 0xFC: op = Iop_Add8x8; break; 6476 case 0xFD: op = Iop_Add16x4; break; 6477 case 0xFE: op = Iop_Add32x2; break; 6478 6479 case 0xEC: op = Iop_QAdd8Sx8; break; 6480 case 0xED: op = Iop_QAdd16Sx4; break; 6481 6482 case 0xDC: op = Iop_QAdd8Ux8; break; 6483 case 0xDD: op = Iop_QAdd16Ux4; break; 6484 6485 case 0xF8: op = Iop_Sub8x8; break; 6486 case 0xF9: op = Iop_Sub16x4; break; 6487 case 0xFA: op = Iop_Sub32x2; break; 6488 6489 case 0xE8: op = Iop_QSub8Sx8; break; 6490 case 0xE9: op = Iop_QSub16Sx4; break; 6491 6492 case 0xD8: op = Iop_QSub8Ux8; break; 6493 case 0xD9: op = Iop_QSub16Ux4; break; 6494 6495 case 0xE5: op = Iop_MulHi16Sx4; break; 6496 case 0xD5: op = Iop_Mul16x4; break; 6497 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break; 6498 6499 case 0x74: op = Iop_CmpEQ8x8; break; 6500 case 0x75: op = Iop_CmpEQ16x4; break; 6501 case 0x76: op = Iop_CmpEQ32x2; break; 6502 6503 case 0x64: op = Iop_CmpGT8Sx8; break; 6504 case 0x65: op = Iop_CmpGT16Sx4; break; 6505 case 0x66: op = Iop_CmpGT32Sx2; break; 6506 6507 case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; 6508 case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; 6509 case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; 6510 6511 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 6512 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 6513 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 6514 6515 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 6516 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 6517 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 6518 6519 case 0xDB: op = Iop_And64; break; 6520 case 0xDF: op = Iop_And64; invG = True; break; 6521 case 0xEB: op = Iop_Or64; break; 6522 case 0xEF: /* Possibly do better here if argL and argR are the 6523 same reg */ 6524 op = Iop_Xor64; break; 6525 6526 /* Introduced in SSE1 */ 6527 case 0xE0: op = Iop_Avg8Ux8; break; 6528 case 0xE3: op = Iop_Avg16Ux4; break; 6529 case 0xEE: op = Iop_Max16Sx4; break; 6530 case 0xDE: op = Iop_Max8Ux8; break; 6531 case 0xEA: op = Iop_Min16Sx4; break; 6532 case 0xDA: op = Iop_Min8Ux8; break; 6533 case 0xE4: op = Iop_MulHi16Ux4; break; 6534 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break; 6535 6536 /* Introduced in SSE2 */ 6537 case 0xD4: op = Iop_Add64; break; 6538 case 0xFB: op = Iop_Sub64; break; 6539 6540 default: 6541 vex_printf("\n0x%x\n", (Int)opc); 6542 vpanic("dis_MMXop_regmem_to_reg"); 6543 } 6544 6545 # undef XXX 6546 6547 argG = getMMXReg(gregLO3ofRM(modrm)); 6548 if (invG) 6549 argG = unop(Iop_Not64, argG); 6550 6551 if (isReg) { 6552 delta++; 6553 argE = getMMXReg(eregLO3ofRM(modrm)); 6554 } else { 6555 Int len; 6556 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6557 delta += len; 6558 argE = loadLE(Ity_I64, mkexpr(addr)); 6559 } 6560 6561 if (eLeft) { 6562 argL = argE; 6563 argR = 
argG;
   } else {
      argL = argG;
      argR = argE;
   }

   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );

   return delta;
}


/* Vector by scalar shift of G by the amount specified at the bottom
   of E.  This is a straight copy of dis_SSE_shiftG_byE. */

static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  g0   = newTemp(Ity_I64);
   IRTemp  g1   = newTemp(Ity_I64);
   IRTemp  amt  = newTemp(Ity_I64);
   IRTemp  amt8 = newTemp(Ity_I8);

   if (epartIsReg(rm)) {
      assign( amt, getMMXReg(eregLO3ofRM(rm)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregLO3ofRM(rm)),
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameMMXReg(gregLO3ofRM(rm)) );
      delta += alen;
   }
   assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            mkU64(0),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
            binop(op, mkexpr(g0), mkU8(size-1)),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else {
      vassert(0);
   }

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm.
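   For example (illustrative): "psrlw $3, %mm2" arrives here with op =
   Iop_ShrN16x4, so size = 16 and each lane is shifted by 3; "psrlw
   $20, %mm2" has amt >= the lane width, so the result is forced to
   all zeroes, matching hardware behaviour.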
*/ 6674 6675 static 6676 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op ) 6677 { 6678 Bool shl, shr, sar; 6679 UChar rm = getUChar(delta); 6680 IRTemp e0 = newTemp(Ity_I64); 6681 IRTemp e1 = newTemp(Ity_I64); 6682 UChar amt, size; 6683 vassert(epartIsReg(rm)); 6684 vassert(gregLO3ofRM(rm) == 2 6685 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 6686 amt = getUChar(delta+1); 6687 delta += 2; 6688 DIP("%s $%d,%s\n", opname, 6689 (Int)amt, 6690 nameMMXReg(eregLO3ofRM(rm)) ); 6691 6692 assign( e0, getMMXReg(eregLO3ofRM(rm)) ); 6693 6694 shl = shr = sar = False; 6695 size = 0; 6696 switch (op) { 6697 case Iop_ShlN16x4: shl = True; size = 16; break; 6698 case Iop_ShlN32x2: shl = True; size = 32; break; 6699 case Iop_Shl64: shl = True; size = 64; break; 6700 case Iop_SarN16x4: sar = True; size = 16; break; 6701 case Iop_SarN32x2: sar = True; size = 32; break; 6702 case Iop_ShrN16x4: shr = True; size = 16; break; 6703 case Iop_ShrN32x2: shr = True; size = 32; break; 6704 case Iop_Shr64: shr = True; size = 64; break; 6705 default: vassert(0); 6706 } 6707 6708 if (shl || shr) { 6709 assign( e1, amt >= size 6710 ? mkU64(0) 6711 : binop(op, mkexpr(e0), mkU8(amt)) 6712 ); 6713 } else 6714 if (sar) { 6715 assign( e1, amt >= size 6716 ? binop(op, mkexpr(e0), mkU8(size-1)) 6717 : binop(op, mkexpr(e0), mkU8(amt)) 6718 ); 6719 } else { 6720 vassert(0); 6721 } 6722 6723 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) ); 6724 return delta; 6725 } 6726 6727 6728 /* Completely handle all MMX instructions except emms. */ 6729 6730 static 6731 ULong dis_MMX ( Bool* decode_ok, 6732 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta ) 6733 { 6734 Int len; 6735 UChar modrm; 6736 HChar dis_buf[50]; 6737 UChar opc = getUChar(delta); 6738 delta++; 6739 6740 /* dis_MMX handles all insns except emms. 
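   emms needs the opposite tag treatment -- do_EMMS_preamble() above
   marks all eight registers empty -- and is therefore left to the
   caller.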
*/ 6741 do_MMX_preamble(); 6742 6743 switch (opc) { 6744 6745 case 0x6E: 6746 if (sz == 4) { 6747 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/ 6748 modrm = getUChar(delta); 6749 if (epartIsReg(modrm)) { 6750 delta++; 6751 putMMXReg( 6752 gregLO3ofRM(modrm), 6753 binop( Iop_32HLto64, 6754 mkU32(0), 6755 getIReg32(eregOfRexRM(pfx,modrm)) ) ); 6756 DIP("movd %s, %s\n", 6757 nameIReg32(eregOfRexRM(pfx,modrm)), 6758 nameMMXReg(gregLO3ofRM(modrm))); 6759 } else { 6760 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6761 delta += len; 6762 putMMXReg( 6763 gregLO3ofRM(modrm), 6764 binop( Iop_32HLto64, 6765 mkU32(0), 6766 loadLE(Ity_I32, mkexpr(addr)) ) ); 6767 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6768 } 6769 } 6770 else 6771 if (sz == 8) { 6772 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/ 6773 modrm = getUChar(delta); 6774 if (epartIsReg(modrm)) { 6775 delta++; 6776 putMMXReg( gregLO3ofRM(modrm), 6777 getIReg64(eregOfRexRM(pfx,modrm)) ); 6778 DIP("movd %s, %s\n", 6779 nameIReg64(eregOfRexRM(pfx,modrm)), 6780 nameMMXReg(gregLO3ofRM(modrm))); 6781 } else { 6782 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6783 delta += len; 6784 putMMXReg( gregLO3ofRM(modrm), 6785 loadLE(Ity_I64, mkexpr(addr)) ); 6786 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6787 } 6788 } 6789 else { 6790 goto mmx_decode_failure; 6791 } 6792 break; 6793 6794 case 0x7E: 6795 if (sz == 4) { 6796 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 6797 modrm = getUChar(delta); 6798 if (epartIsReg(modrm)) { 6799 delta++; 6800 putIReg32( eregOfRexRM(pfx,modrm), 6801 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 6802 DIP("movd %s, %s\n", 6803 nameMMXReg(gregLO3ofRM(modrm)), 6804 nameIReg32(eregOfRexRM(pfx,modrm))); 6805 } else { 6806 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6807 delta += len; 6808 storeLE( mkexpr(addr), 6809 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 6810 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6811 } 6812 } 6813 else 6814 if (sz == 8) { 6815 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 6816 modrm = getUChar(delta); 6817 if (epartIsReg(modrm)) { 6818 delta++; 6819 putIReg64( eregOfRexRM(pfx,modrm), 6820 getMMXReg(gregLO3ofRM(modrm)) ); 6821 DIP("movd %s, %s\n", 6822 nameMMXReg(gregLO3ofRM(modrm)), 6823 nameIReg64(eregOfRexRM(pfx,modrm))); 6824 } else { 6825 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6826 delta += len; 6827 storeLE( mkexpr(addr), 6828 getMMXReg(gregLO3ofRM(modrm)) ); 6829 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6830 } 6831 } else { 6832 goto mmx_decode_failure; 6833 } 6834 break; 6835 6836 case 0x6F: 6837 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 6838 if (sz != 4 6839 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6840 goto mmx_decode_failure; 6841 modrm = getUChar(delta); 6842 if (epartIsReg(modrm)) { 6843 delta++; 6844 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 6845 DIP("movq %s, %s\n", 6846 nameMMXReg(eregLO3ofRM(modrm)), 6847 nameMMXReg(gregLO3ofRM(modrm))); 6848 } else { 6849 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6850 delta += len; 6851 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 6852 DIP("movq %s, %s\n", 6853 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 6854 } 6855 break; 6856 6857 case 0x7F: 6858 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 6859 if (sz != 4 6860 && /*ignore redundant REX.W*/!(sz==8 && 
haveNo66noF2noF3(pfx))) 6861 goto mmx_decode_failure; 6862 modrm = getUChar(delta); 6863 if (epartIsReg(modrm)) { 6864 /* Fall through. The assembler doesn't appear to generate 6865 these. */ 6866 goto mmx_decode_failure; 6867 } else { 6868 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6869 delta += len; 6870 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 6871 DIP("mov(nt)q %s, %s\n", 6872 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 6873 } 6874 break; 6875 6876 case 0xFC: 6877 case 0xFD: 6878 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 6879 if (sz != 4) 6880 goto mmx_decode_failure; 6881 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 6882 break; 6883 6884 case 0xEC: 6885 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6886 if (sz != 4 6887 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6888 goto mmx_decode_failure; 6889 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 6890 break; 6891 6892 case 0xDC: 6893 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6894 if (sz != 4) 6895 goto mmx_decode_failure; 6896 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 6897 break; 6898 6899 case 0xF8: 6900 case 0xF9: 6901 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 6902 if (sz != 4) 6903 goto mmx_decode_failure; 6904 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 6905 break; 6906 6907 case 0xE8: 6908 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6909 if (sz != 4) 6910 goto mmx_decode_failure; 6911 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 6912 break; 6913 6914 case 0xD8: 6915 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 6916 if (sz != 4) 6917 goto mmx_decode_failure; 6918 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 6919 break; 6920 6921 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 6922 if (sz != 4) 6923 goto mmx_decode_failure; 6924 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 6925 break; 6926 6927 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 6928 if (sz != 4) 6929 goto mmx_decode_failure; 6930 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 6931 break; 6932 6933 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 6934 vassert(sz == 4); 6935 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 6936 break; 6937 6938 case 0x74: 6939 case 0x75: 6940 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 6941 if (sz != 4) 6942 goto mmx_decode_failure; 6943 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 6944 break; 6945 6946 case 0x64: 6947 case 0x65: 6948 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 6949 if (sz != 4) 6950 goto mmx_decode_failure; 6951 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 6952 break; 6953 6954 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 6955 if (sz != 4) 6956 goto mmx_decode_failure; 6957 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 6958 break; 6959 6960 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 6961 if (sz != 4) 6962 goto mmx_decode_failure; 6963 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 6964 break; 6965 6966 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 6967 if (sz != 4) 6968 goto mmx_decode_failure; 6969 delta = dis_MMXop_regmem_to_reg ( 
vbi, pfx, delta, opc, "packuswb", False ); 6970 break; 6971 6972 case 0x68: 6973 case 0x69: 6974 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 6975 if (sz != 4 6976 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6977 goto mmx_decode_failure; 6978 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 6979 break; 6980 6981 case 0x60: 6982 case 0x61: 6983 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 6984 if (sz != 4 6985 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 6986 goto mmx_decode_failure; 6987 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 6988 break; 6989 6990 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 6991 if (sz != 4) 6992 goto mmx_decode_failure; 6993 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 6994 break; 6995 6996 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 6997 if (sz != 4) 6998 goto mmx_decode_failure; 6999 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7000 break; 7001 7002 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7003 if (sz != 4) 7004 goto mmx_decode_failure; 7005 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7006 break; 7007 7008 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7009 if (sz != 4) 7010 goto mmx_decode_failure; 7011 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7012 break; 7013 7014 # define SHIFT_BY_REG(_name,_op) \ 7015 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7016 break; 7017 7018 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7019 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7020 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7021 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7022 7023 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7024 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7025 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7026 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 7027 7028 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7029 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7030 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7031 7032 # undef SHIFT_BY_REG 7033 7034 case 0x71: 7035 case 0x72: 7036 case 0x73: { 7037 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7038 UChar byte2, subopc; 7039 if (sz != 4) 7040 goto mmx_decode_failure; 7041 byte2 = getUChar(delta); /* amode / sub-opcode */ 7042 subopc = toUChar( (byte2 >> 3) & 7 ); 7043 7044 # define SHIFT_BY_IMM(_name,_op) \ 7045 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7046 } while (0) 7047 7048 if (subopc == 2 /*SRL*/ && opc == 0x71) 7049 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7050 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7051 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7052 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7053 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7054 7055 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7056 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7057 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7058 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7059 7060 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7061 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7062 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7063 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7064 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7065 SHIFT_BY_IMM("psllq", Iop_Shl64); 7066 7067 else goto mmx_decode_failure; 7068 7069 # undef SHIFT_BY_IMM 7070 break; 7071 } 7072 7073 case 0xF7: { 7074 IRTemp addr = newTemp(Ity_I64); 7075 IRTemp regD = newTemp(Ity_I64); 7076 
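         /* MASKMOVQ: store to [RDI] only those bytes of regD whose
            corresponding byte in regM has its top bit set.  Sketch:
            mask = regM >>s 7 in each 8-bit lane (broadcasts each
            MSB), newdata = (regD & mask) | (olddata & ~mask).  Note
            this is a non-atomic read-modify-write of the target
            quadword. */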
IRTemp regM = newTemp(Ity_I64); 7077 IRTemp mask = newTemp(Ity_I64); 7078 IRTemp olddata = newTemp(Ity_I64); 7079 IRTemp newdata = newTemp(Ity_I64); 7080 7081 modrm = getUChar(delta); 7082 if (sz != 4 || (!epartIsReg(modrm))) 7083 goto mmx_decode_failure; 7084 delta++; 7085 7086 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7087 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7088 assign( regD, getMMXReg( gregLO3ofRM(modrm) )); 7089 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7090 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7091 assign( newdata, 7092 binop(Iop_Or64, 7093 binop(Iop_And64, 7094 mkexpr(regD), 7095 mkexpr(mask) ), 7096 binop(Iop_And64, 7097 mkexpr(olddata), 7098 unop(Iop_Not64, mkexpr(mask)))) ); 7099 storeLE( mkexpr(addr), mkexpr(newdata) ); 7100 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7101 nameMMXReg( gregLO3ofRM(modrm) ) ); 7102 break; 7103 } 7104 7105 /* --- MMX decode failure --- */ 7106 default: 7107 mmx_decode_failure: 7108 *decode_ok = False; 7109 return delta; /* ignored */ 7110 7111 } 7112 7113 *decode_ok = True; 7114 return delta; 7115 } 7116 7117 7118 /*------------------------------------------------------------*/ 7119 /*--- More misc arithmetic and other obscure insns. ---*/ 7120 /*------------------------------------------------------------*/ 7121 7122 /* Generate base << amt with vacated places filled with stuff 7123 from xtra. amt guaranteed in 0 .. 63. */ 7124 static 7125 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7126 { 7127 /* if amt == 0 7128 then base 7129 else (base << amt) | (xtra >>u (64-amt)) 7130 */ 7131 return 7132 IRExpr_Mux0X( 7133 mkexpr(amt), 7134 mkexpr(base), 7135 binop(Iop_Or64, 7136 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7137 binop(Iop_Shr64, mkexpr(xtra), 7138 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7139 ) 7140 ); 7141 } 7142 7143 /* Generate base >>u amt with vacated places filled with stuff 7144 from xtra. amt guaranteed in 0 .. 63. */ 7145 static 7146 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) 7147 { 7148 /* if amt == 0 7149 then base 7150 else (base >>u amt) | (xtra << (64-amt)) 7151 */ 7152 return 7153 IRExpr_Mux0X( 7154 mkexpr(amt), 7155 mkexpr(base), 7156 binop(Iop_Or64, 7157 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), 7158 binop(Iop_Shl64, mkexpr(xtra), 7159 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7160 ) 7161 ); 7162 } 7163 7164 /* Double length left and right shifts. Apparently only required in 7165 v-size (no b- variant). */ 7166 static 7167 ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi, 7168 Prefix pfx, 7169 Long delta, UChar modrm, 7170 Int sz, 7171 IRExpr* shift_amt, 7172 Bool amt_is_literal, 7173 HChar* shift_amt_txt, 7174 Bool left_shift ) 7175 { 7176 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 7177 for printing it. And eip on entry points at the modrm byte. */ 7178 Int len; 7179 HChar dis_buf[50]; 7180 7181 IRType ty = szToITy(sz); 7182 IRTemp gsrc = newTemp(ty); 7183 IRTemp esrc = newTemp(ty); 7184 IRTemp addr = IRTemp_INVALID; 7185 IRTemp tmpSH = newTemp(Ity_I8); 7186 IRTemp tmpSS = newTemp(Ity_I8); 7187 IRTemp tmp64 = IRTemp_INVALID; 7188 IRTemp res64 = IRTemp_INVALID; 7189 IRTemp rss64 = IRTemp_INVALID; 7190 IRTemp resTy = IRTemp_INVALID; 7191 IRTemp rssTy = IRTemp_INVALID; 7192 Int mask = sz==8 ? 63 : 31; 7193 7194 vassert(sz == 2 || sz == 4 || sz == 8); 7195 7196 /* The E-part is the destination; this is shifted. 
The G-part 7197 supplies bits to be shifted into the E-part, but is not 7198 changed. 7199 7200 If shifting left, form a double-length word with E at the top 7201 and G at the bottom, and shift this left. The result is then in 7202 the high part. 7203 7204 If shifting right, form a double-length word with G at the top 7205 and E at the bottom, and shift this right. The result is then 7206 at the bottom. */ 7207 7208 /* Fetch the operands. */ 7209 7210 assign( gsrc, getIRegG(sz, pfx, modrm) ); 7211 7212 if (epartIsReg(modrm)) { 7213 delta++; 7214 assign( esrc, getIRegE(sz, pfx, modrm) ); 7215 DIP("sh%cd%c %s, %s, %s\n", 7216 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7217 shift_amt_txt, 7218 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); 7219 } else { 7220 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 7221 /* # bytes following amode */ 7222 amt_is_literal ? 1 : 0 ); 7223 delta += len; 7224 assign( esrc, loadLE(ty, mkexpr(addr)) ); 7225 DIP("sh%cd%c %s, %s, %s\n", 7226 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7227 shift_amt_txt, 7228 nameIRegG(sz, pfx, modrm), dis_buf); 7229 } 7230 7231 /* Calculate the masked shift amount (tmpSH), the masked subshift 7232 amount (tmpSS), the shifted value (res64) and the subshifted 7233 value (rss64). */ 7234 7235 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) ); 7236 assign( tmpSS, binop(Iop_And8, 7237 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 7238 mkU8(mask))); 7239 7240 tmp64 = newTemp(Ity_I64); 7241 res64 = newTemp(Ity_I64); 7242 rss64 = newTemp(Ity_I64); 7243 7244 if (sz == 2 || sz == 4) { 7245 7246 /* G is xtra; E is data */ 7247 /* what a freaking nightmare: */ 7248 if (sz == 4 && left_shift) { 7249 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) ); 7250 assign( res64, 7251 binop(Iop_Shr64, 7252 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7253 mkU8(32)) ); 7254 assign( rss64, 7255 binop(Iop_Shr64, 7256 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)), 7257 mkU8(32)) ); 7258 } 7259 else 7260 if (sz == 4 && !left_shift) { 7261 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) ); 7262 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7263 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) ); 7264 } 7265 else 7266 if (sz == 2 && left_shift) { 7267 assign( tmp64, 7268 binop(Iop_32HLto64, 7269 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)), 7270 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)) 7271 )); 7272 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */ 7273 assign( res64, 7274 binop(Iop_Shr64, 7275 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7276 mkU8(48)) ); 7277 /* subshift formed by shifting [esrc'0000'0000'0000] */ 7278 assign( rss64, 7279 binop(Iop_Shr64, 7280 binop(Iop_Shl64, 7281 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)), 7282 mkU8(48)), 7283 mkexpr(tmpSS)), 7284 mkU8(48)) ); 7285 } 7286 else 7287 if (sz == 2 && !left_shift) { 7288 assign( tmp64, 7289 binop(Iop_32HLto64, 7290 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)), 7291 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc)) 7292 )); 7293 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */ 7294 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7295 /* subshift formed by shifting [0000'0000'0000'esrc] */ 7296 assign( rss64, binop(Iop_Shr64, 7297 unop(Iop_16Uto64, mkexpr(esrc)), 7298 mkexpr(tmpSS)) ); 7299 } 7300 7301 } else { 7302 7303 vassert(sz == 8); 7304 if (left_shift) { 7305 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH )); 7306 assign( rss64, shiftL64_with_extras( esrc, 
gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   if (amt_is_literal) delta++;
   return delta;
}


/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}


static
ULong dis_bt_G_E ( VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op )
{
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_rsp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP by at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely. */
      t_rsp = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And64,
                              mkexpr(t_bitno0),
                              mkU64(sz == 8 ? 63 : sz == 4 ?
31 : 15)) ); 7410 7411 } else { 7412 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 7413 delta += len; 7414 assign( t_bitno1, mkexpr(t_bitno0) ); 7415 } 7416 7417 /* At this point: t_addr0 is the address being operated on. If it 7418 was a reg, we will have pushed it onto the client's stack. 7419 t_bitno1 is the bit number, suitably masked in the case of a 7420 reg. */ 7421 7422 /* Now the main sequence. */ 7423 assign( t_addr1, 7424 binop(Iop_Add64, 7425 mkexpr(t_addr0), 7426 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) ); 7427 7428 /* t_addr1 now holds effective address */ 7429 7430 assign( t_bitno2, 7431 unop(Iop_64to8, 7432 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) ); 7433 7434 /* t_bitno2 contains offset of bit within byte */ 7435 7436 if (op != BtOpNone) { 7437 t_mask = newTemp(Ity_I8); 7438 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 7439 } 7440 7441 /* t_mask is now a suitable byte mask */ 7442 7443 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 7444 7445 if (op != BtOpNone) { 7446 switch (op) { 7447 case BtOpSet: 7448 assign( t_new, 7449 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 7450 break; 7451 case BtOpComp: 7452 assign( t_new, 7453 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 7454 break; 7455 case BtOpReset: 7456 assign( t_new, 7457 binop(Iop_And8, mkexpr(t_fetched), 7458 unop(Iop_Not8, mkexpr(t_mask))) ); 7459 break; 7460 default: 7461 vpanic("dis_bt_G_E(amd64)"); 7462 } 7463 if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) { 7464 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 7465 mkexpr(t_new)/*new*/, 7466 guest_RIP_curr_instr ); 7467 } else { 7468 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 7469 } 7470 } 7471 7472 /* Side effect done; now get selected bit into Carry flag */ 7473 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 7474 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7475 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7476 stmt( IRStmt_Put( 7477 OFFB_CC_DEP1, 7478 binop(Iop_And64, 7479 binop(Iop_Shr64, 7480 unop(Iop_8Uto64, mkexpr(t_fetched)), 7481 mkexpr(t_bitno2)), 7482 mkU64(1))) 7483 ); 7484 /* Set NDEP even though it isn't used. This makes redundant-PUT 7485 elimination of previous stores to this field work better. */ 7486 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7487 7488 /* Move reg operand from stack back to reg */ 7489 if (epartIsReg(modrm)) { 7490 /* t_rsp still points at it. */ 7491 /* only write the reg if actually modifying it; doing otherwise 7492 zeroes the top half erroneously when doing btl due to 7493 standard zero-extend rule */ 7494 if (op != BtOpNone) 7495 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) ); 7496 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) ); 7497 } 7498 7499 DIP("bt%s%c %s, %s\n", 7500 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), 7501 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) ); 7502 7503 return delta; 7504 } 7505 7506 7507 7508 /* Handle BSF/BSR. Only v-size seems necessary. 
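   For example (illustrative): bsfq on src = 0x40 gives 6, i.e.
   Ctz64(src); bsrq on a value whose highest set bit is bit 62 gives
   62, i.e. 63 - Clz64(src).  When src is zero the destination is
   left unchanged, as arranged below.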
*/ 7509 static 7510 ULong dis_bs_E_G ( VexAbiInfo* vbi, 7511 Prefix pfx, Int sz, Long delta, Bool fwds ) 7512 { 7513 Bool isReg; 7514 UChar modrm; 7515 HChar dis_buf[50]; 7516 7517 IRType ty = szToITy(sz); 7518 IRTemp src = newTemp(ty); 7519 IRTemp dst = newTemp(ty); 7520 IRTemp src64 = newTemp(Ity_I64); 7521 IRTemp dst64 = newTemp(Ity_I64); 7522 IRTemp src8 = newTemp(Ity_I8); 7523 7524 vassert(sz == 8 || sz == 4 || sz == 2); 7525 7526 modrm = getUChar(delta); 7527 isReg = epartIsReg(modrm); 7528 if (isReg) { 7529 delta++; 7530 assign( src, getIRegE(sz, pfx, modrm) ); 7531 } else { 7532 Int len; 7533 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7534 delta += len; 7535 assign( src, loadLE(ty, mkexpr(addr)) ); 7536 } 7537 7538 DIP("bs%c%c %s, %s\n", 7539 fwds ? 'f' : 'r', nameISize(sz), 7540 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 7541 nameIRegG(sz, pfx, modrm)); 7542 7543 /* First, widen src to 64 bits if it is not already. */ 7544 assign( src64, widenUto64(mkexpr(src)) ); 7545 7546 /* Generate an 8-bit expression which is zero iff the 7547 original is zero, and nonzero otherwise */ 7548 assign( src8, 7549 unop(Iop_1Uto8, 7550 binop(Iop_CmpNE64, 7551 mkexpr(src64), mkU64(0))) ); 7552 7553 /* Flags: Z is 1 iff source value is zero. All others 7554 are undefined -- we force them to zero. */ 7555 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7556 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7557 stmt( IRStmt_Put( 7558 OFFB_CC_DEP1, 7559 IRExpr_Mux0X( mkexpr(src8), 7560 /* src==0 */ 7561 mkU64(AMD64G_CC_MASK_Z), 7562 /* src!=0 */ 7563 mkU64(0) 7564 ) 7565 )); 7566 /* Set NDEP even though it isn't used. This makes redundant-PUT 7567 elimination of previous stores to this field work better. */ 7568 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7569 7570 /* Result: iff source value is zero, we can't use 7571 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. 7572 But anyway, amd64 semantics say the result is undefined in 7573 such situations. Hence handle the zero case specially. */ 7574 7575 /* Bleh. What we compute: 7576 7577 bsf64: if src == 0 then {dst is unchanged} 7578 else Ctz64(src) 7579 7580 bsr64: if src == 0 then {dst is unchanged} 7581 else 63 - Clz64(src) 7582 7583 bsf32: if src == 0 then {dst is unchanged} 7584 else Ctz64(32Uto64(src)) 7585 7586 bsr32: if src == 0 then {dst is unchanged} 7587 else 63 - Clz64(32Uto64(src)) 7588 7589 bsf16: if src == 0 then {dst is unchanged} 7590 else Ctz64(32Uto64(16Uto32(src))) 7591 7592 bsr16: if src == 0 then {dst is unchanged} 7593 else 63 - Clz64(32Uto64(16Uto32(src))) 7594 */ 7595 7596 /* The main computation, guarding against zero. */ 7597 assign( dst64, 7598 IRExpr_Mux0X( 7599 mkexpr(src8), 7600 /* src == 0 -- leave dst unchanged */ 7601 widenUto64( getIRegG( sz, pfx, modrm ) ), 7602 /* src != 0 */ 7603 fwds ? 
unop(Iop_Ctz64, mkexpr(src64)) 7604 : binop(Iop_Sub64, 7605 mkU64(63), 7606 unop(Iop_Clz64, mkexpr(src64))) 7607 ) 7608 ); 7609 7610 if (sz == 2) 7611 assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); 7612 else 7613 if (sz == 4) 7614 assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); 7615 else 7616 assign( dst, mkexpr(dst64) ); 7617 7618 /* dump result back */ 7619 putIRegG( sz, pfx, modrm, mkexpr(dst) ); 7620 7621 return delta; 7622 } 7623 7624 7625 /* swap rAX with the reg specified by reg and REX.B */ 7626 static 7627 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) 7628 { 7629 IRType ty = szToITy(sz); 7630 IRTemp t1 = newTemp(ty); 7631 IRTemp t2 = newTemp(ty); 7632 vassert(sz == 4 || sz == 8); 7633 vassert(regLo3 < 8); 7634 if (sz == 8) { 7635 assign( t1, getIReg64(R_RAX) ); 7636 assign( t2, getIRegRexB(8, pfx, regLo3) ); 7637 putIReg64( R_RAX, mkexpr(t2) ); 7638 putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); 7639 } else { 7640 assign( t1, getIReg32(R_RAX) ); 7641 assign( t2, getIRegRexB(4, pfx, regLo3) ); 7642 putIReg32( R_RAX, mkexpr(t2) ); 7643 putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); 7644 } 7645 DIP("xchg%c %s, %s\n", 7646 nameISize(sz), nameIRegRAX(sz), 7647 nameIRegRexB(sz,pfx, regLo3)); 7648 } 7649 7650 7651 static 7652 void codegen_SAHF ( void ) 7653 { 7654 /* Set the flags to: 7655 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 7656 -- retain the old O flag 7657 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7658 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) 7659 */ 7660 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7661 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 7662 IRTemp oldflags = newTemp(Ity_I64); 7663 assign( oldflags, mk_amd64g_calculate_rflags_all() ); 7664 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 7665 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 7666 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 7667 stmt( IRStmt_Put( OFFB_CC_DEP1, 7668 binop(Iop_Or64, 7669 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), 7670 binop(Iop_And64, 7671 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), 7672 mkU64(mask_SZACP)) 7673 ) 7674 )); 7675 } 7676 7677 7678 static 7679 void codegen_LAHF ( void ) 7680 { 7681 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 7682 IRExpr* rax_with_hole; 7683 IRExpr* new_byte; 7684 IRExpr* new_rax; 7685 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 7686 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 7687 7688 IRTemp flags = newTemp(Ity_I64); 7689 assign( flags, mk_amd64g_calculate_rflags_all() ); 7690 7691 rax_with_hole 7692 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); 7693 new_byte 7694 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), 7695 mkU64(1<<1)); 7696 new_rax 7697 = binop(Iop_Or64, rax_with_hole, 7698 binop(Iop_Shl64, new_byte, mkU8(8))); 7699 putIReg64(R_RAX, new_rax); 7700 } 7701 7702 7703 static 7704 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, 7705 VexAbiInfo* vbi, 7706 Prefix pfx, 7707 Int size, 7708 Long delta0 ) 7709 { 7710 HChar dis_buf[50]; 7711 Int len; 7712 7713 IRType ty = szToITy(size); 7714 IRTemp acc = newTemp(ty); 7715 IRTemp src = newTemp(ty); 7716 IRTemp dest = newTemp(ty); 7717 IRTemp dest2 = newTemp(ty); 7718 IRTemp acc2 = newTemp(ty); 7719 IRTemp cond8 = newTemp(Ity_I8); 7720 IRTemp addr = IRTemp_INVALID; 7721 UChar rm = getUChar(delta0); 7722 7723 /* There are 3 cases to consider: 7724 7725 reg-reg: ignore any lock prefix, generate sequence based 7726 on Mux0X 7727 7728 reg-mem, not locked: ignore any lock prefix, generate sequence 7729 
based on Mux0X 7730 7731 reg-mem, locked: use IRCAS 7732 */ 7733 7734 if (epartIsReg(rm)) { 7735 /* case 1 */ 7736 assign( dest, getIRegE(size, pfx, rm) ); 7737 delta0++; 7738 assign( src, getIRegG(size, pfx, rm) ); 7739 assign( acc, getIRegRAX(size) ); 7740 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7741 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7742 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 7743 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7744 putIRegRAX(size, mkexpr(acc2)); 7745 putIRegE(size, pfx, rm, mkexpr(dest2)); 7746 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7747 nameIRegG(size,pfx,rm), 7748 nameIRegE(size,pfx,rm) ); 7749 } 7750 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 7751 /* case 2 */ 7752 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7753 assign( dest, loadLE(ty, mkexpr(addr)) ); 7754 delta0 += len; 7755 assign( src, getIRegG(size, pfx, rm) ); 7756 assign( acc, getIRegRAX(size) ); 7757 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7758 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7759 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 7760 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7761 putIRegRAX(size, mkexpr(acc2)); 7762 storeLE( mkexpr(addr), mkexpr(dest2) ); 7763 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7764 nameIRegG(size,pfx,rm), dis_buf); 7765 } 7766 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 7767 /* case 3 */ 7768 /* src is new value. acc is expected value. dest is old value. 7769 Compute success from the output of the IRCAS, and steer the 7770 new value for RAX accordingly: in case of success, RAX is 7771 unchanged. */ 7772 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7773 delta0 += len; 7774 assign( src, getIRegG(size, pfx, rm) ); 7775 assign( acc, getIRegRAX(size) ); 7776 stmt( IRStmt_CAS( 7777 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 7778 NULL, mkexpr(acc), NULL, mkexpr(src) ) 7779 )); 7780 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 7781 assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) ); 7782 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 7783 putIRegRAX(size, mkexpr(acc2)); 7784 DIP("cmpxchg%c %s,%s\n", nameISize(size), 7785 nameIRegG(size,pfx,rm), dis_buf); 7786 } 7787 else vassert(0); 7788 7789 *ok = True; 7790 return delta0; 7791 } 7792 7793 7794 /* Handle conditional move instructions of the form 7795 cmovcc E(reg-or-mem), G(reg) 7796 7797 E(src) is reg-or-mem 7798 G(dst) is reg. 
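   (A reminder of the Mux0X idiom used below: IRExpr_Mux0X(c, e0, eX)
   evaluates to e0 when the Ity_I8 condition c is zero, and to eX
   otherwise.  So the move is built, in sketch form, as

      cond8 = 1Uto8( calculate_condition(cond) )
      G'    = Mux0X(cond8, G, E)   -- keep G if cond fails, else take E
   )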
7799 7800 If E is reg, --> GET %E, tmps 7801 GET %G, tmpd 7802 CMOVcc tmps, tmpd 7803 PUT tmpd, %G 7804 7805 If E is mem --> (getAddr E) -> tmpa 7806 LD (tmpa), tmps 7807 GET %G, tmpd 7808 CMOVcc tmps, tmpd 7809 PUT tmpd, %G 7810 */ 7811 static 7812 ULong dis_cmov_E_G ( VexAbiInfo* vbi, 7813 Prefix pfx, 7814 Int sz, 7815 AMD64Condcode cond, 7816 Long delta0 ) 7817 { 7818 UChar rm = getUChar(delta0); 7819 HChar dis_buf[50]; 7820 Int len; 7821 7822 IRType ty = szToITy(sz); 7823 IRTemp tmps = newTemp(ty); 7824 IRTemp tmpd = newTemp(ty); 7825 7826 if (epartIsReg(rm)) { 7827 assign( tmps, getIRegE(sz, pfx, rm) ); 7828 assign( tmpd, getIRegG(sz, pfx, rm) ); 7829 7830 putIRegG( sz, pfx, rm, 7831 IRExpr_Mux0X( unop(Iop_1Uto8, 7832 mk_amd64g_calculate_condition(cond)), 7833 mkexpr(tmpd), 7834 mkexpr(tmps) ) 7835 ); 7836 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 7837 nameIRegE(sz,pfx,rm), 7838 nameIRegG(sz,pfx,rm)); 7839 return 1+delta0; 7840 } 7841 7842 /* E refers to memory */ 7843 { 7844 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7845 assign( tmps, loadLE(ty, mkexpr(addr)) ); 7846 assign( tmpd, getIRegG(sz, pfx, rm) ); 7847 7848 putIRegG( sz, pfx, rm, 7849 IRExpr_Mux0X( unop(Iop_1Uto8, 7850 mk_amd64g_calculate_condition(cond)), 7851 mkexpr(tmpd), 7852 mkexpr(tmps) ) 7853 ); 7854 7855 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 7856 dis_buf, 7857 nameIRegG(sz,pfx,rm)); 7858 return len+delta0; 7859 } 7860 } 7861 7862 7863 static 7864 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, 7865 VexAbiInfo* vbi, 7866 Prefix pfx, Int sz, Long delta0 ) 7867 { 7868 Int len; 7869 UChar rm = getUChar(delta0); 7870 HChar dis_buf[50]; 7871 7872 IRType ty = szToITy(sz); 7873 IRTemp tmpd = newTemp(ty); 7874 IRTemp tmpt0 = newTemp(ty); 7875 IRTemp tmpt1 = newTemp(ty); 7876 7877 /* There are 3 cases to consider: 7878 7879 reg-reg: ignore any lock prefix, 7880 generate 'naive' (non-atomic) sequence 7881 7882 reg-mem, not locked: ignore any lock prefix, generate 'naive' 7883 (non-atomic) sequence 7884 7885 reg-mem, locked: use IRCAS 7886 */ 7887 7888 if (epartIsReg(rm)) { 7889 /* case 1 */ 7890 assign( tmpd, getIRegE(sz, pfx, rm) ); 7891 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7892 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7893 mkexpr(tmpd), mkexpr(tmpt0)) ); 7894 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7895 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7896 putIRegE(sz, pfx, rm, mkexpr(tmpt1)); 7897 DIP("xadd%c %s, %s\n", 7898 nameISize(sz), nameIRegG(sz,pfx,rm), 7899 nameIRegE(sz,pfx,rm)); 7900 *decode_ok = True; 7901 return 1+delta0; 7902 } 7903 else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) { 7904 /* case 2 */ 7905 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7906 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 7907 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7908 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7909 mkexpr(tmpd), mkexpr(tmpt0)) ); 7910 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7911 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 7912 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7913 DIP("xadd%c %s, %s\n", 7914 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 7915 *decode_ok = True; 7916 return len+delta0; 7917 } 7918 else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) { 7919 /* case 3 */ 7920 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 7921 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 7922 assign( tmpt0, getIRegG(sz, pfx, rm) ); 7923 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 7924 mkexpr(tmpd), mkexpr(tmpt0)) ); 7925 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 
7926 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr ); 7927 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 7928 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 7929 DIP("xadd%c %s, %s\n", 7930 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 7931 *decode_ok = True; 7932 return len+delta0; 7933 } 7934 /*UNREACHED*/ 7935 vassert(0); 7936 } 7937 7938 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 7939 //.. 7940 //.. static 7941 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) 7942 //.. { 7943 //.. Int len; 7944 //.. IRTemp addr; 7945 //.. UChar rm = getUChar(delta0); 7946 //.. HChar dis_buf[50]; 7947 //.. 7948 //.. if (epartIsReg(rm)) { 7949 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 7950 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 7951 //.. return 1+delta0; 7952 //.. } else { 7953 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 7954 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 7955 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 7956 //.. return len+delta0; 7957 //.. } 7958 //.. } 7959 //.. 7960 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 7961 //.. dst is ireg and sz==4, zero out top half of it. */ 7962 //.. 7963 //.. static 7964 //.. UInt dis_mov_Sw_Ew ( UChar sorb, 7965 //.. Int sz, 7966 //.. UInt delta0 ) 7967 //.. { 7968 //.. Int len; 7969 //.. IRTemp addr; 7970 //.. UChar rm = getUChar(delta0); 7971 //.. HChar dis_buf[50]; 7972 //.. 7973 //.. vassert(sz == 2 || sz == 4); 7974 //.. 7975 //.. if (epartIsReg(rm)) { 7976 //.. if (sz == 4) 7977 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 7978 //.. else 7979 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 7980 //.. 7981 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 7982 //.. return 1+delta0; 7983 //.. } else { 7984 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 7985 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 7986 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 7987 //.. return len+delta0; 7988 //.. } 7989 //.. } 7990 //.. 7991 //.. 7992 //.. static 7993 //.. void dis_push_segreg ( UInt sreg, Int sz ) 7994 //.. { 7995 //.. IRTemp t1 = newTemp(Ity_I16); 7996 //.. IRTemp ta = newTemp(Ity_I32); 7997 //.. vassert(sz == 2 || sz == 4); 7998 //.. 7999 //.. assign( t1, getSReg(sreg) ); 8000 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 8001 //.. putIReg(4, R_ESP, mkexpr(ta)); 8002 //.. storeLE( mkexpr(ta), mkexpr(t1) ); 8003 //.. 8004 //.. DIP("pushw %s\n", nameSReg(sreg)); 8005 //.. } 8006 //.. 8007 //.. static 8008 //.. void dis_pop_segreg ( UInt sreg, Int sz ) 8009 //.. { 8010 //.. IRTemp t1 = newTemp(Ity_I16); 8011 //.. IRTemp ta = newTemp(Ity_I32); 8012 //.. vassert(sz == 2 || sz == 4); 8013 //.. 8014 //.. assign( ta, getIReg(4, R_ESP) ); 8015 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 8016 //.. 8017 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 8018 //.. putSReg( sreg, mkexpr(t1) ); 8019 //.. DIP("pop %s\n", nameSReg(sreg)); 8020 //.. 
} 8021 8022 static 8023 void dis_ret ( VexAbiInfo* vbi, ULong d64 ) 8024 { 8025 IRTemp t1 = newTemp(Ity_I64); 8026 IRTemp t2 = newTemp(Ity_I64); 8027 IRTemp t3 = newTemp(Ity_I64); 8028 assign(t1, getIReg64(R_RSP)); 8029 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8030 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8031 putIReg64(R_RSP, mkexpr(t3)); 8032 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8033 jmp_treg(Ijk_Ret,t2); 8034 } 8035 8036 8037 /*------------------------------------------------------------*/ 8038 /*--- SSE/SSE2/SSE3 helpers ---*/ 8039 /*------------------------------------------------------------*/ 8040 8041 /* Worker function; do not call directly. 8042 Handles full width G = G `op` E and G = (not G) `op` E. 8043 */ 8044 8045 static ULong dis_SSE_E_to_G_all_wrk ( 8046 VexAbiInfo* vbi, 8047 Prefix pfx, Long delta, 8048 HChar* opname, IROp op, 8049 Bool invertG 8050 ) 8051 { 8052 HChar dis_buf[50]; 8053 Int alen; 8054 IRTemp addr; 8055 UChar rm = getUChar(delta); 8056 IRExpr* gpart 8057 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8058 : getXMMReg(gregOfRexRM(pfx,rm)); 8059 if (epartIsReg(rm)) { 8060 putXMMReg( gregOfRexRM(pfx,rm), 8061 binop(op, gpart, 8062 getXMMReg(eregOfRexRM(pfx,rm))) ); 8063 DIP("%s %s,%s\n", opname, 8064 nameXMMReg(eregOfRexRM(pfx,rm)), 8065 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8066 return delta+1; 8067 } else { 8068 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8069 putXMMReg( gregOfRexRM(pfx,rm), 8070 binop(op, gpart, 8071 loadLE(Ity_V128, mkexpr(addr))) ); 8072 DIP("%s %s,%s\n", opname, 8073 dis_buf, 8074 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8075 return delta+alen; 8076 } 8077 } 8078 8079 8080 /* All lanes SSE binary operation, G = G `op` E. */ 8081 8082 static 8083 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi, 8084 Prefix pfx, Long delta, 8085 HChar* opname, IROp op ) 8086 { 8087 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8088 } 8089 8090 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8091 8092 static 8093 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi, 8094 Prefix pfx, Long delta, 8095 HChar* opname, IROp op ) 8096 { 8097 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8098 } 8099 8100 8101 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 8102 8103 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi, 8104 Prefix pfx, Long delta, 8105 HChar* opname, IROp op ) 8106 { 8107 HChar dis_buf[50]; 8108 Int alen; 8109 IRTemp addr; 8110 UChar rm = getUChar(delta); 8111 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8112 if (epartIsReg(rm)) { 8113 putXMMReg( gregOfRexRM(pfx,rm), 8114 binop(op, gpart, 8115 getXMMReg(eregOfRexRM(pfx,rm))) ); 8116 DIP("%s %s,%s\n", opname, 8117 nameXMMReg(eregOfRexRM(pfx,rm)), 8118 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8119 return delta+1; 8120 } else { 8121 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8122 E operand needs to be made simply of zeroes. */ 8123 IRTemp epart = newTemp(Ity_V128); 8124 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8125 assign( epart, unop( Iop_32UtoV128, 8126 loadLE(Ity_I32, mkexpr(addr))) ); 8127 putXMMReg( gregOfRexRM(pfx,rm), 8128 binop(op, gpart, mkexpr(epart)) ); 8129 DIP("%s %s,%s\n", opname, 8130 dis_buf, 8131 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8132 return delta+alen; 8133 } 8134 } 8135 8136 8137 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
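   For example, with op = Iop_Add64F0x2 (addsd) the intended effect is,
   in sketch form:

      G.lo64 := G.lo64 + E.lo64
      G.hi64 := G.hi64            -- upper lane taken from the G operand

   This is why, in the memory case below, zero-extending the 64-bit
   load into a V128 suffices: the lowest-lane-only ops take the upper
   lane from their first argument, so E's upper half is never observed.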
*/ 8138 8139 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi, 8140 Prefix pfx, Long delta, 8141 HChar* opname, IROp op ) 8142 { 8143 HChar dis_buf[50]; 8144 Int alen; 8145 IRTemp addr; 8146 UChar rm = getUChar(delta); 8147 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8148 if (epartIsReg(rm)) { 8149 putXMMReg( gregOfRexRM(pfx,rm), 8150 binop(op, gpart, 8151 getXMMReg(eregOfRexRM(pfx,rm))) ); 8152 DIP("%s %s,%s\n", opname, 8153 nameXMMReg(eregOfRexRM(pfx,rm)), 8154 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8155 return delta+1; 8156 } else { 8157 /* We can only do a 64-bit memory read, so the upper half of the 8158 E operand needs to be made simply of zeroes. */ 8159 IRTemp epart = newTemp(Ity_V128); 8160 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8161 assign( epart, unop( Iop_64UtoV128, 8162 loadLE(Ity_I64, mkexpr(addr))) ); 8163 putXMMReg( gregOfRexRM(pfx,rm), 8164 binop(op, gpart, mkexpr(epart)) ); 8165 DIP("%s %s,%s\n", opname, 8166 dis_buf, 8167 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8168 return delta+alen; 8169 } 8170 } 8171 8172 8173 /* All lanes unary SSE operation, G = op(E). */ 8174 8175 static ULong dis_SSE_E_to_G_unary_all ( 8176 VexAbiInfo* vbi, 8177 Prefix pfx, Long delta, 8178 HChar* opname, IROp op 8179 ) 8180 { 8181 HChar dis_buf[50]; 8182 Int alen; 8183 IRTemp addr; 8184 UChar rm = getUChar(delta); 8185 if (epartIsReg(rm)) { 8186 putXMMReg( gregOfRexRM(pfx,rm), 8187 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) ); 8188 DIP("%s %s,%s\n", opname, 8189 nameXMMReg(eregOfRexRM(pfx,rm)), 8190 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8191 return delta+1; 8192 } else { 8193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8194 putXMMReg( gregOfRexRM(pfx,rm), 8195 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 8196 DIP("%s %s,%s\n", opname, 8197 dis_buf, 8198 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8199 return delta+alen; 8200 } 8201 } 8202 8203 8204 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 8205 8206 static ULong dis_SSE_E_to_G_unary_lo32 ( 8207 VexAbiInfo* vbi, 8208 Prefix pfx, Long delta, 8209 HChar* opname, IROp op 8210 ) 8211 { 8212 /* First we need to get the old G value and patch the low 32 bits 8213 of the E operand into it. Then apply op and write back to G. */ 8214 HChar dis_buf[50]; 8215 Int alen; 8216 IRTemp addr; 8217 UChar rm = getUChar(delta); 8218 IRTemp oldG0 = newTemp(Ity_V128); 8219 IRTemp oldG1 = newTemp(Ity_V128); 8220 8221 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8222 8223 if (epartIsReg(rm)) { 8224 assign( oldG1, 8225 binop( Iop_SetV128lo32, 8226 mkexpr(oldG0), 8227 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8228 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8229 DIP("%s %s,%s\n", opname, 8230 nameXMMReg(eregOfRexRM(pfx,rm)), 8231 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8232 return delta+1; 8233 } else { 8234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8235 assign( oldG1, 8236 binop( Iop_SetV128lo32, 8237 mkexpr(oldG0), 8238 loadLE(Ity_I32, mkexpr(addr)) )); 8239 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8240 DIP("%s %s,%s\n", opname, 8241 dis_buf, 8242 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8243 return delta+alen; 8244 } 8245 } 8246 8247 8248 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 8249 8250 static ULong dis_SSE_E_to_G_unary_lo64 ( 8251 VexAbiInfo* vbi, 8252 Prefix pfx, Long delta, 8253 HChar* opname, IROp op 8254 ) 8255 { 8256 /* First we need to get the old G value and patch the low 64 bits 8257 of the E operand into it. Then apply op and write back to G. 
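   In sketch form, for op = Iop_Sqrt64F0x2 (sqrtsd), and assuming the
   lowest-lane unary ops pass the upper lane of their argument through
   unchanged:

      tmp  := oldG  with  tmp.lo64 replaced by E.lo64
      newG := op(tmp)   -- lane 0 = sqrt(E.lo64), lane 1 = oldG's lane 1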
*/ 8258 HChar dis_buf[50]; 8259 Int alen; 8260 IRTemp addr; 8261 UChar rm = getUChar(delta); 8262 IRTemp oldG0 = newTemp(Ity_V128); 8263 IRTemp oldG1 = newTemp(Ity_V128); 8264 8265 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8266 8267 if (epartIsReg(rm)) { 8268 assign( oldG1, 8269 binop( Iop_SetV128lo64, 8270 mkexpr(oldG0), 8271 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 8272 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8273 DIP("%s %s,%s\n", opname, 8274 nameXMMReg(eregOfRexRM(pfx,rm)), 8275 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8276 return delta+1; 8277 } else { 8278 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8279 assign( oldG1, 8280 binop( Iop_SetV128lo64, 8281 mkexpr(oldG0), 8282 loadLE(Ity_I64, mkexpr(addr)) )); 8283 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8284 DIP("%s %s,%s\n", opname, 8285 dis_buf, 8286 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8287 return delta+alen; 8288 } 8289 } 8290 8291 8292 /* SSE integer binary operation: 8293 G = G `op` E (eLeft == False) 8294 G = E `op` G (eLeft == True) 8295 */ 8296 static ULong dis_SSEint_E_to_G( 8297 VexAbiInfo* vbi, 8298 Prefix pfx, Long delta, 8299 HChar* opname, IROp op, 8300 Bool eLeft 8301 ) 8302 { 8303 HChar dis_buf[50]; 8304 Int alen; 8305 IRTemp addr; 8306 UChar rm = getUChar(delta); 8307 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8308 IRExpr* epart = NULL; 8309 if (epartIsReg(rm)) { 8310 epart = getXMMReg(eregOfRexRM(pfx,rm)); 8311 DIP("%s %s,%s\n", opname, 8312 nameXMMReg(eregOfRexRM(pfx,rm)), 8313 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8314 delta += 1; 8315 } else { 8316 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8317 epart = loadLE(Ity_V128, mkexpr(addr)); 8318 DIP("%s %s,%s\n", opname, 8319 dis_buf, 8320 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8321 delta += alen; 8322 } 8323 putXMMReg( gregOfRexRM(pfx,rm), 8324 eLeft ? binop(op, epart, gpart) 8325 : binop(op, gpart, epart) ); 8326 return delta; 8327 } 8328 8329 8330 /* Helper for doing SSE FP comparisons. */ 8331 8332 static void findSSECmpOp ( Bool* needNot, IROp* op, 8333 Int imm8, Bool all_lanes, Int sz ) 8334 { 8335 imm8 &= 7; 8336 *needNot = False; 8337 *op = Iop_INVALID; 8338 if (imm8 >= 4) { 8339 *needNot = True; 8340 imm8 -= 4; 8341 } 8342 8343 if (sz == 4 && all_lanes) { 8344 switch (imm8) { 8345 case 0: *op = Iop_CmpEQ32Fx4; return; 8346 case 1: *op = Iop_CmpLT32Fx4; return; 8347 case 2: *op = Iop_CmpLE32Fx4; return; 8348 case 3: *op = Iop_CmpUN32Fx4; return; 8349 default: break; 8350 } 8351 } 8352 if (sz == 4 && !all_lanes) { 8353 switch (imm8) { 8354 case 0: *op = Iop_CmpEQ32F0x4; return; 8355 case 1: *op = Iop_CmpLT32F0x4; return; 8356 case 2: *op = Iop_CmpLE32F0x4; return; 8357 case 3: *op = Iop_CmpUN32F0x4; return; 8358 default: break; 8359 } 8360 } 8361 if (sz == 8 && all_lanes) { 8362 switch (imm8) { 8363 case 0: *op = Iop_CmpEQ64Fx2; return; 8364 case 1: *op = Iop_CmpLT64Fx2; return; 8365 case 2: *op = Iop_CmpLE64Fx2; return; 8366 case 3: *op = Iop_CmpUN64Fx2; return; 8367 default: break; 8368 } 8369 } 8370 if (sz == 8 && !all_lanes) { 8371 switch (imm8) { 8372 case 0: *op = Iop_CmpEQ64F0x2; return; 8373 case 1: *op = Iop_CmpLT64F0x2; return; 8374 case 2: *op = Iop_CmpLE64F0x2; return; 8375 case 3: *op = Iop_CmpUN64F0x2; return; 8376 default: break; 8377 } 8378 } 8379 vpanic("findSSECmpOp(amd64,guest)"); 8380 } 8381 8382 /* Handles SSE 32F/64F comparisons. 
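   The imm8 predicate is decoded by findSSECmpOp above.  In effect:

      imm8 & 7:  0 -> EQ,  1 -> LT,  2 -> LE,  3 -> UNORD,
                 4..7 -> negation of the corresponding 0..3 predicate

   so, for example, "not equal" (imm8 == 4) is computed as NOT(CmpEQ..),
   the negation being applied below either to the whole vector
   (all_lanes) or, via an XOR against a low-lane mask, to the low lane
   only.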
*/ 8383 8384 static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi, 8385 Prefix pfx, Long delta, 8386 HChar* opname, Bool all_lanes, Int sz ) 8387 { 8388 HChar dis_buf[50]; 8389 Int alen, imm8; 8390 IRTemp addr; 8391 Bool needNot = False; 8392 IROp op = Iop_INVALID; 8393 IRTemp plain = newTemp(Ity_V128); 8394 UChar rm = getUChar(delta); 8395 UShort mask = 0; 8396 vassert(sz == 4 || sz == 8); 8397 if (epartIsReg(rm)) { 8398 imm8 = getUChar(delta+1); 8399 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 8400 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 8401 getXMMReg(eregOfRexRM(pfx,rm))) ); 8402 delta += 2; 8403 DIP("%s $%d,%s,%s\n", opname, 8404 (Int)imm8, 8405 nameXMMReg(eregOfRexRM(pfx,rm)), 8406 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8407 } else { 8408 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 8409 imm8 = getUChar(delta+alen); 8410 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 8411 assign( plain, 8412 binop( 8413 op, 8414 getXMMReg(gregOfRexRM(pfx,rm)), 8415 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 8416 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 8417 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 8418 ) 8419 ); 8420 delta += alen+1; 8421 DIP("%s $%d,%s,%s\n", opname, 8422 (Int)imm8, 8423 dis_buf, 8424 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8425 } 8426 8427 if (needNot && all_lanes) { 8428 putXMMReg( gregOfRexRM(pfx,rm), 8429 unop(Iop_NotV128, mkexpr(plain)) ); 8430 } 8431 else 8432 if (needNot && !all_lanes) { 8433 mask = toUShort(sz==4 ? 0x000F : 0x00FF); 8434 putXMMReg( gregOfRexRM(pfx,rm), 8435 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 8436 } 8437 else { 8438 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 8439 } 8440 8441 return delta; 8442 } 8443 8444 8445 /* Vector by scalar shift of G by the amount specified at the bottom 8446 of E. 
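   Out-of-range shift amounts get the hardware behaviour, implemented
   with the Mux0X below:

      if (amt < lane_size)  result = op(G, amt)
      else                  result = (shl or shr) ? all zeroes
                                                  : (sar) every lane
                                        filled with its own sign bit,
                                        i.e. a shift by lane_size-1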
*/

static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
                                  Prefix pfx, Long delta, 
                                  HChar* opname, IROp op )
{
   HChar  dis_buf[50];
   Int    alen, size;
   IRTemp addr;
   Bool   shl, shr, sar;
   UChar  rm   = getUChar(delta);
   IRTemp g0   = newTemp(Ity_V128);
   IRTemp g1   = newTemp(Ity_V128);
   IRTemp amt  = newTemp(Ity_I32);
   IRTemp amt8 = newTemp(Ity_I8);
   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRexRM(pfx,rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( 
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,
                 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)),
                                     mkU64(size))),
            mkV128(0x0000),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else 
   if (sar) {
      assign( 
         g1,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,
                 binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)),
                                     mkU64(size))),
            binop(op, mkexpr(g0), mkU8(size-1)),
            binop(op, mkexpr(g0), mkexpr(amt8))
         )
      );
   } else {
      vassert(0);
   }

   putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte. */

static 
ULong dis_SSE_shiftE_imm ( Prefix pfx,
                           Long delta, HChar* opname, IROp op )
{
   Bool   shl, shr, sar;
   UChar  rm = getUChar(delta);
   IRTemp e0 = newTemp(Ity_V128);
   IRTemp e1 = newTemp(Ity_V128);
   UChar  amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2 
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRexRM(pfx,rm)) );
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size 
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else 
   if (sar) {
      assign( e1, amt >= size
                     ?
binop(op, mkexpr(e0), mkU8(size-1)) 8566 : binop(op, mkexpr(e0), mkU8(amt)) 8567 ); 8568 } else { 8569 vassert(0); 8570 } 8571 8572 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 8573 return delta; 8574 } 8575 8576 8577 /* Get the current SSE rounding mode. */ 8578 8579 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 8580 { 8581 return 8582 unop( Iop_64to32, 8583 binop( Iop_And64, 8584 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 8585 mkU64(3) )); 8586 } 8587 8588 static void put_sse_roundingmode ( IRExpr* sseround ) 8589 { 8590 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 8591 stmt( IRStmt_Put( OFFB_SSEROUND, 8592 unop(Iop_32Uto64,sseround) ) ); 8593 } 8594 8595 /* Break a 128-bit value up into four 32-bit ints. */ 8596 8597 static void breakup128to32s ( IRTemp t128, 8598 /*OUTs*/ 8599 IRTemp* t3, IRTemp* t2, 8600 IRTemp* t1, IRTemp* t0 ) 8601 { 8602 IRTemp hi64 = newTemp(Ity_I64); 8603 IRTemp lo64 = newTemp(Ity_I64); 8604 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 8605 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 8606 8607 vassert(t0 && *t0 == IRTemp_INVALID); 8608 vassert(t1 && *t1 == IRTemp_INVALID); 8609 vassert(t2 && *t2 == IRTemp_INVALID); 8610 vassert(t3 && *t3 == IRTemp_INVALID); 8611 8612 *t0 = newTemp(Ity_I32); 8613 *t1 = newTemp(Ity_I32); 8614 *t2 = newTemp(Ity_I32); 8615 *t3 = newTemp(Ity_I32); 8616 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 8617 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 8618 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 8619 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 8620 } 8621 8622 /* Construct a 128-bit value from four 32-bit ints. */ 8623 8624 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 8625 IRTemp t1, IRTemp t0 ) 8626 { 8627 return 8628 binop( Iop_64HLtoV128, 8629 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 8630 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 8631 ); 8632 } 8633 8634 /* Break a 64-bit value up into four 16-bit ints. */ 8635 8636 static void breakup64to16s ( IRTemp t64, 8637 /*OUTs*/ 8638 IRTemp* t3, IRTemp* t2, 8639 IRTemp* t1, IRTemp* t0 ) 8640 { 8641 IRTemp hi32 = newTemp(Ity_I32); 8642 IRTemp lo32 = newTemp(Ity_I32); 8643 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 8644 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 8645 8646 vassert(t0 && *t0 == IRTemp_INVALID); 8647 vassert(t1 && *t1 == IRTemp_INVALID); 8648 vassert(t2 && *t2 == IRTemp_INVALID); 8649 vassert(t3 && *t3 == IRTemp_INVALID); 8650 8651 *t0 = newTemp(Ity_I16); 8652 *t1 = newTemp(Ity_I16); 8653 *t2 = newTemp(Ity_I16); 8654 *t3 = newTemp(Ity_I16); 8655 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 8656 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 8657 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 8658 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 8659 } 8660 8661 /* Construct a 64-bit value from four 16-bit ints. */ 8662 8663 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 8664 IRTemp t1, IRTemp t0 ) 8665 { 8666 return 8667 binop( Iop_32HLto64, 8668 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 8669 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 8670 ); 8671 } 8672 8673 8674 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. 
Given two 64-bit 8675 values (aa,bb), computes, for each of the 4 16-bit lanes: 8676 8677 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 8678 */ 8679 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 8680 { 8681 IRTemp aa = newTemp(Ity_I64); 8682 IRTemp bb = newTemp(Ity_I64); 8683 IRTemp aahi32s = newTemp(Ity_I64); 8684 IRTemp aalo32s = newTemp(Ity_I64); 8685 IRTemp bbhi32s = newTemp(Ity_I64); 8686 IRTemp bblo32s = newTemp(Ity_I64); 8687 IRTemp rHi = newTemp(Ity_I64); 8688 IRTemp rLo = newTemp(Ity_I64); 8689 IRTemp one32x2 = newTemp(Ity_I64); 8690 assign(aa, aax); 8691 assign(bb, bbx); 8692 assign( aahi32s, 8693 binop(Iop_SarN32x2, 8694 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 8695 mkU8(16) )); 8696 assign( aalo32s, 8697 binop(Iop_SarN32x2, 8698 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 8699 mkU8(16) )); 8700 assign( bbhi32s, 8701 binop(Iop_SarN32x2, 8702 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 8703 mkU8(16) )); 8704 assign( bblo32s, 8705 binop(Iop_SarN32x2, 8706 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 8707 mkU8(16) )); 8708 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 8709 assign( 8710 rHi, 8711 binop( 8712 Iop_ShrN32x2, 8713 binop( 8714 Iop_Add32x2, 8715 binop( 8716 Iop_ShrN32x2, 8717 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 8718 mkU8(14) 8719 ), 8720 mkexpr(one32x2) 8721 ), 8722 mkU8(1) 8723 ) 8724 ); 8725 assign( 8726 rLo, 8727 binop( 8728 Iop_ShrN32x2, 8729 binop( 8730 Iop_Add32x2, 8731 binop( 8732 Iop_ShrN32x2, 8733 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 8734 mkU8(14) 8735 ), 8736 mkexpr(one32x2) 8737 ), 8738 mkU8(1) 8739 ) 8740 ); 8741 return 8742 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 8743 } 8744 8745 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit 8746 values (aa,bb), computes, for each lane: 8747 8748 if aa_lane < 0 then - bb_lane 8749 else if aa_lane > 0 then bb_lane 8750 else 0 8751 */ 8752 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 8753 { 8754 IRTemp aa = newTemp(Ity_I64); 8755 IRTemp bb = newTemp(Ity_I64); 8756 IRTemp zero = newTemp(Ity_I64); 8757 IRTemp bbNeg = newTemp(Ity_I64); 8758 IRTemp negMask = newTemp(Ity_I64); 8759 IRTemp posMask = newTemp(Ity_I64); 8760 IROp opSub = Iop_INVALID; 8761 IROp opCmpGTS = Iop_INVALID; 8762 8763 switch (laneszB) { 8764 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 8765 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 8766 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 8767 default: vassert(0); 8768 } 8769 8770 assign( aa, aax ); 8771 assign( bb, bbx ); 8772 assign( zero, mkU64(0) ); 8773 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 8774 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 8775 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 8776 8777 return 8778 binop(Iop_Or64, 8779 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 8780 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 8781 8782 } 8783 8784 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 8785 value aa, computes, for each lane 8786 8787 if aa < 0 then -aa else aa 8788 8789 Note that the result is interpreted as unsigned, so that the 8790 absolute value of the most negative signed input can be 8791 represented. 
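   A one-lane scalar sketch of the masking trick used below:

      mask = a >>s (lane_bits - 1)  -- all ones if a < 0, else all zeroes
      res  = (a & ~mask) | ((0 - a) & mask)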
8792 */ 8793 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) 8794 { 8795 IRTemp aa = newTemp(Ity_I64); 8796 IRTemp zero = newTemp(Ity_I64); 8797 IRTemp aaNeg = newTemp(Ity_I64); 8798 IRTemp negMask = newTemp(Ity_I64); 8799 IRTemp posMask = newTemp(Ity_I64); 8800 IROp opSub = Iop_INVALID; 8801 IROp opSarN = Iop_INVALID; 8802 8803 switch (laneszB) { 8804 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 8805 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 8806 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 8807 default: vassert(0); 8808 } 8809 8810 assign( aa, aax ); 8811 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 8812 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 8813 assign( zero, mkU64(0) ); 8814 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 8815 return 8816 binop(Iop_Or64, 8817 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 8818 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); 8819 } 8820 8821 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 8822 IRTemp lo64, Long byteShift ) 8823 { 8824 vassert(byteShift >= 1 && byteShift <= 7); 8825 return 8826 binop(Iop_Or64, 8827 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 8828 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 8829 ); 8830 } 8831 8832 /* Generate a SIGSEGV followed by a restart of the current instruction 8833 if effective_addr is not 16-aligned. This is required behaviour 8834 for some SSE3 instructions and all 128-bit SSSE3 instructions. 8835 This assumes that guest_RIP_curr_instr is set correctly! */ 8836 /* TODO(glider): we've replaced the 0xF mask with 0x0, effectively disabling 8837 * the check. Need to enable it once TSan stops generating unaligned 8838 * accesses in the wrappers. 8839 * See http://code.google.com/p/data-race-test/issues/detail?id=49 */ 8840 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) 8841 { 8842 stmt( 8843 IRStmt_Exit( 8844 binop(Iop_CmpNE64, 8845 binop(Iop_And64,mkexpr(effective_addr),mkU64(0x0)), 8846 mkU64(0)), 8847 Ijk_SigSEGV, 8848 IRConst_U64(guest_RIP_curr_instr) 8849 ) 8850 ); 8851 } 8852 8853 8854 /* Helper for deciding whether a given insn (starting at the opcode 8855 byte) may validly be used with a LOCK prefix. The following insns 8856 may be used with LOCK when their destination operand is in memory. 8857 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 
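   (Notation for the table below: "NN /r" gives the opcode byte(s) and,
   where it matters, the required value of the ModRM reg field; the
   "/x" entries are left unspecified here.)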
      ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
      OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
      ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
      SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
      AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
      SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
      XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

      DEC        FE /1,  FF /1
      INC        FE /0,  FF /0

      NEG        F6 /3,  F7 /3
      NOT        F6 /2,  F7 /2

      XCHG       86, 87

      BTC        0F BB,  0F BA /7
      BTR        0F B3,  0F BA /6
      BTS        0F AB,  0F BA /5

      CMPXCHG    0F B0,  0F B1
      CMPXCHG8B  0F C7 /1

      XADD       0F C0,  0F C1

      ------------------------------

      80 /0  =  addb $imm8,  rm8
      81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
      82 /0  =  addb $imm8,  rm8
      83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

      00     =  addb r8,  rm8
      01     =  addl r32, rm32  and  addw r16, rm16

      Same for ADD OR ADC SBB AND SUB XOR

      FE /1  =  dec rm8
      FF /1  =  dec rm32  and  dec rm16

      FE /0  =  inc rm8
      FF /0  =  inc rm32  and  inc rm16

      F6 /3  =  neg rm8
      F7 /3  =  neg rm32  and  neg rm16

      F6 /2  =  not rm8
      F7 /2  =  not rm32  and  not rm16

      0F BB     =  btcw r16, rm16    and  btcl r32, rm32
      0F BA /7  =  btcw $imm8, rm16  and  btcl $imm8, rm32

      Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xF6: case 0xF7:
         if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x0F: {
         switch (opc[1]) {
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xBA:
               if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xC7:
               if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single instruction                     ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction is
   located in host memory at &guest_code[delta].
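   On return, *expect_CAS tells the caller whether the IR generated for
   this instruction is expected to contain an IRCAS (it is set as soon
   as a LOCK prefix is accepted), so that this can be cross-checked
   against the IR actually produced.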
*/ 8990 8991 static 8992 DisResult disInstr_AMD64_WRK ( 8993 /*OUT*/Bool* expect_CAS, 8994 Bool put_IP, 8995 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 8996 Bool resteerCisOk, 8997 void* callback_opaque, 8998 Long delta64, 8999 VexArchInfo* archinfo, 9000 VexAbiInfo* vbi 9001 ) 9002 { 9003 IRType ty; 9004 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 9005 Int alen; 9006 UChar opc, modrm, abyte, pre; 9007 Long d64; 9008 HChar dis_buf[50]; 9009 Int am_sz, d_sz, n, n_prefixes; 9010 DisResult dres; 9011 UChar* insn; /* used in SSE decoders */ 9012 9013 /* The running delta */ 9014 Long delta = delta64; 9015 9016 /* Holds eip at the start of the insn, so that we can print 9017 consistent error messages for unimplemented insns. */ 9018 Long delta_start = delta; 9019 9020 /* sz denotes the nominal data-op size of the insn; we change it to 9021 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of 9022 conflict REX.W takes precedence. */ 9023 Int sz = 4; 9024 9025 /* pfx holds the summary of prefixes. */ 9026 Prefix pfx = PFX_EMPTY; 9027 9028 /* Set result defaults. */ 9029 dres.whatNext = Dis_Continue; 9030 dres.len = 0; 9031 dres.continueAt = 0; 9032 9033 *expect_CAS = False; 9034 9035 vassert(guest_RIP_next_assumed == 0); 9036 vassert(guest_RIP_next_mustcheck == False); 9037 9038 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 9039 9040 DIP("\t0x%llx: ", guest_RIP_bbstart+delta); 9041 9042 /* We may be asked to update the guest RIP before going further. */ 9043 if (put_IP) 9044 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) ); 9045 9046 /* Spot "Special" instructions (see comment at top of file). */ 9047 { 9048 UChar* code = (UChar*)(guest_code + delta); 9049 /* Spot the 16-byte preamble: 9050 48C1C703 rolq $3, %rdi 9051 48C1C70D rolq $13, %rdi 9052 48C1C73D rolq $61, %rdi 9053 48C1C733 rolq $51, %rdi 9054 */ 9055 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 9056 && code[ 3] == 0x03 && 9057 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 9058 && code[ 7] == 0x0D && 9059 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 9060 && code[11] == 0x3D && 9061 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 9062 && code[15] == 0x33) { 9063 /* Got a "Special" instruction preamble. Which one is it? */ 9064 if (code[16] == 0x48 && code[17] == 0x87 9065 && code[18] == 0xDB /* xchgq %rbx,%rbx */) { 9066 /* %RDX = client_request ( %RAX ) */ 9067 DIP("%%rdx = client_request ( %%rax )\n"); 9068 delta += 19; 9069 jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta); 9070 dres.whatNext = Dis_StopHere; 9071 goto decode_success; 9072 } 9073 else 9074 if (code[16] == 0x48 && code[17] == 0x87 9075 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { 9076 /* %RAX = guest_NRADDR */ 9077 DIP("%%rax = guest_NRADDR\n"); 9078 delta += 19; 9079 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 9080 goto decode_success; 9081 } 9082 else 9083 if (code[16] == 0x48 && code[17] == 0x87 9084 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { 9085 /* call-noredir *%RAX */ 9086 DIP("call-noredir *%%rax\n"); 9087 delta += 19; 9088 t1 = newTemp(Ity_I64); 9089 assign(t1, getIRegRAX(8)); 9090 t2 = newTemp(Ity_I64); 9091 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 9092 putIReg64(R_RSP, mkexpr(t2)); 9093 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); 9094 jmp_treg(Ijk_NoRedir,t1); 9095 dres.whatNext = Dis_StopHere; 9096 goto decode_success; 9097 } 9098 /* We don't know what it is. 
*/ 9099 goto decode_failure; 9100 /*NOTREACHED*/ 9101 } 9102 } 9103 9104 /* Eat prefixes, summarising the result in pfx and sz, and rejecting 9105 as many invalid combinations as possible. */ 9106 n_prefixes = 0; 9107 while (True) { 9108 if (n_prefixes > 7) goto decode_failure; 9109 pre = getUChar(delta); 9110 switch (pre) { 9111 case 0x66: pfx |= PFX_66; break; 9112 case 0x67: pfx |= PFX_ASO; break; 9113 case 0xF2: pfx |= PFX_F2; break; 9114 case 0xF3: pfx |= PFX_F3; break; 9115 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; 9116 case 0x2E: pfx |= PFX_CS; break; 9117 case 0x3E: pfx |= PFX_DS; break; 9118 case 0x26: pfx |= PFX_ES; break; 9119 case 0x64: pfx |= PFX_FS; break; 9120 case 0x65: pfx |= PFX_GS; break; 9121 case 0x36: pfx |= PFX_SS; break; 9122 case 0x40 ... 0x4F: 9123 pfx |= PFX_REX; 9124 if (pre & (1<<3)) pfx |= PFX_REXW; 9125 if (pre & (1<<2)) pfx |= PFX_REXR; 9126 if (pre & (1<<1)) pfx |= PFX_REXX; 9127 if (pre & (1<<0)) pfx |= PFX_REXB; 9128 break; 9129 default: 9130 goto not_a_prefix; 9131 } 9132 n_prefixes++; 9133 delta++; 9134 } 9135 9136 not_a_prefix: 9137 9138 /* Dump invalid combinations */ 9139 n = 0; 9140 if (pfx & PFX_F2) n++; 9141 if (pfx & PFX_F3) n++; 9142 if (n > 1) 9143 goto decode_failure; /* can't have both */ 9144 9145 n = 0; 9146 if (pfx & PFX_CS) n++; 9147 if (pfx & PFX_DS) n++; 9148 if (pfx & PFX_ES) n++; 9149 if (pfx & PFX_FS) n++; 9150 if (pfx & PFX_GS) n++; 9151 if (pfx & PFX_SS) n++; 9152 if (n > 1) 9153 goto decode_failure; /* multiple seg overrides == illegal */ 9154 9155 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' 9156 that we should accept it. */ 9157 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero) 9158 goto decode_failure; 9159 9160 /* Ditto for %gs prefixes. */ 9161 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60) 9162 goto decode_failure; 9163 9164 /* Set up sz. */ 9165 sz = 4; 9166 if (pfx & PFX_66) sz = 2; 9167 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; 9168 9169 /* Now we should be looking at the primary opcode byte or the 9170 leading F2 or F3. Check that any LOCK prefix is actually 9171 allowed. */ 9172 9173 if (pfx & PFX_LOCK) { 9174 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 9175 DIP("lock "); 9176 } else { 9177 *expect_CAS = False; 9178 goto decode_failure; 9179 } 9180 } 9181 9182 9183 /* ---------------------------------------------------- */ 9184 /* --- The SSE/SSE2 decoder. --- */ 9185 /* ---------------------------------------------------- */ 9186 9187 /* What did I do to deserve SSE ? Perhaps I was really bad in a 9188 previous life? */ 9189 9190 /* Note, this doesn't handle SSE3 right now. All amd64s support 9191 SSE2 as a minimum so there is no point distinguishing SSE1 vs 9192 SSE2. */ 9193 9194 insn = (UChar*)&guest_code[delta]; 9195 9196 /* FXSAVE is spuriously at the start here only because it is 9197 thusly placed in guest-x86/toIR.c. */ 9198 9199 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. 9200 Note that the presence or absence of REX.W slightly affects the 9201 written format: whether the saved FPU IP and DP pointers are 64 9202 or 32 bits. But the helper function we call simply writes zero 9203 bits in the relevant fields (which are 64 bits regardless of 9204 what REX.W is) and so it's good enough (iow, equally broken) in 9205 both cases. 
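   The store is done with an IR 'dirty' helper, whose memory and
   guest-state effects must be declared by hand.  In outline (matching
   the code below):

      d = unsafeIRDirty_0_N ( 0, "amd64g_dirtyhelper_FXSAVE", ... );
      d->mFx = Ifx_Write; d->mAddr = addr; d->mSize = 512;  -- the m512
      d->fxState[i] = (Ifx_Read, offset, size)  -- one entry per piece
                                                   of guest state read

   Tools such as Memcheck interpret these annotations, so the effect
   list must cover everything the helper actually touches.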
*/ 9206 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 9207 && insn[0] == 0x0F && insn[1] == 0xAE 9208 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) { 9209 IRDirty* d; 9210 modrm = getUChar(delta+2); 9211 vassert(!epartIsReg(modrm)); 9212 9213 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9214 delta += 2+alen; 9215 gen_SEGV_if_not_16_aligned(addr); 9216 9217 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 9218 9219 /* Uses dirty helper: 9220 void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */ 9221 d = unsafeIRDirty_0_N ( 9222 0/*regparms*/, 9223 "amd64g_dirtyhelper_FXSAVE", 9224 &amd64g_dirtyhelper_FXSAVE, 9225 mkIRExprVec_1( mkexpr(addr) ) 9226 ); 9227 d->needsBBP = True; 9228 9229 /* declare we're writing memory */ 9230 d->mFx = Ifx_Write; 9231 d->mAddr = mkexpr(addr); 9232 d->mSize = 512; 9233 9234 /* declare we're reading guest state */ 9235 d->nFxState = 7; 9236 9237 d->fxState[0].fx = Ifx_Read; 9238 d->fxState[0].offset = OFFB_FTOP; 9239 d->fxState[0].size = sizeof(UInt); 9240 9241 d->fxState[1].fx = Ifx_Read; 9242 d->fxState[1].offset = OFFB_FPREGS; 9243 d->fxState[1].size = 8 * sizeof(ULong); 9244 9245 d->fxState[2].fx = Ifx_Read; 9246 d->fxState[2].offset = OFFB_FPTAGS; 9247 d->fxState[2].size = 8 * sizeof(UChar); 9248 9249 d->fxState[3].fx = Ifx_Read; 9250 d->fxState[3].offset = OFFB_FPROUND; 9251 d->fxState[3].size = sizeof(ULong); 9252 9253 d->fxState[4].fx = Ifx_Read; 9254 d->fxState[4].offset = OFFB_FC3210; 9255 d->fxState[4].size = sizeof(ULong); 9256 9257 d->fxState[5].fx = Ifx_Read; 9258 d->fxState[5].offset = OFFB_XMM0; 9259 d->fxState[5].size = 16 * sizeof(U128); 9260 9261 d->fxState[6].fx = Ifx_Read; 9262 d->fxState[6].offset = OFFB_SSEROUND; 9263 d->fxState[6].size = sizeof(ULong); 9264 9265 /* Be paranoid ... this assertion tries to ensure the 16 %xmm 9266 images are packed back-to-back. If not, the value of 9267 d->fxState[5].size is wrong. */ 9268 vassert(16 == sizeof(U128)); 9269 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); 9270 9271 stmt( IRStmt_Dirty(d) ); 9272 9273 goto decode_success; 9274 } 9275 9276 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. 9277 As with FXSAVE above we ignore the value of REX.W since we're 9278 not bothering with the FPU DP and IP fields. */ 9279 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 9280 && insn[0] == 0x0F && insn[1] == 0xAE 9281 && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 1) { 9282 IRDirty* d; 9283 modrm = getUChar(delta+2); 9284 vassert(!epartIsReg(modrm)); 9285 9286 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9287 delta += 2+alen; 9288 gen_SEGV_if_not_16_aligned(addr); 9289 9290 DIP("%sfxrstor %s\n", sz==8 ? 
"rex64/" : "", dis_buf); 9291 9292 /* Uses dirty helper: 9293 VexEmWarn amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong ) 9294 NOTE: 9295 the VexEmWarn value is simply ignored 9296 */ 9297 d = unsafeIRDirty_0_N ( 9298 0/*regparms*/, 9299 "amd64g_dirtyhelper_FXRSTOR", 9300 &amd64g_dirtyhelper_FXRSTOR, 9301 mkIRExprVec_1( mkexpr(addr) ) 9302 ); 9303 d->needsBBP = True; 9304 9305 /* declare we're reading memory */ 9306 d->mFx = Ifx_Read; 9307 d->mAddr = mkexpr(addr); 9308 d->mSize = 512; 9309 9310 /* declare we're writing guest state */ 9311 d->nFxState = 7; 9312 9313 d->fxState[0].fx = Ifx_Write; 9314 d->fxState[0].offset = OFFB_FTOP; 9315 d->fxState[0].size = sizeof(UInt); 9316 9317 d->fxState[1].fx = Ifx_Write; 9318 d->fxState[1].offset = OFFB_FPREGS; 9319 d->fxState[1].size = 8 * sizeof(ULong); 9320 9321 d->fxState[2].fx = Ifx_Write; 9322 d->fxState[2].offset = OFFB_FPTAGS; 9323 d->fxState[2].size = 8 * sizeof(UChar); 9324 9325 d->fxState[3].fx = Ifx_Write; 9326 d->fxState[3].offset = OFFB_FPROUND; 9327 d->fxState[3].size = sizeof(ULong); 9328 9329 d->fxState[4].fx = Ifx_Write; 9330 d->fxState[4].offset = OFFB_FC3210; 9331 d->fxState[4].size = sizeof(ULong); 9332 9333 d->fxState[5].fx = Ifx_Write; 9334 d->fxState[5].offset = OFFB_XMM0; 9335 d->fxState[5].size = 16 * sizeof(U128); 9336 9337 d->fxState[6].fx = Ifx_Write; 9338 d->fxState[6].offset = OFFB_SSEROUND; 9339 d->fxState[6].size = sizeof(ULong); 9340 9341 /* Be paranoid ... this assertion tries to ensure the 16 %xmm 9342 images are packed back-to-back. If not, the value of 9343 d->fxState[5].size is wrong. */ 9344 vassert(16 == sizeof(U128)); 9345 vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); 9346 9347 stmt( IRStmt_Dirty(d) ); 9348 9349 goto decode_success; 9350 } 9351 9352 /* ------ SSE decoder main ------ */ 9353 9354 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 9355 if (haveNo66noF2noF3(pfx) && sz == 4 9356 && insn[0] == 0x0F && insn[1] == 0x58) { 9357 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 ); 9358 goto decode_success; 9359 } 9360 9361 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 9362 if (haveF3no66noF2(pfx) && sz == 4 9363 && insn[0] == 0x0F && insn[1] == 0x58) { 9364 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 ); 9365 goto decode_success; 9366 } 9367 9368 /* 0F 55 = ANDNPS -- G = (not G) and E */ 9369 if (haveNo66noF2noF3(pfx) && sz == 4 9370 && insn[0] == 0x0F && insn[1] == 0x55) { 9371 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 ); 9372 goto decode_success; 9373 } 9374 9375 /* 0F 54 = ANDPS -- G = G and E */ 9376 if (haveNo66noF2noF3(pfx) && sz == 4 9377 && insn[0] == 0x0F && insn[1] == 0x54) { 9378 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 ); 9379 goto decode_success; 9380 } 9381 9382 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 9383 if (haveNo66noF2noF3(pfx) && sz == 4 9384 && insn[0] == 0x0F && insn[1] == 0xC2) { 9385 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 ); 9386 goto decode_success; 9387 } 9388 9389 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 9390 if (haveF3no66noF2(pfx) && sz == 4 9391 && insn[0] == 0x0F && insn[1] == 0xC2) { 9392 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 ); 9393 goto decode_success; 9394 } 9395 9396 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 9397 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 9398 if (haveNo66noF2noF3(pfx) && sz == 4 9399 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] 
== 0x2E)) { 9400 IRTemp argL = newTemp(Ity_F32); 9401 IRTemp argR = newTemp(Ity_F32); 9402 modrm = getUChar(delta+2); 9403 if (epartIsReg(modrm)) { 9404 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 9405 0/*lowest lane*/ ) ); 9406 delta += 2+1; 9407 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", 9408 nameXMMReg(eregOfRexRM(pfx,modrm)), 9409 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9410 } else { 9411 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9412 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 9413 delta += 2+alen; 9414 DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "", 9415 dis_buf, 9416 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9417 } 9418 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 9419 0/*lowest lane*/ ) ); 9420 9421 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9422 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9423 stmt( IRStmt_Put( 9424 OFFB_CC_DEP1, 9425 binop( Iop_And64, 9426 unop( Iop_32Uto64, 9427 binop(Iop_CmpF64, 9428 unop(Iop_F32toF64,mkexpr(argL)), 9429 unop(Iop_F32toF64,mkexpr(argR)))), 9430 mkU64(0x45) 9431 ))); 9432 9433 goto decode_success; 9434 } 9435 9436 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 9437 half xmm */ 9438 if (haveNo66noF2noF3(pfx) && sz == 4 9439 && insn[0] == 0x0F && insn[1] == 0x2A) { 9440 IRTemp arg64 = newTemp(Ity_I64); 9441 IRTemp rmode = newTemp(Ity_I32); 9442 9443 modrm = getUChar(delta+2); 9444 do_MMX_preamble(); 9445 if (epartIsReg(modrm)) { 9446 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 9447 delta += 2+1; 9448 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 9449 nameXMMReg(gregOfRexRM(pfx,modrm))); 9450 } else { 9451 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9452 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9453 delta += 2+alen; 9454 DIP("cvtpi2ps %s,%s\n", dis_buf, 9455 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9456 } 9457 9458 assign( rmode, get_sse_roundingmode() ); 9459 9460 putXMMRegLane32F( 9461 gregOfRexRM(pfx,modrm), 0, 9462 binop(Iop_F64toF32, 9463 mkexpr(rmode), 9464 unop(Iop_I32StoF64, 9465 unop(Iop_64to32, mkexpr(arg64)) )) ); 9466 9467 putXMMRegLane32F( 9468 gregOfRexRM(pfx,modrm), 1, 9469 binop(Iop_F64toF32, 9470 mkexpr(rmode), 9471 unop(Iop_I32StoF64, 9472 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 9473 9474 goto decode_success; 9475 } 9476 9477 /* F3 0F 2A = CVTSI2SS 9478 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 9479 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 9480 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8) 9481 && insn[0] == 0x0F && insn[1] == 0x2A) { 9482 9483 IRTemp rmode = newTemp(Ity_I32); 9484 assign( rmode, get_sse_roundingmode() ); 9485 modrm = getUChar(delta+2); 9486 9487 if (sz == 4) { 9488 IRTemp arg32 = newTemp(Ity_I32); 9489 if (epartIsReg(modrm)) { 9490 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 9491 delta += 2+1; 9492 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 9493 nameXMMReg(gregOfRexRM(pfx,modrm))); 9494 } else { 9495 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9496 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 9497 delta += 2+alen; 9498 DIP("cvtsi2ss %s,%s\n", dis_buf, 9499 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9500 } 9501 putXMMRegLane32F( 9502 gregOfRexRM(pfx,modrm), 0, 9503 binop(Iop_F64toF32, 9504 mkexpr(rmode), 9505 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 9506 } else { 9507 /* sz == 8 */ 9508 IRTemp arg64 = newTemp(Ity_I64); 9509 if (epartIsReg(modrm)) { 9510 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 9511 delta += 2+1; 
9512 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 9513 nameXMMReg(gregOfRexRM(pfx,modrm))); 9514 } else { 9515 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9516 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9517 delta += 2+alen; 9518 DIP("cvtsi2ssq %s,%s\n", dis_buf, 9519 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9520 } 9521 putXMMRegLane32F( 9522 gregOfRexRM(pfx,modrm), 0, 9523 binop(Iop_F64toF32, 9524 mkexpr(rmode), 9525 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 9526 } 9527 9528 goto decode_success; 9529 } 9530 9531 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 9532 I32 in mmx, according to prevailing SSE rounding mode */ 9533 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 9534 I32 in mmx, rounding towards zero */ 9535 if (haveNo66noF2noF3(pfx) && sz == 4 9536 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 9537 IRTemp dst64 = newTemp(Ity_I64); 9538 IRTemp rmode = newTemp(Ity_I32); 9539 IRTemp f32lo = newTemp(Ity_F32); 9540 IRTemp f32hi = newTemp(Ity_F32); 9541 Bool r2zero = toBool(insn[1] == 0x2C); 9542 9543 do_MMX_preamble(); 9544 modrm = getUChar(delta+2); 9545 9546 if (epartIsReg(modrm)) { 9547 delta += 2+1; 9548 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 9549 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 9550 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 9551 nameXMMReg(eregOfRexRM(pfx,modrm)), 9552 nameMMXReg(gregLO3ofRM(modrm))); 9553 } else { 9554 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9555 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9556 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 9557 mkexpr(addr), 9558 mkU64(4) ))); 9559 delta += 2+alen; 9560 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 9561 dis_buf, 9562 nameMMXReg(gregLO3ofRM(modrm))); 9563 } 9564 9565 if (r2zero) { 9566 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 9567 } else { 9568 assign( rmode, get_sse_roundingmode() ); 9569 } 9570 9571 assign( 9572 dst64, 9573 binop( Iop_32HLto64, 9574 binop( Iop_F64toI32S, 9575 mkexpr(rmode), 9576 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 9577 binop( Iop_F64toI32S, 9578 mkexpr(rmode), 9579 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 9580 ) 9581 ); 9582 9583 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 9584 goto decode_success; 9585 } 9586 9587 /* F3 0F 2D = CVTSS2SI 9588 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 9589 according to prevailing SSE rounding mode 9590 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 9591 according to prevailing SSE rounding mode 9592 */ 9593 /* F3 0F 2C = CVTTSS2SI 9594 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 9595 truncating towards zero 9596 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 9597 truncating towards zero 9598 */ 9599 if (haveF3no66noF2(pfx) 9600 && insn[0] == 0x0F 9601 && (insn[1] == 0x2D || insn[1] == 0x2C)) { 9602 IRTemp rmode = newTemp(Ity_I32); 9603 IRTemp f32lo = newTemp(Ity_F32); 9604 Bool r2zero = toBool(insn[1] == 0x2C); 9605 vassert(sz == 4 || sz == 8); 9606 9607 modrm = getUChar(delta+2); 9608 if (epartIsReg(modrm)) { 9609 delta += 2+1; 9610 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 9611 DIP("cvt%sss2si %s,%s\n", r2zero ? 
"t" : "", 9612 nameXMMReg(eregOfRexRM(pfx,modrm)), 9613 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 9614 } else { 9615 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9616 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9617 delta += 2+alen; 9618 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 9619 dis_buf, 9620 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 9621 } 9622 9623 if (r2zero) { 9624 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9625 } else { 9626 assign( rmode, get_sse_roundingmode() ); 9627 } 9628 9629 if (sz == 4) { 9630 putIReg32( gregOfRexRM(pfx,modrm), 9631 binop( Iop_F64toI32S, 9632 mkexpr(rmode), 9633 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9634 } else { 9635 putIReg64( gregOfRexRM(pfx,modrm), 9636 binop( Iop_F64toI64S, 9637 mkexpr(rmode), 9638 unop(Iop_F32toF64, mkexpr(f32lo))) ); 9639 } 9640 9641 goto decode_success; 9642 } 9643 9644 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 9645 if (haveNo66noF2noF3(pfx) && sz == 4 9646 && insn[0] == 0x0F && insn[1] == 0x5E) { 9647 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 ); 9648 goto decode_success; 9649 } 9650 9651 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 9652 if (haveF3no66noF2(pfx) && sz == 4 9653 && insn[0] == 0x0F && insn[1] == 0x5E) { 9654 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 ); 9655 goto decode_success; 9656 } 9657 9658 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 9659 if (insn[0] == 0x0F && insn[1] == 0xAE 9660 && haveNo66noF2noF3(pfx) 9661 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) { 9662 9663 IRTemp t64 = newTemp(Ity_I64); 9664 IRTemp ew = newTemp(Ity_I32); 9665 9666 vassert(sz == 4); 9667 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9668 delta += 2+alen; 9669 DIP("ldmxcsr %s\n", dis_buf); 9670 9671 /* The only thing we observe in %mxcsr is the rounding mode. 9672 Therefore, pass the 32-bit value (SSE native-format control 9673 word) to a clean helper, getting back a 64-bit value, the 9674 lower half of which is the SSEROUND value to store, and the 9675 upper half of which is the emulation-warning token which may 9676 be generated. 9677 */ 9678 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 9679 assign( t64, mkIRExprCCall( 9680 Ity_I64, 0/*regparms*/, 9681 "amd64g_check_ldmxcsr", 9682 &amd64g_check_ldmxcsr, 9683 mkIRExprVec_1( 9684 unop(Iop_32Uto64, 9685 loadLE(Ity_I32, mkexpr(addr)) 9686 ) 9687 ) 9688 ) 9689 ); 9690 9691 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 9692 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 9693 put_emwarn( mkexpr(ew) ); 9694 /* Finally, if an emulation warning was reported, side-exit to 9695 the next insn, reporting the warning, so that Valgrind's 9696 dispatcher sees the warning. 
*/ 9697 stmt( 9698 IRStmt_Exit( 9699 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), 9700 Ijk_EmWarn, 9701 IRConst_U64(guest_RIP_bbstart+delta) 9702 ) 9703 ); 9704 goto decode_success; 9705 } 9706 9707 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9708 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 9709 if (haveNo66noF2noF3(pfx) && sz == 4 9710 && insn[0] == 0x0F && insn[1] == 0xF7) { 9711 Bool ok = False; 9712 delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 ); 9713 if (!ok) 9714 goto decode_failure; 9715 goto decode_success; 9716 } 9717 9718 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 9719 if (haveNo66noF2noF3(pfx) && sz == 4 9720 && insn[0] == 0x0F && insn[1] == 0x5F) { 9721 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 ); 9722 goto decode_success; 9723 } 9724 9725 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 9726 if (haveF3no66noF2(pfx) && sz == 4 9727 && insn[0] == 0x0F && insn[1] == 0x5F) { 9728 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 ); 9729 goto decode_success; 9730 } 9731 9732 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 9733 if (haveNo66noF2noF3(pfx) && sz == 4 9734 && insn[0] == 0x0F && insn[1] == 0x5D) { 9735 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 ); 9736 goto decode_success; 9737 } 9738 9739 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 9740 if (haveF3no66noF2(pfx) && sz == 4 9741 && insn[0] == 0x0F && insn[1] == 0x5D) { 9742 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 ); 9743 goto decode_success; 9744 } 9745 9746 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 9747 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 9748 if (haveNo66noF2noF3(pfx) 9749 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9750 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 9751 modrm = getUChar(delta+2); 9752 if (epartIsReg(modrm)) { 9753 putXMMReg( gregOfRexRM(pfx,modrm), 9754 getXMMReg( eregOfRexRM(pfx,modrm) )); 9755 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 9756 nameXMMReg(gregOfRexRM(pfx,modrm))); 9757 delta += 2+1; 9758 } else { 9759 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9760 if (insn[1] == 0x28/*movaps*/) 9761 gen_SEGV_if_not_16_aligned( addr ); 9762 putXMMReg( gregOfRexRM(pfx,modrm), 9763 loadLE(Ity_V128, mkexpr(addr)) ); 9764 DIP("mov[ua]ps %s,%s\n", dis_buf, 9765 nameXMMReg(gregOfRexRM(pfx,modrm))); 9766 delta += 2+alen; 9767 } 9768 goto decode_success; 9769 } 9770 9771 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 9772 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 9773 if (haveNo66noF2noF3(pfx) 9774 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 9775 && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) { 9776 modrm = getUChar(delta+2); 9777 if (epartIsReg(modrm)) { 9778 /* fall through; awaiting test case */ 9779 } else { 9780 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 9781 if (insn[1] == 0x29/*movaps*/) 9782 gen_SEGV_if_not_16_aligned( addr ); 9783 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 9784 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 9785 dis_buf ); 9786 delta += 2+alen; 9787 goto decode_success; 9788 } 9789 } 9790 9791 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 9792 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. 
*/
9793 if (haveNo66noF2noF3(pfx)
9794 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9795 && insn[0] == 0x0F && insn[1] == 0x16) {
9796 modrm = getUChar(delta+2);
9797 if (epartIsReg(modrm)) {
9798 delta += 2+1;
9799 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9800 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
9801 DIP("movlhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9802 nameXMMReg(gregOfRexRM(pfx,modrm)));
9803 } else {
9804 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9805 delta += 2+alen;
9806 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
9807 loadLE(Ity_I64, mkexpr(addr)) );
9808 DIP("movhps %s,%s\n", dis_buf,
9809 nameXMMReg( gregOfRexRM(pfx,modrm) ));
9810 }
9811 goto decode_success;
9812 }
9813 
9814 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9815 if (haveNo66noF2noF3(pfx)
9816 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9817 && insn[0] == 0x0F && insn[1] == 0x17) {
9818 if (!epartIsReg(insn[2])) {
9819 delta += 2;
9820 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9821 delta += alen;
9822 storeLE( mkexpr(addr),
9823 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9824 1/*upper lane*/ ) );
9825 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9826 dis_buf);
9827 goto decode_success;
9828 }
9829 /* else fall through */
9830 }
9831 
9832 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9833 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9834 if (haveNo66noF2noF3(pfx)
9835 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9836 && insn[0] == 0x0F && insn[1] == 0x12) {
9837 modrm = getUChar(delta+2);
9838 if (epartIsReg(modrm)) {
9839 delta += 2+1;
9840 putXMMRegLane64( gregOfRexRM(pfx,modrm),
9841 0/*lower lane*/,
9842 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
9843 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9844 nameXMMReg(gregOfRexRM(pfx,modrm)));
9845 } else {
9846 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9847 delta += 2+alen;
9848 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
9849 loadLE(Ity_I64, mkexpr(addr)) );
9850 DIP("movlps %s, %s\n",
9851 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
9852 }
9853 goto decode_success;
9854 }
9855 
9856 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9857 if (haveNo66noF2noF3(pfx)
9858 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9859 && insn[0] == 0x0F && insn[1] == 0x13) {
9860 if (!epartIsReg(insn[2])) {
9861 delta += 2;
9862 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9863 delta += alen;
9864 storeLE( mkexpr(addr),
9865 getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
9866 0/*lower lane*/ ) );
9867 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
9868 dis_buf);
9869 goto decode_success;
9870 }
9871 /* else fall through */
9872 }
9873 
9874 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9875 to 4 lowest bits of ireg(G) */
9876 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
9877 && insn[0] == 0x0F && insn[1] == 0x50) {
9878 /* sz == 8 is a kludge to handle insns with REX.W redundantly
9879 set to 1, which has been known to happen:
9880 
9881 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
9882 
9883 20071106: Intel docs say that REX.W isn't redundant: when
9884 present, a 64-bit register is written; when not present, only
9885 the 32-bit half is written. However, testing on a Core2
9886 machine suggests the entire 64 bit register is written
9887 irrespective of the status of REX.W.
That could be because
9888 of the default rule that says "if the 32-bit lower half of a
9889 64-bit register is written, the upper half is zeroed". By using
9890 putIReg32 here we inadvertently produce the same behaviour as
9891 the Core2, for the same reason -- putIReg32 implements said
9892 rule.
9893 
9894 AMD docs give no indication that REX.W is even valid for this
9895 insn. */
9896 modrm = getUChar(delta+2);
9897 if (epartIsReg(modrm)) {
9898 Int src;
9899 t0 = newTemp(Ity_I32);
9900 t1 = newTemp(Ity_I32);
9901 t2 = newTemp(Ity_I32);
9902 t3 = newTemp(Ity_I32);
9903 delta += 2+1;
9904 src = eregOfRexRM(pfx,modrm);
9905 assign( t0, binop( Iop_And32,
9906 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
9907 mkU32(1) ));
9908 assign( t1, binop( Iop_And32,
9909 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
9910 mkU32(2) ));
9911 assign( t2, binop( Iop_And32,
9912 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
9913 mkU32(4) ));
9914 assign( t3, binop( Iop_And32,
9915 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
9916 mkU32(8) ));
9917 putIReg32( gregOfRexRM(pfx,modrm),
9918 binop(Iop_Or32,
9919 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
9920 binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
9921 )
9922 );
9923 DIP("movmskps %s,%s\n", nameXMMReg(src),
9924 nameIReg32(gregOfRexRM(pfx,modrm)));
9925 goto decode_success;
9926 }
9927 /* else fall through */
9928 }
9929 
9930 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9931 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9932 if ( ( (haveNo66noF2noF3(pfx) && sz == 4)
9933 || (have66noF2noF3(pfx) && sz == 2)
9934 )
9935 && insn[0] == 0x0F && insn[1] == 0x2B) {
9936 modrm = getUChar(delta+2);
9937 if (!epartIsReg(modrm)) {
9938 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9939 gen_SEGV_if_not_16_aligned( addr );
9940 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
9941 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
9942 nameXMMReg(gregOfRexRM(pfx,modrm)),
9943 dis_buf);
9944 delta += 2+alen;
9945 goto decode_success;
9946 }
9947 /* else fall through */
9948 }
9949 
9950 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9951 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
9952 Intel manual does not say anything about the usual business of
9953 the FP reg tags getting trashed whenever an MMX insn happens.
9954 So we just leave them alone.
9955 */
9956 if (haveNo66noF2noF3(pfx) && sz == 4
9957 && insn[0] == 0x0F && insn[1] == 0xE7) {
9958 modrm = getUChar(delta+2);
9959 if (!epartIsReg(modrm)) {
9960 /* do_MMX_preamble(); Intel docs don't specify this */
9961 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9962 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
9963 DIP("movntq %s,%s\n", nameMMXReg(gregLO3ofRM(modrm)),
9964 dis_buf);
9965 delta += 2+alen;
9966 goto decode_success;
9967 }
9968 /* else fall through */
9969 }
9970 
9971 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9972 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out.
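   Schematically (illustrative only):

      if E is mem:  G = 0; G[31:0] = load32(addr)
      else:         G[31:0] = E[31:0]   (G[127:32] unchanged)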
*/
9973 if (haveF3no66noF2(pfx)
9974 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
9975 && insn[0] == 0x0F && insn[1] == 0x10) {
9976 modrm = getUChar(delta+2);
9977 if (epartIsReg(modrm)) {
9978 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9979 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
9980 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
9981 nameXMMReg(gregOfRexRM(pfx,modrm)));
9982 delta += 2+1;
9983 } else {
9984 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
9985 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
9986 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
9987 loadLE(Ity_I32, mkexpr(addr)) );
9988 DIP("movss %s,%s\n", dis_buf,
9989 nameXMMReg(gregOfRexRM(pfx,modrm)));
9990 delta += 2+alen;
9991 }
9992 goto decode_success;
9993 }
9994 
9995 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9996 or lo 1/4 xmm). */
9997 if (haveF3no66noF2(pfx) && sz == 4
9998 && insn[0] == 0x0F && insn[1] == 0x11) {
9999 modrm = getUChar(delta+2);
10000 if (epartIsReg(modrm)) {
10001 /* fall through, we don't yet have a test case */
10002 } else {
10003 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10004 storeLE( mkexpr(addr),
10005 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
10006 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
10007 dis_buf);
10008 delta += 2+alen;
10009 goto decode_success;
10010 }
10011 }
10012 
10013 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
10014 if (haveNo66noF2noF3(pfx) && sz == 4
10015 && insn[0] == 0x0F && insn[1] == 0x59) {
10016 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 );
10017 goto decode_success;
10018 }
10019 
10020 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
10021 if (haveF3no66noF2(pfx) && sz == 4
10022 && insn[0] == 0x0F && insn[1] == 0x59) {
10023 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 );
10024 goto decode_success;
10025 }
10026 
10027 /* 0F 56 = ORPS -- G = G or E */
10028 if (haveNo66noF2noF3(pfx) && sz == 4
10029 && insn[0] == 0x0F && insn[1] == 0x56) {
10030 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 );
10031 goto decode_success;
10032 }
10033 
10034 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10035 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
10036 if (haveNo66noF2noF3(pfx) && sz == 4
10037 && insn[0] == 0x0F && insn[1] == 0xE0) {
10038 do_MMX_preamble();
10039 delta = dis_MMXop_regmem_to_reg (
10040 vbi, pfx, delta+2, insn[1], "pavgb", False );
10041 goto decode_success;
10042 }
10043 
10044 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10045 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
10046 if (haveNo66noF2noF3(pfx) && sz == 4
10047 && insn[0] == 0x0F && insn[1] == 0xE3) {
10048 do_MMX_preamble();
10049 delta = dis_MMXop_regmem_to_reg (
10050 vbi, pfx, delta+2, insn[1], "pavgw", False );
10051 goto decode_success;
10052 }
10053 
10054 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
10055 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
10056 zero-extend of it in ireg(G).
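   For example, pextrw $2,%mm1,%eax copies bits [47:32] of %mm1 into
   %eax; only the low two bits of the immediate matter, hence the
   (insn[3] & 3) switch below.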
*/ 10057 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 10058 && insn[0] == 0x0F && insn[1] == 0xC5) { 10059 modrm = insn[2]; 10060 if (epartIsReg(modrm)) { 10061 IRTemp sV = newTemp(Ity_I64); 10062 t5 = newTemp(Ity_I16); 10063 do_MMX_preamble(); 10064 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 10065 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 10066 switch (insn[3] & 3) { 10067 case 0: assign(t5, mkexpr(t0)); break; 10068 case 1: assign(t5, mkexpr(t1)); break; 10069 case 2: assign(t5, mkexpr(t2)); break; 10070 case 3: assign(t5, mkexpr(t3)); break; 10071 default: vassert(0); 10072 } 10073 if (sz == 8) 10074 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 10075 else 10076 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 10077 DIP("pextrw $%d,%s,%s\n", 10078 (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)), 10079 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 10080 : nameIReg32(gregOfRexRM(pfx,modrm)) 10081 ); 10082 delta += 4; 10083 goto decode_success; 10084 } 10085 /* else fall through */ 10086 /* note, for anyone filling in the mem case: this insn has one 10087 byte after the amode and therefore you must pass 1 as the 10088 last arg to disAMode */ 10089 } 10090 10091 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10092 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 10093 put it into the specified lane of mmx(G). */ 10094 if (haveNo66noF2noF3(pfx) 10095 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 10096 && insn[0] == 0x0F && insn[1] == 0xC4) { 10097 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 10098 mmx reg. t4 is the new lane value. t5 is the original 10099 mmx value. t6 is the new mmx value. */ 10100 Int lane; 10101 t4 = newTemp(Ity_I16); 10102 t5 = newTemp(Ity_I64); 10103 t6 = newTemp(Ity_I64); 10104 modrm = insn[2]; 10105 do_MMX_preamble(); 10106 10107 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 10108 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 10109 10110 if (epartIsReg(modrm)) { 10111 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 10112 delta += 3+1; 10113 lane = insn[3+1-1]; 10114 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10115 nameIReg16(eregOfRexRM(pfx,modrm)), 10116 nameMMXReg(gregLO3ofRM(modrm))); 10117 } else { 10118 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 ); 10119 delta += 3+alen; 10120 lane = insn[3+alen-1]; 10121 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 10122 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10123 dis_buf, 10124 nameMMXReg(gregLO3ofRM(modrm))); 10125 } 10126 10127 switch (lane & 3) { 10128 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 10129 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 10130 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 10131 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 10132 default: vassert(0); 10133 } 10134 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 10135 goto decode_success; 10136 } 10137 10138 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10139 /* 0F EE = PMAXSW -- 16x4 signed max */ 10140 if (haveNo66noF2noF3(pfx) && sz == 4 10141 && insn[0] == 0x0F && insn[1] == 0xEE) { 10142 do_MMX_preamble(); 10143 delta = dis_MMXop_regmem_to_reg ( 10144 vbi, pfx, delta+2, insn[1], "pmaxsw", False ); 10145 goto decode_success; 10146 } 10147 10148 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10149 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 10150 if (haveNo66noF2noF3(pfx) && sz == 4 10151 && insn[0] == 0x0F && insn[1] == 0xDE) { 10152 do_MMX_preamble(); 10153 delta = dis_MMXop_regmem_to_reg ( 10154 vbi, 
pfx, delta+2, insn[1], "pmaxub", False ); 10155 goto decode_success; 10156 } 10157 10158 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10159 /* 0F EA = PMINSW -- 16x4 signed min */ 10160 if (haveNo66noF2noF3(pfx) && sz == 4 10161 && insn[0] == 0x0F && insn[1] == 0xEA) { 10162 do_MMX_preamble(); 10163 delta = dis_MMXop_regmem_to_reg ( 10164 vbi, pfx, delta+2, insn[1], "pminsw", False ); 10165 goto decode_success; 10166 } 10167 10168 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10169 /* 0F DA = PMINUB -- 8x8 unsigned min */ 10170 if (haveNo66noF2noF3(pfx) && sz == 4 10171 && insn[0] == 0x0F && insn[1] == 0xDA) { 10172 do_MMX_preamble(); 10173 delta = dis_MMXop_regmem_to_reg ( 10174 vbi, pfx, delta+2, insn[1], "pminub", False ); 10175 goto decode_success; 10176 } 10177 10178 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10179 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 10180 mmx(G), turn them into a byte, and put zero-extend of it in 10181 ireg(G). */ 10182 if (haveNo66noF2noF3(pfx) && sz == 4 10183 && insn[0] == 0x0F && insn[1] == 0xD7) { 10184 modrm = insn[2]; 10185 if (epartIsReg(modrm)) { 10186 do_MMX_preamble(); 10187 t0 = newTemp(Ity_I64); 10188 t1 = newTemp(Ity_I64); 10189 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 10190 assign(t1, mkIRExprCCall( 10191 Ity_I64, 0/*regparms*/, 10192 "amd64g_calculate_mmx_pmovmskb", 10193 &amd64g_calculate_mmx_pmovmskb, 10194 mkIRExprVec_1(mkexpr(t0)))); 10195 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1))); 10196 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 10197 nameIReg32(gregOfRexRM(pfx,modrm))); 10198 delta += 3; 10199 goto decode_success; 10200 } 10201 /* else fall through */ 10202 } 10203 10204 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10205 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 10206 if (haveNo66noF2noF3(pfx) && sz == 4 10207 && insn[0] == 0x0F && insn[1] == 0xE4) { 10208 do_MMX_preamble(); 10209 delta = dis_MMXop_regmem_to_reg ( 10210 vbi, pfx, delta+2, insn[1], "pmuluh", False ); 10211 goto decode_success; 10212 } 10213 10214 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 10215 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 10216 /* 0F 18 /2 = PREFETCH1 */ 10217 /* 0F 18 /3 = PREFETCH2 */ 10218 if (insn[0] == 0x0F && insn[1] == 0x18 10219 && haveNo66noF2noF3(pfx) 10220 && !epartIsReg(insn[2]) 10221 && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) { 10222 HChar* hintstr = "??"; 10223 10224 modrm = getUChar(delta+2); 10225 vassert(!epartIsReg(modrm)); 10226 10227 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10228 delta += 2+alen; 10229 10230 switch (gregLO3ofRM(modrm)) { 10231 case 0: hintstr = "nta"; break; 10232 case 1: hintstr = "t0"; break; 10233 case 2: hintstr = "t1"; break; 10234 case 3: hintstr = "t2"; break; 10235 default: vassert(0); 10236 } 10237 10238 DIP("prefetch%s %s\n", hintstr, dis_buf); 10239 goto decode_success; 10240 } 10241 10242 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10243 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 10244 if (haveNo66noF2noF3(pfx) && sz == 4 10245 && insn[0] == 0x0F && insn[1] == 0xF6) { 10246 do_MMX_preamble(); 10247 delta = dis_MMXop_regmem_to_reg ( 10248 vbi, pfx, delta+2, insn[1], "psadbw", False ); 10249 goto decode_success; 10250 } 10251 10252 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 10253 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) 
to G(mmx) */ 10254 if (haveNo66noF2noF3(pfx) && sz == 4 10255 && insn[0] == 0x0F && insn[1] == 0x70) { 10256 Int order; 10257 IRTemp sV, dV, s3, s2, s1, s0; 10258 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10259 sV = newTemp(Ity_I64); 10260 dV = newTemp(Ity_I64); 10261 do_MMX_preamble(); 10262 modrm = insn[2]; 10263 if (epartIsReg(modrm)) { 10264 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 10265 order = (Int)insn[3]; 10266 delta += 2+2; 10267 DIP("pshufw $%d,%s,%s\n", order, 10268 nameMMXReg(eregLO3ofRM(modrm)), 10269 nameMMXReg(gregLO3ofRM(modrm))); 10270 } else { 10271 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 10272 1/*extra byte after amode*/ ); 10273 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 10274 order = (Int)insn[2+alen]; 10275 delta += 3+alen; 10276 DIP("pshufw $%d,%s,%s\n", order, 10277 dis_buf, 10278 nameMMXReg(gregLO3ofRM(modrm))); 10279 } 10280 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 10281 # define SEL(n) \ 10282 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10283 assign(dV, 10284 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 10285 SEL((order>>2)&3), SEL((order>>0)&3) ) 10286 ); 10287 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 10288 # undef SEL 10289 goto decode_success; 10290 } 10291 10292 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 10293 if (haveNo66noF2noF3(pfx) && sz == 4 10294 && insn[0] == 0x0F && insn[1] == 0x53) { 10295 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, 10296 "rcpps", Iop_Recip32Fx4 ); 10297 goto decode_success; 10298 } 10299 10300 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 10301 if (haveF3no66noF2(pfx) && sz == 4 10302 && insn[0] == 0x0F && insn[1] == 0x53) { 10303 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, 10304 "rcpss", Iop_Recip32F0x4 ); 10305 goto decode_success; 10306 } 10307 10308 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 10309 if (haveNo66noF2noF3(pfx) && sz == 4 10310 && insn[0] == 0x0F && insn[1] == 0x52) { 10311 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2, 10312 "rsqrtps", Iop_RSqrt32Fx4 ); 10313 goto decode_success; 10314 } 10315 10316 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 10317 if (haveF3no66noF2(pfx) && sz == 4 10318 && insn[0] == 0x0F && insn[1] == 0x52) { 10319 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2, 10320 "rsqrtss", Iop_RSqrt32F0x4 ); 10321 goto decode_success; 10322 } 10323 10324 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 10325 if (haveNo66noF2noF3(pfx) 10326 && insn[0] == 0x0F && insn[1] == 0xAE 10327 && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7 10328 && sz == 4) { 10329 delta += 3; 10330 /* Insert a memory fence. It's sometimes important that these 10331 are carried through to the generated code. 
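   As with LFENCE and MFENCE below, the fence is expressed with the
   single IR-level barrier available to us:

      stmt( IRStmt_MBE(Imbe_Fence) );

   so the three flavours are not distinguished in the IR.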
*/
10332 stmt( IRStmt_MBE(Imbe_Fence) );
10333 DIP("sfence\n");
10334 goto decode_success;
10335 }
10336 
10337 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
10338 if (haveNo66noF2noF3(pfx) && sz == 4
10339 && insn[0] == 0x0F && insn[1] == 0xC6) {
10340 Int select;
10341 IRTemp sV, dV;
10342 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10343 sV = newTemp(Ity_V128);
10344 dV = newTemp(Ity_V128);
10345 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10346 modrm = insn[2];
10347 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10348 
10349 if (epartIsReg(modrm)) {
10350 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10351 select = (Int)insn[3];
10352 delta += 2+2;
10353 DIP("shufps $%d,%s,%s\n", select,
10354 nameXMMReg(eregOfRexRM(pfx,modrm)),
10355 nameXMMReg(gregOfRexRM(pfx,modrm)));
10356 } else {
10357 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
10358 1/*byte at end of insn*/ );
10359 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10360 select = (Int)insn[2+alen];
10361 delta += 3+alen;
10362 DIP("shufps $%d,%s,%s\n", select,
10363 dis_buf,
10364 nameXMMReg(gregOfRexRM(pfx,modrm)));
10365 }
10366 
10367 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10368 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10369 
10370 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10371 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10372 
10373 putXMMReg(
10374 gregOfRexRM(pfx,modrm),
10375 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
10376 SELD((select>>2)&3), SELD((select>>0)&3) )
10377 );
10378 
10379 # undef SELD
10380 # undef SELS
10381 
10382 goto decode_success;
10383 }
10384 
10385 /* 0F 51 = SQRTPS -- sqrt 32Fx4 from R/M to R */
10386 if (haveNo66noF2noF3(pfx) && sz == 4
10387 && insn[0] == 0x0F && insn[1] == 0x51) {
10388 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
10389 "sqrtps", Iop_Sqrt32Fx4 );
10390 goto decode_success;
10391 }
10392 
10393 /* F3 0F 51 = SQRTSS -- sqrt 32F0x4 from R/M to R */
10394 if (haveF3no66noF2(pfx) && sz == 4
10395 && insn[0] == 0x0F && insn[1] == 0x51) {
10396 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
10397 "sqrtss", Iop_Sqrt32F0x4 );
10398 goto decode_success;
10399 }
10400 
10401 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
10402 if (insn[0] == 0x0F && insn[1] == 0xAE
10403 && haveNo66noF2noF3(pfx)
10404 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) {
10405 
10406 vassert(sz == 4);
10407 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10408 delta += 2+alen;
10409 
10410 /* Fake up a native SSE mxcsr word. The only thing it depends
10411 on is SSEROUND[1:0], so call a clean helper to cook it up.
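   Schematically (a sketch of the call made below, not the helper's
   source):

      UInt mxcsr = (UInt)amd64g_create_mxcsr( (ULong)sseround );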
10412 */
10413 /* ULong amd64g_create_mxcsr ( ULong sseround ) */
10414 DIP("stmxcsr %s\n", dis_buf);
10415 storeLE(
10416 mkexpr(addr),
10417 unop(Iop_64to32,
10418 mkIRExprCCall(
10419 Ity_I64, 0/*regp*/,
10420 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
10421 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
10422 )
10423 )
10424 );
10425 goto decode_success;
10426 }
10427 
10428 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
10429 if (haveNo66noF2noF3(pfx) && sz == 4
10430 && insn[0] == 0x0F && insn[1] == 0x5C) {
10431 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 );
10432 goto decode_success;
10433 }
10434 
10435 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
10436 if (haveF3no66noF2(pfx) && sz == 4
10437 && insn[0] == 0x0F && insn[1] == 0x5C) {
10438 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 );
10439 goto decode_success;
10440 }
10441 
10442 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
10443 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
10444 /* These just appear to be special cases of SHUFPS */
10445 if (haveNo66noF2noF3(pfx) && sz == 4
10446 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
10447 IRTemp sV, dV;
10448 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10449 Bool hi = toBool(insn[1] == 0x15);
10450 sV = newTemp(Ity_V128);
10451 dV = newTemp(Ity_V128);
10452 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10453 modrm = insn[2];
10454 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
10455 
10456 if (epartIsReg(modrm)) {
10457 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10458 delta += 2+1;
10459 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10460 nameXMMReg(eregOfRexRM(pfx,modrm)),
10461 nameXMMReg(gregOfRexRM(pfx,modrm)));
10462 } else {
10463 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10464 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10465 delta += 2+alen;
10466 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
10467 dis_buf,
10468 nameXMMReg(gregOfRexRM(pfx,modrm)));
10469 }
10470 
10471 breakup128to32s( dV, &d3, &d2, &d1, &d0 );
10472 breakup128to32s( sV, &s3, &s2, &s1, &s0 );
10473 
10474 if (hi) {
10475 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) );
10476 } else {
10477 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) );
10478 }
10479 
10480 goto decode_success;
10481 }
10482 
10483 /* 0F 57 = XORPS -- G = G xor E */
10484 if (haveNo66noF2noF3(pfx) && sz == 4
10485 && insn[0] == 0x0F && insn[1] == 0x57) {
10486 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 );
10487 goto decode_success;
10488 }
10489 
10490 /* ---------------------------------------------------- */
10491 /* --- end of the SSE decoder. --- */
10492 /* ---------------------------------------------------- */
10493 
10494 /* ---------------------------------------------------- */
10495 /* --- start of the SSE2 decoder.
--- */
10496 /* ---------------------------------------------------- */
10497 
10498 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
10499 if (have66noF2noF3(pfx)
10500 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
10501 && insn[0] == 0x0F && insn[1] == 0x58) {
10502 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 );
10503 goto decode_success;
10504 }
10505 
10506 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
10507 if (haveF2no66noF3(pfx)
10508 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
10509 && insn[0] == 0x0F && insn[1] == 0x58) {
10510 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 );
10511 goto decode_success;
10512 }
10513 
10514 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
10515 if (have66noF2noF3(pfx) && sz == 2
10516 && insn[0] == 0x0F && insn[1] == 0x55) {
10517 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 );
10518 goto decode_success;
10519 }
10520 
10521 /* 66 0F 54 = ANDPD -- G = G and E */
10522 if (have66noF2noF3(pfx) && sz == 2
10523 && insn[0] == 0x0F && insn[1] == 0x54) {
10524 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 );
10525 goto decode_success;
10526 }
10527 
10528 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
10529 if (have66noF2noF3(pfx) && sz == 2
10530 && insn[0] == 0x0F && insn[1] == 0xC2) {
10531 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 );
10532 goto decode_success;
10533 }
10534 
10535 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
10536 if (haveF2no66noF3(pfx) && sz == 4
10537 && insn[0] == 0x0F && insn[1] == 0xC2) {
10538 delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 );
10539 goto decode_success;
10540 }
10541 
10542 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
10543 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
10544 if (have66noF2noF3(pfx) && sz == 2
10545 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
10546 IRTemp argL = newTemp(Ity_F64);
10547 IRTemp argR = newTemp(Ity_F64);
10548 modrm = getUChar(delta+2);
10549 if (epartIsReg(modrm)) {
10550 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10551 0/*lowest lane*/ ) );
10552 delta += 2+1;
10553 DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
10554 nameXMMReg(eregOfRexRM(pfx,modrm)),
10555 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10556 } else {
10557 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10558 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10559 delta += 2+alen;
10560 DIP("%scomisd %s,%s\n", insn[1]==0x2E ?
"u" : "", 10561 dis_buf, 10562 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10563 } 10564 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 10565 0/*lowest lane*/ ) ); 10566 10567 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10568 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10569 stmt( IRStmt_Put( 10570 OFFB_CC_DEP1, 10571 binop( Iop_And64, 10572 unop( Iop_32Uto64, 10573 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 10574 mkU64(0x45) 10575 ))); 10576 10577 goto decode_success; 10578 } 10579 10580 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 10581 F64 in xmm(G) */ 10582 if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) { 10583 IRTemp arg64 = newTemp(Ity_I64); 10584 if (sz != 4) goto decode_failure; 10585 10586 modrm = getUChar(delta+2); 10587 if (epartIsReg(modrm)) { 10588 assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) ); 10589 delta += 2+1; 10590 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10591 nameXMMReg(gregOfRexRM(pfx,modrm))); 10592 } else { 10593 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10594 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 10595 delta += 2+alen; 10596 DIP("cvtdq2pd %s,%s\n", dis_buf, 10597 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10598 } 10599 10600 putXMMRegLane64F( 10601 gregOfRexRM(pfx,modrm), 0, 10602 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 10603 ); 10604 10605 putXMMRegLane64F( 10606 gregOfRexRM(pfx,modrm), 1, 10607 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 10608 ); 10609 10610 goto decode_success; 10611 } 10612 10613 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 10614 xmm(G) */ 10615 if (haveNo66noF2noF3(pfx) && sz == 4 10616 && insn[0] == 0x0F && insn[1] == 0x5B) { 10617 IRTemp argV = newTemp(Ity_V128); 10618 IRTemp rmode = newTemp(Ity_I32); 10619 10620 modrm = getUChar(delta+2); 10621 if (epartIsReg(modrm)) { 10622 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10623 delta += 2+1; 10624 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10625 nameXMMReg(gregOfRexRM(pfx,modrm))); 10626 } else { 10627 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10628 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10629 delta += 2+alen; 10630 DIP("cvtdq2ps %s,%s\n", dis_buf, 10631 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10632 } 10633 10634 assign( rmode, get_sse_roundingmode() ); 10635 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 10636 10637 # define CVT(_t) binop( Iop_F64toF32, \ 10638 mkexpr(rmode), \ 10639 unop(Iop_I32StoF64,mkexpr(_t))) 10640 10641 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) ); 10642 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) ); 10643 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10644 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10645 10646 # undef CVT 10647 10648 goto decode_success; 10649 } 10650 10651 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 10652 lo half xmm(G), and zero upper half, rounding towards zero */ 10653 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 10654 lo half xmm(G), according to prevailing rounding mode, and zero 10655 upper half */ 10656 if ( ( (haveF2no66noF3(pfx) && sz == 4) 10657 || (have66noF2noF3(pfx) && sz == 2) 10658 ) 10659 && insn[0] == 0x0F && insn[1] == 0xE6) { 10660 IRTemp argV = newTemp(Ity_V128); 10661 IRTemp rmode = newTemp(Ity_I32); 10662 Bool r2zero = toBool(sz == 2); 10663 10664 modrm = getUChar(delta+2); 10665 if (epartIsReg(modrm)) { 10666 assign( argV, 
getXMMReg(eregOfRexRM(pfx,modrm)) );
10667 delta += 2+1;
10668 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10669 nameXMMReg(eregOfRexRM(pfx,modrm)),
10670 nameXMMReg(gregOfRexRM(pfx,modrm)));
10671 } else {
10672 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10673 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10674 delta += 2+alen;
10675 DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
10676 dis_buf,
10677 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10678 }
10679 
10680 if (r2zero) {
10681 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10682 } else {
10683 assign( rmode, get_sse_roundingmode() );
10684 }
10685 
10686 t0 = newTemp(Ity_F64);
10687 t1 = newTemp(Ity_F64);
10688 assign( t0, unop(Iop_ReinterpI64asF64,
10689 unop(Iop_V128to64, mkexpr(argV))) );
10690 assign( t1, unop(Iop_ReinterpI64asF64,
10691 unop(Iop_V128HIto64, mkexpr(argV))) );
10692 
10693 # define CVT(_t) binop( Iop_F64toI32S, \
10694 mkexpr(rmode), \
10695 mkexpr(_t) )
10696 
10697 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
10698 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
10699 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10700 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10701 
10702 # undef CVT
10703 
10704 goto decode_success;
10705 }
10706 
10707 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10708 I32 in mmx, according to prevailing SSE rounding mode */
10709 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
10710 I32 in mmx, rounding towards zero */
10711 if (have66noF2noF3(pfx) && sz == 2
10712 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
10713 IRTemp dst64 = newTemp(Ity_I64);
10714 IRTemp rmode = newTemp(Ity_I32);
10715 IRTemp f64lo = newTemp(Ity_F64);
10716 IRTemp f64hi = newTemp(Ity_F64);
10717 Bool r2zero = toBool(insn[1] == 0x2C);
10718 
10719 do_MMX_preamble();
10720 modrm = getUChar(delta+2);
10721 
10722 if (epartIsReg(modrm)) {
10723 delta += 2+1;
10724 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10725 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
10726 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10727 nameXMMReg(eregOfRexRM(pfx,modrm)),
10728 nameMMXReg(gregLO3ofRM(modrm)));
10729 } else {
10730 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10731 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10732 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
10733 mkexpr(addr),
10734 mkU64(8) )));
10735 delta += 2+alen;
10736 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
10737 dis_buf,
10738 nameMMXReg(gregLO3ofRM(modrm)));
10739 }
10740 
10741 if (r2zero) {
10742 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10743 } else {
10744 assign( rmode, get_sse_roundingmode() );
10745 }
10746 
10747 assign(
10748 dst64,
10749 binop( Iop_32HLto64,
10750 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
10751 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
10752 )
10753 );
10754 
10755 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
10756 goto decode_success;
10757 }
10758 
10759 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
10760 lo half xmm(G), rounding according to prevailing SSE rounding
10761 mode, and zero upper half */
10762 /* Note, this is practically identical to CVTPD2DQ. It would have
10763 been nicer to merge them together, but the insn[] offsets differ
10764 by one.
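   Schematically (illustrative only):

      G[31:0]   = F64toF32( rm, E[63:0]  )
      G[63:32]  = F64toF32( rm, E[127:64] )
      G[127:64] = 0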
*/ 10765 if (have66noF2noF3(pfx) && sz == 2 10766 && insn[0] == 0x0F && insn[1] == 0x5A) { 10767 IRTemp argV = newTemp(Ity_V128); 10768 IRTemp rmode = newTemp(Ity_I32); 10769 10770 modrm = getUChar(delta+2); 10771 if (epartIsReg(modrm)) { 10772 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10773 delta += 2+1; 10774 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10775 nameXMMReg(gregOfRexRM(pfx,modrm))); 10776 } else { 10777 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10778 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10779 delta += 2+alen; 10780 DIP("cvtpd2ps %s,%s\n", dis_buf, 10781 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10782 } 10783 10784 assign( rmode, get_sse_roundingmode() ); 10785 t0 = newTemp(Ity_F64); 10786 t1 = newTemp(Ity_F64); 10787 assign( t0, unop(Iop_ReinterpI64asF64, 10788 unop(Iop_V128to64, mkexpr(argV))) ); 10789 assign( t1, unop(Iop_ReinterpI64asF64, 10790 unop(Iop_V128HIto64, mkexpr(argV))) ); 10791 10792 # define CVT(_t) binop( Iop_F64toF32, \ 10793 mkexpr(rmode), \ 10794 mkexpr(_t) ) 10795 10796 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) ); 10797 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) ); 10798 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) ); 10799 putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) ); 10800 10801 # undef CVT 10802 10803 goto decode_success; 10804 } 10805 10806 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 10807 xmm(G) */ 10808 if (have66noF2noF3(pfx) && sz == 2 10809 && insn[0] == 0x0F && insn[1] == 0x2A) { 10810 IRTemp arg64 = newTemp(Ity_I64); 10811 10812 modrm = getUChar(delta+2); 10813 if (epartIsReg(modrm)) { 10814 /* Only switch to MMX mode if the source is a MMX register. 10815 This is inconsistent with all other instructions which 10816 convert between XMM and (M64 or MMX), which always switch 10817 to MMX mode even if 64-bit operand is M64 and not MMX. At 10818 least, that's what the Intel docs seem to me to say. 10819 Fixes #210264. 
*/
10820 do_MMX_preamble();
10821 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
10822 delta += 2+1;
10823 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
10824 nameXMMReg(gregOfRexRM(pfx,modrm)));
10825 } else {
10826 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10827 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
10828 delta += 2+alen;
10829 DIP("cvtpi2pd %s,%s\n", dis_buf,
10830 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10831 }
10832 
10833 putXMMRegLane64F(
10834 gregOfRexRM(pfx,modrm), 0,
10835 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
10836 );
10837 
10838 putXMMRegLane64F(
10839 gregOfRexRM(pfx,modrm), 1,
10840 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
10841 );
10842 
10843 goto decode_success;
10844 }
10845 
10846 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10847 xmm(G), rounding towards zero */
10848 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10849 xmm(G), as per the prevailing rounding mode */
10850 if ( ( (have66noF2noF3(pfx) && sz == 2)
10851 || (haveF3no66noF2(pfx) && sz == 4)
10852 )
10853 && insn[0] == 0x0F && insn[1] == 0x5B) {
10854 IRTemp argV = newTemp(Ity_V128);
10855 IRTemp rmode = newTemp(Ity_I32);
10856 Bool r2zero = toBool(sz == 4);
10857 
10858 modrm = getUChar(delta+2);
10859 if (epartIsReg(modrm)) {
10860 assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10861 delta += 2+1;
10862 DIP("cvt%sps2dq %s,%s\n", r2zero ? "t" : "",
10863 nameXMMReg(eregOfRexRM(pfx,modrm)),
10864 nameXMMReg(gregOfRexRM(pfx,modrm)));
10865 } else {
10866 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
10867 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10868 delta += 2+alen;
10869 DIP("cvt%sps2dq %s,%s\n", r2zero ? "t" : "",
10870 dis_buf,
10871 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10872 }
10873 
10874 if (r2zero) {
10875 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10876 } else {
10877 assign( rmode, get_sse_roundingmode() );
10878 }
10879 
10880 breakup128to32s( argV, &t3, &t2, &t1, &t0 );
10881 
10882 /* This is less than ideal. If it turns out to be a performance
10883 bottleneck it can be improved. */
10884 # define CVT(_t) \
10885 binop( Iop_F64toI32S, \
10886 mkexpr(rmode), \
10887 unop( Iop_F32toF64, \
10888 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10889 
10890 putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
10891 putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
10892 putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
10893 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
10894 
10895 # undef CVT
10896 
10897 goto decode_success;
10898 }
10899 
10898 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
10899 F64 in xmm(G).
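   The F32 -> F64 widening is exact, so no rounding mode is consulted.
   Schematically:

      G[63:0]   = F32toF64( E[31:0]  )
      G[127:64] = F32toF64( E[63:32] )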
*/ 10900 if (haveNo66noF2noF3(pfx) && sz == 4 10901 && insn[0] == 0x0F && insn[1] == 0x5A) { 10902 IRTemp f32lo = newTemp(Ity_F32); 10903 IRTemp f32hi = newTemp(Ity_F32); 10904 10905 modrm = getUChar(delta+2); 10906 if (epartIsReg(modrm)) { 10907 assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) ); 10908 assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) ); 10909 delta += 2+1; 10910 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10911 nameXMMReg(gregOfRexRM(pfx,modrm))); 10912 } else { 10913 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10914 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 10915 assign( f32hi, loadLE(Ity_F32, 10916 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10917 delta += 2+alen; 10918 DIP("cvtps2pd %s,%s\n", dis_buf, 10919 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10920 } 10921 10922 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1, 10923 unop(Iop_F32toF64, mkexpr(f32hi)) ); 10924 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 10925 unop(Iop_F32toF64, mkexpr(f32lo)) ); 10926 10927 goto decode_success; 10928 } 10929 10930 /* F2 0F 2D = CVTSD2SI 10931 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 10932 according to prevailing SSE rounding mode 10933 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 10934 according to prevailing SSE rounding mode 10935 */ 10936 /* F2 0F 2C = CVTTSD2SI 10937 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 10938 truncating towards zero 10939 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 10940 truncating towards zero 10941 */ 10942 if (haveF2no66noF3(pfx) 10943 && insn[0] == 0x0F 10944 && (insn[1] == 0x2D || insn[1] == 0x2C)) { 10945 IRTemp rmode = newTemp(Ity_I32); 10946 IRTemp f64lo = newTemp(Ity_F64); 10947 Bool r2zero = toBool(insn[1] == 0x2C); 10948 vassert(sz == 4 || sz == 8); 10949 10950 modrm = getUChar(delta+2); 10951 if (epartIsReg(modrm)) { 10952 delta += 2+1; 10953 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10954 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 10955 nameXMMReg(eregOfRexRM(pfx,modrm)), 10956 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 10957 } else { 10958 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10959 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 10960 delta += 2+alen; 10961 DIP("cvt%ssd2si %s,%s\n", r2zero ? 
"t" : "", 10962 dis_buf, 10963 nameIReg(sz, gregOfRexRM(pfx,modrm), False)); 10964 } 10965 10966 if (r2zero) { 10967 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10968 } else { 10969 assign( rmode, get_sse_roundingmode() ); 10970 } 10971 10972 if (sz == 4) { 10973 putIReg32( gregOfRexRM(pfx,modrm), 10974 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 10975 } else { 10976 putIReg64( gregOfRexRM(pfx,modrm), 10977 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 10978 } 10979 10980 goto decode_success; 10981 } 10982 10983 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 10984 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 10985 if (haveF2no66noF3(pfx) && sz == 4 10986 && insn[0] == 0x0F && insn[1] == 0x5A) { 10987 IRTemp rmode = newTemp(Ity_I32); 10988 IRTemp f64lo = newTemp(Ity_F64); 10989 vassert(sz == 4); 10990 10991 modrm = getUChar(delta+2); 10992 if (epartIsReg(modrm)) { 10993 delta += 2+1; 10994 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10995 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 10996 nameXMMReg(gregOfRexRM(pfx,modrm))); 10997 } else { 10998 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 10999 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 11000 delta += 2+alen; 11001 DIP("cvtsd2ss %s,%s\n", dis_buf, 11002 nameXMMReg(gregOfRexRM(pfx,modrm))); 11003 } 11004 11005 assign( rmode, get_sse_roundingmode() ); 11006 putXMMRegLane32F( 11007 gregOfRexRM(pfx,modrm), 0, 11008 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 11009 ); 11010 11011 goto decode_success; 11012 } 11013 11014 /* F2 0F 2A = CVTSI2SD 11015 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 11016 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 11017 */ 11018 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8) 11019 && insn[0] == 0x0F && insn[1] == 0x2A) { 11020 modrm = getUChar(delta+2); 11021 11022 if (sz == 4) { 11023 IRTemp arg32 = newTemp(Ity_I32); 11024 if (epartIsReg(modrm)) { 11025 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 11026 delta += 2+1; 11027 DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 11028 nameXMMReg(gregOfRexRM(pfx,modrm))); 11029 } else { 11030 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11031 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 11032 delta += 2+alen; 11033 DIP("cvtsi2sd %s,%s\n", dis_buf, 11034 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 11035 } 11036 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 11037 unop(Iop_I32StoF64, mkexpr(arg32)) 11038 ); 11039 } else { 11040 /* sz == 8 */ 11041 IRTemp arg64 = newTemp(Ity_I64); 11042 if (epartIsReg(modrm)) { 11043 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 11044 delta += 2+1; 11045 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 11046 nameXMMReg(gregOfRexRM(pfx,modrm))); 11047 } else { 11048 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11049 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11050 delta += 2+alen; 11051 DIP("cvtsi2sdq %s,%s\n", dis_buf, 11052 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 11053 } 11054 putXMMRegLane64F( 11055 gregOfRexRM(pfx,modrm), 11056 0, 11057 binop( Iop_I64StoF64, 11058 get_sse_roundingmode(), 11059 mkexpr(arg64) 11060 ) 11061 ); 11062 11063 } 11064 11065 goto decode_success; 11066 } 11067 11068 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 11069 low half xmm(G) */ 11070 if (haveF3no66noF2(pfx) && sz == 4 11071 && insn[0] == 0x0F && insn[1] == 0x5A) { 11072 IRTemp f32lo = newTemp(Ity_F32); 11073 11074 modrm = getUChar(delta+2); 
11075 if (epartIsReg(modrm)) { 11076 delta += 2+1; 11077 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 11078 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11079 nameXMMReg(gregOfRexRM(pfx,modrm))); 11080 } else { 11081 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11082 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 11083 delta += 2+alen; 11084 DIP("cvtss2sd %s,%s\n", dis_buf, 11085 nameXMMReg(gregOfRexRM(pfx,modrm))); 11086 } 11087 11088 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 11089 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 11090 11091 goto decode_success; 11092 } 11093 11094 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 11095 if (have66noF2noF3(pfx) && sz == 2 11096 && insn[0] == 0x0F && insn[1] == 0x5E) { 11097 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 ); 11098 goto decode_success; 11099 } 11100 11101 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 11102 if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) { 11103 vassert(sz == 4); 11104 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 ); 11105 goto decode_success; 11106 } 11107 11108 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 11109 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 11110 if (haveNo66noF2noF3(pfx) && sz == 4 11111 && insn[0] == 0x0F && insn[1] == 0xAE 11112 && epartIsReg(insn[2]) 11113 && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) { 11114 delta += 3; 11115 /* Insert a memory fence. It's sometimes important that these 11116 are carried through to the generated code. */ 11117 stmt( IRStmt_MBE(Imbe_Fence) ); 11118 DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m"); 11119 goto decode_success; 11120 } 11121 11122 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 11123 if (have66noF2noF3(pfx) && sz == 2 11124 && insn[0] == 0x0F && insn[1] == 0x5F) { 11125 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 ); 11126 goto decode_success; 11127 } 11128 11129 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 11130 if (haveF2no66noF3(pfx) && sz == 4 11131 && insn[0] == 0x0F && insn[1] == 0x5F) { 11132 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 ); 11133 goto decode_success; 11134 } 11135 11136 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 11137 if (have66noF2noF3(pfx) && sz == 2 11138 && insn[0] == 0x0F && insn[1] == 0x5D) { 11139 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 ); 11140 goto decode_success; 11141 } 11142 11143 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 11144 if (haveF2no66noF3(pfx) && sz == 4 11145 && insn[0] == 0x0F && insn[1] == 0x5D) { 11146 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 ); 11147 goto decode_success; 11148 } 11149 11150 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 11151 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 11152 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 11153 if (have66noF2noF3(pfx) 11154 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 11155 && insn[0] == 0x0F 11156 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 11157 HChar* wot = insn[1]==0x28 ? "apd" : 11158 insn[1]==0x10 ? 
"upd" : "dqa"; 11159 modrm = getUChar(delta+2); 11160 if (epartIsReg(modrm)) { 11161 putXMMReg( gregOfRexRM(pfx,modrm), 11162 getXMMReg( eregOfRexRM(pfx,modrm) )); 11163 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)), 11164 nameXMMReg(gregOfRexRM(pfx,modrm))); 11165 delta += 2+1; 11166 } else { 11167 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11168 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 11169 gen_SEGV_if_not_16_aligned( addr ); 11170 putXMMReg( gregOfRexRM(pfx,modrm), 11171 loadLE(Ity_V128, mkexpr(addr)) ); 11172 DIP("mov%s %s,%s\n", wot, dis_buf, 11173 nameXMMReg(gregOfRexRM(pfx,modrm))); 11174 delta += 2+alen; 11175 } 11176 goto decode_success; 11177 } 11178 11179 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 11180 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 11181 if (have66noF2noF3(pfx) && insn[0] == 0x0F 11182 && (insn[1] == 0x29 || insn[1] == 0x11)) { 11183 HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 11184 modrm = getUChar(delta+2); 11185 if (epartIsReg(modrm)) { 11186 putXMMReg( eregOfRexRM(pfx,modrm), 11187 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 11188 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)), 11189 nameXMMReg(eregOfRexRM(pfx,modrm))); 11190 delta += 2+1; 11191 } else { 11192 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11193 if (insn[1] == 0x29/*movapd*/) 11194 gen_SEGV_if_not_16_aligned( addr ); 11195 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11196 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)), 11197 dis_buf ); 11198 delta += 2+alen; 11199 } 11200 goto decode_success; 11201 } 11202 11203 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */ 11204 /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. */ 11205 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) { 11206 vassert(sz == 2 || sz == 8); 11207 if (sz == 2) sz = 4; 11208 modrm = getUChar(delta+2); 11209 if (epartIsReg(modrm)) { 11210 delta += 2+1; 11211 if (sz == 4) { 11212 putXMMReg( 11213 gregOfRexRM(pfx,modrm), 11214 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 11215 ); 11216 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 11217 nameXMMReg(gregOfRexRM(pfx,modrm))); 11218 } else { 11219 putXMMReg( 11220 gregOfRexRM(pfx,modrm), 11221 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 11222 ); 11223 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 11224 nameXMMReg(gregOfRexRM(pfx,modrm))); 11225 } 11226 } else { 11227 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11228 delta += 2+alen; 11229 putXMMReg( 11230 gregOfRexRM(pfx,modrm), 11231 sz == 4 11232 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 11233 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 11234 ); 11235 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 11236 nameXMMReg(gregOfRexRM(pfx,modrm))); 11237 } 11238 goto decode_success; 11239 } 11240 11241 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 11242 /* or from xmm low 1/2 to ireg64 or m64. 
*/ 11243 if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) { 11244 if (sz == 2) sz = 4; 11245 vassert(sz == 4 || sz == 8); 11246 modrm = getUChar(delta+2); 11247 if (epartIsReg(modrm)) { 11248 delta += 2+1; 11249 if (sz == 4) { 11250 putIReg32( eregOfRexRM(pfx,modrm), 11251 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 11252 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11253 nameIReg32(eregOfRexRM(pfx,modrm))); 11254 } else { 11255 putIReg64( eregOfRexRM(pfx,modrm), 11256 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 11257 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11258 nameIReg64(eregOfRexRM(pfx,modrm))); 11259 } 11260 } else { 11261 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11262 delta += 2+alen; 11263 storeLE( mkexpr(addr), 11264 sz == 4 11265 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 11266 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 11267 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', 11268 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11269 } 11270 goto decode_success; 11271 } 11272 11273 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 11274 if (have66noF2noF3(pfx) && sz == 2 11275 && insn[0] == 0x0F && insn[1] == 0x7F) { 11276 modrm = getUChar(delta+2); 11277 if (epartIsReg(modrm)) { 11278 delta += 2+1; 11279 putXMMReg( eregOfRexRM(pfx,modrm), 11280 getXMMReg(gregOfRexRM(pfx,modrm)) ); 11281 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11282 nameXMMReg(eregOfRexRM(pfx,modrm))); 11283 } else { 11284 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11285 gen_SEGV_if_not_16_aligned( addr ); 11286 delta += 2+alen; 11287 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11288 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11289 } 11290 goto decode_success; 11291 } 11292 11293 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 11294 if (haveF3no66noF2(pfx) && sz == 4 11295 && insn[0] == 0x0F && insn[1] == 0x6F) { 11296 modrm = getUChar(delta+2); 11297 if (epartIsReg(modrm)) { 11298 putXMMReg( gregOfRexRM(pfx,modrm), 11299 getXMMReg( eregOfRexRM(pfx,modrm) )); 11300 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11301 nameXMMReg(gregOfRexRM(pfx,modrm))); 11302 delta += 2+1; 11303 } else { 11304 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11305 putXMMReg( gregOfRexRM(pfx,modrm), 11306 loadLE(Ity_V128, mkexpr(addr)) ); 11307 DIP("movdqu %s,%s\n", dis_buf, 11308 nameXMMReg(gregOfRexRM(pfx,modrm))); 11309 delta += 2+alen; 11310 } 11311 goto decode_success; 11312 } 11313 11314 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 11315 if (haveF3no66noF2(pfx) && sz == 4 11316 && insn[0] == 0x0F && insn[1] == 0x7F) { 11317 modrm = getUChar(delta+2); 11318 if (epartIsReg(modrm)) { 11319 goto decode_failure; /* awaiting test case */ 11320 delta += 2+1; 11321 putXMMReg( eregOfRexRM(pfx,modrm), 11322 getXMMReg(gregOfRexRM(pfx,modrm)) ); 11323 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11324 nameXMMReg(eregOfRexRM(pfx,modrm))); 11325 } else { 11326 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11327 delta += 2+alen; 11328 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11329 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 11330 } 11331 goto decode_success; 11332 } 11333 11334 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). 
*/
   if (haveF2no66noF3(pfx) && sz == 4
       && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putMMXReg( gregLO3ofRM(modrm),
                    getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
         DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
         delta += 2+1;
         goto decode_success;
      } else {
         /* apparently no mem case for this insn */
         goto decode_failure;
      }
   }

   /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
   /* These seem identical to MOVHPS.  This instruction encoding is
      completely crazy. */
   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         delta += 2+alen;
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhpd %s,%s\n", dis_buf,
                               nameXMMReg( gregOfRexRM(pfx,modrm) ));
         goto decode_success;
      }
   }

   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
   /* Again, this seems identical to MOVHPS. */
   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
   /* Identical to MOVLPS ? */
   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         delta += 2+alen;
         putXMMRegLane64( gregOfRexRM(pfx,modrm),
                          0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlpd %s, %s\n",
             dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
         goto decode_success;
      }
   }

   /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
   /* Identical to MOVLPS ? */
   if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) {
      modrm = getUChar(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         delta += 2+alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                   0/*lower lane*/ ) );
         DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
      2 lowest bits of ireg(G) */
   if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x50) {
      /* sz == 8 is a kludge to handle insns with REX.W redundantly
         set to 1, which has been known to happen:
            66 4c 0f 50 d9          rex64X movmskpd %xmm1,%r11d
         20071106: see further comments on MOVMSKPS implementation
         above.
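         For reference, the value built below is just the two F64 sign
         bits:  ((lane1 >> 63) << 1) | (lane0 >> 63). */
#if 0
      /* Editorial sketch, compiled out: a scalar model of the
         MOVMSKPD result computed below from the two 64-bit lanes of
         xmm(E).  The helper name is hypothetical, for illustration
         only. */
      static UInt ref_movmskpd ( ULong lane0, ULong lane1 )
      {
         /* result bit 0 = sign of low F64, bit 1 = sign of high F64 */
         return (UInt)( ((lane1 >> 63) << 1) | (lane0 >> 63) );
      }
#endif
      /*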
11430 */ 11431 modrm = getUChar(delta+2); 11432 if (epartIsReg(modrm)) { 11433 Int src; 11434 t0 = newTemp(Ity_I32); 11435 t1 = newTemp(Ity_I32); 11436 delta += 2+1; 11437 src = eregOfRexRM(pfx,modrm); 11438 assign( t0, binop( Iop_And32, 11439 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), 11440 mkU32(1) )); 11441 assign( t1, binop( Iop_And32, 11442 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), 11443 mkU32(2) )); 11444 putIReg32( gregOfRexRM(pfx,modrm), 11445 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) 11446 ); 11447 DIP("movmskpd %s,%s\n", nameXMMReg(src), 11448 nameIReg32(gregOfRexRM(pfx,modrm))); 11449 goto decode_success; 11450 } 11451 /* else fall through */ 11452 goto decode_failure; 11453 } 11454 11455 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 11456 if (have66noF2noF3(pfx) && sz == 2 11457 && insn[0] == 0x0F && insn[1] == 0xF7) { 11458 modrm = getUChar(delta+2); 11459 if (epartIsReg(modrm)) { 11460 IRTemp regD = newTemp(Ity_V128); 11461 IRTemp mask = newTemp(Ity_V128); 11462 IRTemp olddata = newTemp(Ity_V128); 11463 IRTemp newdata = newTemp(Ity_V128); 11464 addr = newTemp(Ity_I64); 11465 11466 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 11467 assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) )); 11468 11469 /* Unfortunately can't do the obvious thing with SarN8x16 11470 here since that can't be re-emitted as SSE2 code - no such 11471 insn. */ 11472 assign( 11473 mask, 11474 binop(Iop_64HLtoV128, 11475 binop(Iop_SarN8x8, 11476 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), 11477 mkU8(7) ), 11478 binop(Iop_SarN8x8, 11479 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), 11480 mkU8(7) ) )); 11481 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 11482 assign( newdata, 11483 binop(Iop_OrV128, 11484 binop(Iop_AndV128, 11485 mkexpr(regD), 11486 mkexpr(mask) ), 11487 binop(Iop_AndV128, 11488 mkexpr(olddata), 11489 unop(Iop_NotV128, mkexpr(mask)))) ); 11490 storeLE( mkexpr(addr), mkexpr(newdata) ); 11491 11492 delta += 2+1; 11493 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ), 11494 nameXMMReg( gregOfRexRM(pfx,modrm) ) ); 11495 goto decode_success; 11496 } 11497 /* else fall through */ 11498 } 11499 11500 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 11501 if (have66noF2noF3(pfx) && sz == 2 11502 && insn[0] == 0x0F && insn[1] == 0xE7) { 11503 modrm = getUChar(delta+2); 11504 if (!epartIsReg(modrm)) { 11505 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11506 gen_SEGV_if_not_16_aligned( addr ); 11507 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 11508 DIP("movntdq %s,%s\n", dis_buf, 11509 nameXMMReg(gregOfRexRM(pfx,modrm))); 11510 delta += 2+alen; 11511 goto decode_success; 11512 } 11513 /* else fall through */ 11514 goto decode_failure; 11515 } 11516 11517 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 11518 if (haveNo66noF2noF3(pfx) && 11519 insn[0] == 0x0F && insn[1] == 0xC3) { 11520 vassert(sz == 4 || sz == 8); 11521 modrm = getUChar(delta+2); 11522 if (!epartIsReg(modrm)) { 11523 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 11524 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 11525 DIP("movnti %s,%s\n", dis_buf, 11526 nameIRegG(sz, pfx, modrm)); 11527 delta += 2+alen; 11528 goto decode_success; 11529 } 11530 /* else fall through */ 11531 } 11532 11533 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 11534 or lo half xmm). 
*/
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through, awaiting test case */
         /* dst: lo half copied, hi half zeroed */
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
         DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }

   /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
      hi half). */
   if (haveF3no66noF2(pfx) && sz == 4
       && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putXMMReg( gregOfRexRM(pfx,modrm),
                    unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
         DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 2+1;
         goto decode_success;
      } else {
         /* apparently no mem case for this insn */
         goto decode_failure;
      }
   }

   /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  Upper half of G is zeroed out. */
   /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  If E is mem, upper half of G is zeroed out.
      If E is reg, upper half of G is unchanged. */
   if ( (haveF2no66noF3(pfx)
         && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
         && insn[0] == 0x0F && insn[1] == 0x10)
        ||
        (haveF3no66noF2(pfx)
         && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
         && insn[0] == 0x0F && insn[1] == 0x7E)
      ) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                          getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
         if (insn[1] == 0x7E/*MOVQ*/) {
            /* zero bits 127:64 */
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
         }
         DIP("%s %s,%s\n", insn[1]==0x7E ? "movq" : "movsd",
                           nameXMMReg(eregOfRexRM(pfx,modrm)),
                           nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("%s %s,%s\n", insn[1]==0x7E ? "movq" : "movsd",
                           dis_buf,
                           nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 2+alen;
      }
      goto decode_success;
   }

   /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm).
*/
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x11) {
      modrm = getUChar(delta+2);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
                          getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                              nameXMMReg(eregOfRexRM(pfx,modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         storeLE( mkexpr(addr),
                  getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                              dis_buf);
         delta += 2+alen;
      }
      goto decode_success;
   }

   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }

   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }

   /* 66 0F 56 = ORPD -- G = G or E */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 );
      goto decode_success;
   }

   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufpd $%d,%s,%s\n", select,
                                   nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufpd $%d,%s,%s\n", select,
                                   dis_buf,
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#     define SELD(n) mkexpr((n)==0 ? d0 : d1)
#     define SELS(n) mkexpr((n)==0 ? s0 : s1)

      putXMMReg(
         gregOfRexRM(pfx,modrm),
         binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }

   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }

   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2,
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }

   /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
   /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
   /* These just appear to be special cases of SHUFPS */
   if (have66noF2noF3(pfx)
       && sz == 2 /* could be 8 if rex also present */
       && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      Bool   hi = toBool(insn[1] == 0x15);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         delta += 2+1;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%spd %s,%s\n", hi ?
"h" : "l", 11762 dis_buf, 11763 nameXMMReg(gregOfRexRM(pfx,modrm))); 11764 } 11765 11766 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 11767 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 11768 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 11769 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 11770 11771 if (hi) { 11772 putXMMReg( gregOfRexRM(pfx,modrm), 11773 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 11774 } else { 11775 putXMMReg( gregOfRexRM(pfx,modrm), 11776 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 11777 } 11778 11779 goto decode_success; 11780 } 11781 11782 /* 66 0F 57 = XORPD -- G = G xor E */ 11783 if (have66noF2noF3(pfx) && sz == 2 11784 && insn[0] == 0x0F && insn[1] == 0x57) { 11785 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 ); 11786 goto decode_success; 11787 } 11788 11789 /* 66 0F 6B = PACKSSDW */ 11790 if (have66noF2noF3(pfx) && sz == 2 11791 && insn[0] == 0x0F && insn[1] == 0x6B) { 11792 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11793 "packssdw", Iop_QNarrow32Sx4, True ); 11794 goto decode_success; 11795 } 11796 11797 /* 66 0F 63 = PACKSSWB */ 11798 if (have66noF2noF3(pfx) && sz == 2 11799 && insn[0] == 0x0F && insn[1] == 0x63) { 11800 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11801 "packsswb", Iop_QNarrow16Sx8, True ); 11802 goto decode_success; 11803 } 11804 11805 /* 66 0F 67 = PACKUSWB */ 11806 if (have66noF2noF3(pfx) && sz == 2 11807 && insn[0] == 0x0F && insn[1] == 0x67) { 11808 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11809 "packuswb", Iop_QNarrow16Ux8, True ); 11810 goto decode_success; 11811 } 11812 11813 /* 66 0F FC = PADDB */ 11814 if (have66noF2noF3(pfx) && sz == 2 11815 && insn[0] == 0x0F && insn[1] == 0xFC) { 11816 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11817 "paddb", Iop_Add8x16, False ); 11818 goto decode_success; 11819 } 11820 11821 /* 66 0F FE = PADDD */ 11822 if (have66noF2noF3(pfx) && sz == 2 11823 && insn[0] == 0x0F && insn[1] == 0xFE) { 11824 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11825 "paddd", Iop_Add32x4, False ); 11826 goto decode_success; 11827 } 11828 11829 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 11830 /* 0F D4 = PADDQ -- add 64x1 */ 11831 if (haveNo66noF2noF3(pfx) && sz == 4 11832 && insn[0] == 0x0F && insn[1] == 0xD4) { 11833 do_MMX_preamble(); 11834 delta = dis_MMXop_regmem_to_reg ( 11835 vbi, pfx, delta+2, insn[1], "paddq", False ); 11836 goto decode_success; 11837 } 11838 11839 /* 66 0F D4 = PADDQ */ 11840 if (have66noF2noF3(pfx) && sz == 2 11841 && insn[0] == 0x0F && insn[1] == 0xD4) { 11842 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11843 "paddq", Iop_Add64x2, False ); 11844 goto decode_success; 11845 } 11846 11847 /* 66 0F FD = PADDW */ 11848 if (have66noF2noF3(pfx) && sz == 2 11849 && insn[0] == 0x0F && insn[1] == 0xFD) { 11850 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11851 "paddw", Iop_Add16x8, False ); 11852 goto decode_success; 11853 } 11854 11855 /* 66 0F EC = PADDSB */ 11856 if (have66noF2noF3(pfx) && sz == 2 11857 && insn[0] == 0x0F && insn[1] == 0xEC) { 11858 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11859 "paddsb", Iop_QAdd8Sx16, False ); 11860 goto decode_success; 11861 } 11862 11863 /* 66 0F ED = PADDSW */ 11864 if (have66noF2noF3(pfx) && sz == 2 11865 && insn[0] == 0x0F && insn[1] == 0xED) { 11866 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11867 "paddsw", Iop_QAdd16Sx8, False ); 11868 goto decode_success; 11869 } 11870 11871 /* 66 0F DC = PADDUSB */ 11872 if (have66noF2noF3(pfx) && sz == 2 11873 && insn[0] == 0x0F && insn[1] 
== 0xDC) { 11874 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11875 "paddusb", Iop_QAdd8Ux16, False ); 11876 goto decode_success; 11877 } 11878 11879 /* 66 0F DD = PADDUSW */ 11880 if (have66noF2noF3(pfx) && sz == 2 11881 && insn[0] == 0x0F && insn[1] == 0xDD) { 11882 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11883 "paddusw", Iop_QAdd16Ux8, False ); 11884 goto decode_success; 11885 } 11886 11887 /* 66 0F DB = PAND */ 11888 if (have66noF2noF3(pfx) && sz == 2 11889 && insn[0] == 0x0F && insn[1] == 0xDB) { 11890 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 ); 11891 goto decode_success; 11892 } 11893 11894 /* 66 0F DF = PANDN */ 11895 if (have66noF2noF3(pfx) && sz == 2 11896 && insn[0] == 0x0F && insn[1] == 0xDF) { 11897 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 ); 11898 goto decode_success; 11899 } 11900 11901 /* 66 0F E0 = PAVGB */ 11902 if (have66noF2noF3(pfx) && sz == 2 11903 && insn[0] == 0x0F && insn[1] == 0xE0) { 11904 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11905 "pavgb", Iop_Avg8Ux16, False ); 11906 goto decode_success; 11907 } 11908 11909 /* 66 0F E3 = PAVGW */ 11910 if (have66noF2noF3(pfx) && sz == 2 11911 && insn[0] == 0x0F && insn[1] == 0xE3) { 11912 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11913 "pavgw", Iop_Avg16Ux8, False ); 11914 goto decode_success; 11915 } 11916 11917 /* 66 0F 74 = PCMPEQB */ 11918 if (have66noF2noF3(pfx) && sz == 2 11919 && insn[0] == 0x0F && insn[1] == 0x74) { 11920 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11921 "pcmpeqb", Iop_CmpEQ8x16, False ); 11922 goto decode_success; 11923 } 11924 11925 /* 66 0F 76 = PCMPEQD */ 11926 if (have66noF2noF3(pfx) && sz == 2 11927 && insn[0] == 0x0F && insn[1] == 0x76) { 11928 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11929 "pcmpeqd", Iop_CmpEQ32x4, False ); 11930 goto decode_success; 11931 } 11932 11933 /* 66 0F 75 = PCMPEQW */ 11934 if (have66noF2noF3(pfx) && sz == 2 11935 && insn[0] == 0x0F && insn[1] == 0x75) { 11936 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11937 "pcmpeqw", Iop_CmpEQ16x8, False ); 11938 goto decode_success; 11939 } 11940 11941 /* 66 0F 64 = PCMPGTB */ 11942 if (have66noF2noF3(pfx) && sz == 2 11943 && insn[0] == 0x0F && insn[1] == 0x64) { 11944 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11945 "pcmpgtb", Iop_CmpGT8Sx16, False ); 11946 goto decode_success; 11947 } 11948 11949 /* 66 0F 66 = PCMPGTD */ 11950 if (have66noF2noF3(pfx) && sz == 2 11951 && insn[0] == 0x0F && insn[1] == 0x66) { 11952 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11953 "pcmpgtd", Iop_CmpGT32Sx4, False ); 11954 goto decode_success; 11955 } 11956 11957 /* 66 0F 65 = PCMPGTW */ 11958 if (have66noF2noF3(pfx) && sz == 2 11959 && insn[0] == 0x0F && insn[1] == 0x65) { 11960 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 11961 "pcmpgtw", Iop_CmpGT16Sx8, False ); 11962 goto decode_success; 11963 } 11964 11965 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 11966 zero-extend of it in ireg(G). 
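      Lane selection uses only the low three bits of the immediate. */
#if 0
   /* Editorial sketch, compiled out: a scalar model of the lane
      extraction the switch below performs.  The helper name is
      hypothetical, for illustration only. */
   static UInt ref_pextrw ( const UShort* lanes8, UInt imm8 )
   {
      return (UInt)lanes8[imm8 & 7];   /* zero-extended to 32 bits */
   }
#endif
   /*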
*/ 11967 if (have66noF2noF3(pfx) 11968 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 11969 && insn[0] == 0x0F && insn[1] == 0xC5) { 11970 modrm = insn[2]; 11971 if (epartIsReg(modrm)) { 11972 t5 = newTemp(Ity_V128); 11973 t4 = newTemp(Ity_I16); 11974 assign(t5, getXMMReg(eregOfRexRM(pfx,modrm))); 11975 breakup128to32s( t5, &t3, &t2, &t1, &t0 ); 11976 switch (insn[3] & 7) { 11977 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; 11978 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; 11979 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; 11980 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; 11981 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; 11982 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; 11983 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; 11984 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; 11985 default: vassert(0); 11986 } 11987 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4))); 11988 DIP("pextrw $%d,%s,%s\n", 11989 (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)), 11990 nameIReg32(gregOfRexRM(pfx,modrm))); 11991 delta += 4; 11992 goto decode_success; 11993 } 11994 /* else fall through */ 11995 /* note, if memory case is ever filled in, there is 1 byte after 11996 amode */ 11997 } 11998 11999 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 12000 put it into the specified lane of xmm(G). */ 12001 if (have66noF2noF3(pfx) 12002 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 12003 && insn[0] == 0x0F && insn[1] == 0xC4) { 12004 Int lane; 12005 t4 = newTemp(Ity_I16); 12006 modrm = insn[2]; 12007 12008 if (epartIsReg(modrm)) { 12009 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 12010 delta += 3+1; 12011 lane = insn[3+1-1]; 12012 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 12013 nameIReg16(eregOfRexRM(pfx,modrm)), 12014 nameXMMReg(gregOfRexRM(pfx,modrm))); 12015 } else { 12016 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12017 1/*byte after the amode*/ ); 12018 delta += 3+alen; 12019 lane = insn[3+alen-1]; 12020 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 12021 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 12022 dis_buf, 12023 nameXMMReg(gregOfRexRM(pfx,modrm))); 12024 } 12025 12026 putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) ); 12027 goto decode_success; 12028 } 12029 12030 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 12031 E(xmm or mem) to G(xmm) */ 12032 if (have66noF2noF3(pfx) && sz == 2 12033 && insn[0] == 0x0F && insn[1] == 0xF5) { 12034 IRTemp s1V = newTemp(Ity_V128); 12035 IRTemp s2V = newTemp(Ity_V128); 12036 IRTemp dV = newTemp(Ity_V128); 12037 IRTemp s1Hi = newTemp(Ity_I64); 12038 IRTemp s1Lo = newTemp(Ity_I64); 12039 IRTemp s2Hi = newTemp(Ity_I64); 12040 IRTemp s2Lo = newTemp(Ity_I64); 12041 IRTemp dHi = newTemp(Ity_I64); 12042 IRTemp dLo = newTemp(Ity_I64); 12043 modrm = insn[2]; 12044 if (epartIsReg(modrm)) { 12045 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12046 delta += 2+1; 12047 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12048 nameXMMReg(gregOfRexRM(pfx,modrm))); 12049 } else { 12050 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12051 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 12052 delta += 2+alen; 12053 DIP("pmaddwd %s,%s\n", dis_buf, 12054 nameXMMReg(gregOfRexRM(pfx,modrm))); 12055 } 12056 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12057 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 12058 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 12059 assign( s2Hi, 
unop(Iop_V128HIto64, mkexpr(s2V)) ); 12060 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 12061 assign( dHi, mkIRExprCCall( 12062 Ity_I64, 0/*regparms*/, 12063 "amd64g_calculate_mmx_pmaddwd", 12064 &amd64g_calculate_mmx_pmaddwd, 12065 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 12066 )); 12067 assign( dLo, mkIRExprCCall( 12068 Ity_I64, 0/*regparms*/, 12069 "amd64g_calculate_mmx_pmaddwd", 12070 &amd64g_calculate_mmx_pmaddwd, 12071 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 12072 )); 12073 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 12074 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12075 goto decode_success; 12076 } 12077 12078 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 12079 if (have66noF2noF3(pfx) && sz == 2 12080 && insn[0] == 0x0F && insn[1] == 0xEE) { 12081 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12082 "pmaxsw", Iop_Max16Sx8, False ); 12083 goto decode_success; 12084 } 12085 12086 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 12087 if (have66noF2noF3(pfx) && sz == 2 12088 && insn[0] == 0x0F && insn[1] == 0xDE) { 12089 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12090 "pmaxub", Iop_Max8Ux16, False ); 12091 goto decode_success; 12092 } 12093 12094 /* 66 0F EA = PMINSW -- 16x8 signed min */ 12095 if (have66noF2noF3(pfx) && sz == 2 12096 && insn[0] == 0x0F && insn[1] == 0xEA) { 12097 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12098 "pminsw", Iop_Min16Sx8, False ); 12099 goto decode_success; 12100 } 12101 12102 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 12103 if (have66noF2noF3(pfx) && sz == 2 12104 && insn[0] == 0x0F && insn[1] == 0xDA) { 12105 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12106 "pminub", Iop_Min8Ux16, False ); 12107 goto decode_success; 12108 } 12109 12110 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in 12111 xmm(E), turn them into a byte, and put zero-extend of it in 12112 ireg(G). Doing this directly is just too cumbersome; give up 12113 therefore and call a helper. 
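      Bit i of the result is the top bit of byte lane i of E. */
#if 0
   /* Editorial sketch, compiled out: a scalar model of the value the
      helper returns.  The name is hypothetical, for illustration
      only. */
   static UInt ref_pmovmskb ( const UChar* lanes16 )
   {
      UInt i, r = 0;
      for (i = 0; i < 16; i++)
         r |= ((UInt)(lanes16[i] >> 7) & 1) << i;
      return r;
   }
#endif
   /*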
*/
   /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I64);
         assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
         assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
         t5 = newTemp(Ity_I64);
         assign(t5, mkIRExprCCall(
                       Ity_I64, 0/*regparms*/,
                       "amd64g_calculate_sse_pmovmskb",
                       &amd64g_calculate_sse_pmovmskb,
                       mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
         putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
         DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameIReg32(gregOfRexRM(pfx,modrm)));
         delta += 3;
         goto decode_success;
      }
      /* else fall through */
   }

   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE4) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xE5) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D5 = PMULLW -- 16x8 multiply, low half of result */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xD5) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   if (haveNo66noF2noF3(pfx) && sz == 4
       && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = insn[2];

      do_MMX_preamble();
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregLO3ofRM(modrm)));
      }

      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregLO3ofRM(modrm),
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }

   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   if (have66noF2noF3(pfx) && sz == 2
       && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
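#if 0
      /* Editorial sketch, compiled out: a scalar model of the two
         64-bit result lanes assembled below -- lanes 0 and 2 of each
         operand, multiplied as unsigned 32x32->64, which is exactly
         what Iop_MullU32 denotes.  The helper name is hypothetical,
         for illustration only. */
      static ULong ref_pmuludq_lane ( UInt d, UInt s )
      {
         /* result lane 0 = ref_pmuludq_lane(d0, s0)
            result lane 1 = ref_pmuludq_lane(d2, s2) */
         return (ULong)d * (ULong)s;
      }
#endif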
12210 t0 = newTemp(Ity_I64); 12211 modrm = insn[2]; 12212 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12213 12214 if (epartIsReg(modrm)) { 12215 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12216 delta += 2+1; 12217 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12218 nameXMMReg(gregOfRexRM(pfx,modrm))); 12219 } else { 12220 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12221 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12222 delta += 2+alen; 12223 DIP("pmuludq %s,%s\n", dis_buf, 12224 nameXMMReg(gregOfRexRM(pfx,modrm))); 12225 } 12226 12227 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 12228 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12229 12230 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); 12231 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) ); 12232 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); 12233 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) ); 12234 goto decode_success; 12235 } 12236 12237 /* 66 0F EB = POR */ 12238 if (have66noF2noF3(pfx) && sz == 2 12239 && insn[0] == 0x0F && insn[1] == 0xEB) { 12240 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 ); 12241 goto decode_success; 12242 } 12243 12244 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 12245 from E(xmm or mem) to G(xmm) */ 12246 if (have66noF2noF3(pfx) && sz == 2 12247 && insn[0] == 0x0F && insn[1] == 0xF6) { 12248 IRTemp s1V = newTemp(Ity_V128); 12249 IRTemp s2V = newTemp(Ity_V128); 12250 IRTemp dV = newTemp(Ity_V128); 12251 IRTemp s1Hi = newTemp(Ity_I64); 12252 IRTemp s1Lo = newTemp(Ity_I64); 12253 IRTemp s2Hi = newTemp(Ity_I64); 12254 IRTemp s2Lo = newTemp(Ity_I64); 12255 IRTemp dHi = newTemp(Ity_I64); 12256 IRTemp dLo = newTemp(Ity_I64); 12257 modrm = insn[2]; 12258 if (epartIsReg(modrm)) { 12259 assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12260 delta += 2+1; 12261 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12262 nameXMMReg(gregOfRexRM(pfx,modrm))); 12263 } else { 12264 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12265 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 12266 delta += 2+alen; 12267 DIP("psadbw %s,%s\n", dis_buf, 12268 nameXMMReg(gregOfRexRM(pfx,modrm))); 12269 } 12270 assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12271 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 12272 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 12273 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 12274 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 12275 assign( dHi, mkIRExprCCall( 12276 Ity_I64, 0/*regparms*/, 12277 "amd64g_calculate_mmx_psadbw", 12278 &amd64g_calculate_mmx_psadbw, 12279 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 12280 )); 12281 assign( dLo, mkIRExprCCall( 12282 Ity_I64, 0/*regparms*/, 12283 "amd64g_calculate_mmx_psadbw", 12284 &amd64g_calculate_mmx_psadbw, 12285 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 12286 )); 12287 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 12288 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12289 goto decode_success; 12290 } 12291 12292 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 12293 if (have66noF2noF3(pfx) && sz == 2 12294 && insn[0] == 0x0F && insn[1] == 0x70) { 12295 Int order; 12296 IRTemp sV, dV, s3, s2, s1, s0; 12297 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12298 sV = newTemp(Ity_V128); 12299 dV = newTemp(Ity_V128); 12300 modrm = insn[2]; 12301 if (epartIsReg(modrm)) { 12302 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12303 order = (Int)insn[3]; 12304 delta += 3+1; 12305 
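         /* Editorial sketch, compiled out: each two-bit field of the
            order byte picks the source lane for one result lane, as
            the SEL macro below encodes.  The helper name is
            hypothetical, for illustration only. */
#if 0
         static void ref_pshufd ( UInt* dst4, const UInt* src4, UInt order )
         {
            Int i;
            for (i = 0; i < 4; i++)
               dst4[i] = src4[ (order >> (2*i)) & 3 ];
            /* e.g. order == 0x1B reverses the four lanes */
         }
#endif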
DIP("pshufd $%d,%s,%s\n", order, 12306 nameXMMReg(eregOfRexRM(pfx,modrm)), 12307 nameXMMReg(gregOfRexRM(pfx,modrm))); 12308 } else { 12309 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12310 1/*byte after the amode*/ ); 12311 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12312 order = (Int)insn[2+alen]; 12313 delta += 2+alen+1; 12314 DIP("pshufd $%d,%s,%s\n", order, 12315 dis_buf, 12316 nameXMMReg(gregOfRexRM(pfx,modrm))); 12317 } 12318 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12319 12320 # define SEL(n) \ 12321 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 12322 assign(dV, 12323 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 12324 SEL((order>>2)&3), SEL((order>>0)&3) ) 12325 ); 12326 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12327 # undef SEL 12328 goto decode_success; 12329 } 12330 12331 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 12332 mem) to G(xmm), and copy lower half */ 12333 if (haveF3no66noF2(pfx) && sz == 4 12334 && insn[0] == 0x0F && insn[1] == 0x70) { 12335 Int order; 12336 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; 12337 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12338 sV = newTemp(Ity_V128); 12339 dV = newTemp(Ity_V128); 12340 sVhi = newTemp(Ity_I64); 12341 dVhi = newTemp(Ity_I64); 12342 modrm = insn[2]; 12343 if (epartIsReg(modrm)) { 12344 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12345 order = (Int)insn[3]; 12346 delta += 3+1; 12347 DIP("pshufhw $%d,%s,%s\n", order, 12348 nameXMMReg(eregOfRexRM(pfx,modrm)), 12349 nameXMMReg(gregOfRexRM(pfx,modrm))); 12350 } else { 12351 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12352 1/*byte after the amode*/ ); 12353 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12354 order = (Int)insn[2+alen]; 12355 delta += 2+alen+1; 12356 DIP("pshufhw $%d,%s,%s\n", order, 12357 dis_buf, 12358 nameXMMReg(gregOfRexRM(pfx,modrm))); 12359 } 12360 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12361 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); 12362 12363 # define SEL(n) \ 12364 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 12365 assign(dVhi, 12366 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 12367 SEL((order>>2)&3), SEL((order>>0)&3) ) 12368 ); 12369 assign(dV, binop( Iop_64HLtoV128, 12370 mkexpr(dVhi), 12371 unop(Iop_V128to64, mkexpr(sV))) ); 12372 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12373 # undef SEL 12374 goto decode_success; 12375 } 12376 12377 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 12378 mem) to G(xmm), and copy upper half */ 12379 if (haveF2no66noF3(pfx) && sz == 4 12380 && insn[0] == 0x0F && insn[1] == 0x70) { 12381 Int order; 12382 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; 12383 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12384 sV = newTemp(Ity_V128); 12385 dV = newTemp(Ity_V128); 12386 sVlo = newTemp(Ity_I64); 12387 dVlo = newTemp(Ity_I64); 12388 modrm = insn[2]; 12389 if (epartIsReg(modrm)) { 12390 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 12391 order = (Int)insn[3]; 12392 delta += 3+1; 12393 DIP("pshuflw $%d,%s,%s\n", order, 12394 nameXMMReg(eregOfRexRM(pfx,modrm)), 12395 nameXMMReg(gregOfRexRM(pfx,modrm))); 12396 } else { 12397 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 12398 1/*byte after the amode*/ ); 12399 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12400 order = (Int)insn[2+alen]; 12401 delta += 2+alen+1; 12402 DIP("pshuflw $%d,%s,%s\n", order, 12403 dis_buf, 12404 nameXMMReg(gregOfRexRM(pfx,modrm))); 12405 } 12406 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); 12407 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); 12408 12409 # define SEL(n) \ 12410 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 12411 assign(dVlo, 12412 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 12413 SEL((order>>2)&3), SEL((order>>0)&3) ) 12414 ); 12415 assign(dV, binop( Iop_64HLtoV128, 12416 unop(Iop_V128HIto64, mkexpr(sV)), 12417 mkexpr(dVlo) ) ); 12418 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV)); 12419 # undef SEL 12420 goto decode_success; 12421 } 12422 12423 /* 66 0F 72 /6 ib = PSLLD by immediate */ 12424 if (have66noF2noF3(pfx) && sz == 2 12425 && insn[0] == 0x0F && insn[1] == 0x72 12426 && epartIsReg(insn[2]) 12427 && gregLO3ofRM(insn[2]) == 6) { 12428 delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 ); 12429 goto decode_success; 12430 } 12431 12432 /* 66 0F F2 = PSLLD by E */ 12433 if (have66noF2noF3(pfx) && sz == 2 12434 && insn[0] == 0x0F && insn[1] == 0xF2) { 12435 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 ); 12436 goto decode_success; 12437 } 12438 12439 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 12440 /* note, if mem case ever filled in, 1 byte after amode */ 12441 if (have66noF2noF3(pfx) && sz == 2 12442 && insn[0] == 0x0F && insn[1] == 0x73 12443 && epartIsReg(insn[2]) 12444 && gregLO3ofRM(insn[2]) == 7) { 12445 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 12446 Int imm = (Int)insn[3]; 12447 Int reg = eregOfRexRM(pfx,insn[2]); 12448 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 12449 vassert(imm >= 0 && imm <= 255); 12450 delta += 4; 12451 12452 sV = newTemp(Ity_V128); 12453 dV = newTemp(Ity_V128); 12454 hi64 = newTemp(Ity_I64); 12455 lo64 = newTemp(Ity_I64); 12456 hi64r = newTemp(Ity_I64); 12457 lo64r = newTemp(Ity_I64); 12458 12459 if (imm >= 16) { 12460 putXMMReg(reg, mkV128(0x0000)); 12461 goto decode_success; 12462 } 12463 12464 assign( sV, getXMMReg(reg) ); 12465 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 12466 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 12467 12468 if (imm == 0) { 12469 assign( lo64r, mkexpr(lo64) ); 12470 assign( hi64r, mkexpr(hi64) ); 12471 } 12472 else 12473 
if (imm == 8) { 12474 assign( lo64r, mkU64(0) ); 12475 assign( hi64r, mkexpr(lo64) ); 12476 } 12477 else 12478 if (imm > 8) { 12479 assign( lo64r, mkU64(0) ); 12480 assign( hi64r, binop( Iop_Shl64, 12481 mkexpr(lo64), 12482 mkU8( 8*(imm-8) ) )); 12483 } else { 12484 assign( lo64r, binop( Iop_Shl64, 12485 mkexpr(lo64), 12486 mkU8(8 * imm) )); 12487 assign( hi64r, 12488 binop( Iop_Or64, 12489 binop(Iop_Shl64, mkexpr(hi64), 12490 mkU8(8 * imm)), 12491 binop(Iop_Shr64, mkexpr(lo64), 12492 mkU8(8 * (8 - imm)) ) 12493 ) 12494 ); 12495 } 12496 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 12497 putXMMReg(reg, mkexpr(dV)); 12498 goto decode_success; 12499 } 12500 12501 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 12502 if (have66noF2noF3(pfx) && sz == 2 12503 && insn[0] == 0x0F && insn[1] == 0x73 12504 && epartIsReg(insn[2]) 12505 && gregLO3ofRM(insn[2]) == 6) { 12506 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 ); 12507 goto decode_success; 12508 } 12509 12510 /* 66 0F F3 = PSLLQ by E */ 12511 if (have66noF2noF3(pfx) && sz == 2 12512 && insn[0] == 0x0F && insn[1] == 0xF3) { 12513 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 ); 12514 goto decode_success; 12515 } 12516 12517 /* 66 0F 71 /6 ib = PSLLW by immediate */ 12518 if (have66noF2noF3(pfx) && sz == 2 12519 && insn[0] == 0x0F && insn[1] == 0x71 12520 && epartIsReg(insn[2]) 12521 && gregLO3ofRM(insn[2]) == 6) { 12522 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 ); 12523 goto decode_success; 12524 } 12525 12526 /* 66 0F F1 = PSLLW by E */ 12527 if (have66noF2noF3(pfx) && sz == 2 12528 && insn[0] == 0x0F && insn[1] == 0xF1) { 12529 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 ); 12530 goto decode_success; 12531 } 12532 12533 /* 66 0F 72 /4 ib = PSRAD by immediate */ 12534 if (have66noF2noF3(pfx) && sz == 2 12535 && insn[0] == 0x0F && insn[1] == 0x72 12536 && epartIsReg(insn[2]) 12537 && gregLO3ofRM(insn[2]) == 4) { 12538 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 ); 12539 goto decode_success; 12540 } 12541 12542 /* 66 0F E2 = PSRAD by E */ 12543 if (have66noF2noF3(pfx) && sz == 2 12544 && insn[0] == 0x0F && insn[1] == 0xE2) { 12545 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 ); 12546 goto decode_success; 12547 } 12548 12549 /* 66 0F 71 /4 ib = PSRAW by immediate */ 12550 if (have66noF2noF3(pfx) && sz == 2 12551 && insn[0] == 0x0F && insn[1] == 0x71 12552 && epartIsReg(insn[2]) 12553 && gregLO3ofRM(insn[2]) == 4) { 12554 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 ); 12555 goto decode_success; 12556 } 12557 12558 /* 66 0F E1 = PSRAW by E */ 12559 if (have66noF2noF3(pfx) && sz == 2 12560 && insn[0] == 0x0F && insn[1] == 0xE1) { 12561 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 ); 12562 goto decode_success; 12563 } 12564 12565 /* 66 0F 72 /2 ib = PSRLD by immediate */ 12566 if (have66noF2noF3(pfx) && sz == 2 12567 && insn[0] == 0x0F && insn[1] == 0x72 12568 && epartIsReg(insn[2]) 12569 && gregLO3ofRM(insn[2]) == 2) { 12570 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 ); 12571 goto decode_success; 12572 } 12573 12574 /* 66 0F D2 = PSRLD by E */ 12575 if (have66noF2noF3(pfx) && sz == 2 12576 && insn[0] == 0x0F && insn[1] == 0xD2) { 12577 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 ); 12578 goto decode_success; 12579 } 12580 12581 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 12582 /* note, if mem case ever filled in, 1 
byte after amode */ 12583 if (have66noF2noF3(pfx) && sz == 2 12584 && insn[0] == 0x0F && insn[1] == 0x73 12585 && epartIsReg(insn[2]) 12586 && gregLO3ofRM(insn[2]) == 3) { 12587 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 12588 Int imm = (Int)insn[3]; 12589 Int reg = eregOfRexRM(pfx,insn[2]); 12590 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 12591 vassert(imm >= 0 && imm <= 255); 12592 delta += 4; 12593 12594 sV = newTemp(Ity_V128); 12595 dV = newTemp(Ity_V128); 12596 hi64 = newTemp(Ity_I64); 12597 lo64 = newTemp(Ity_I64); 12598 hi64r = newTemp(Ity_I64); 12599 lo64r = newTemp(Ity_I64); 12600 12601 if (imm >= 16) { 12602 putXMMReg(reg, mkV128(0x0000)); 12603 goto decode_success; 12604 } 12605 12606 assign( sV, getXMMReg(reg) ); 12607 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 12608 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 12609 12610 if (imm == 0) { 12611 assign( lo64r, mkexpr(lo64) ); 12612 assign( hi64r, mkexpr(hi64) ); 12613 } 12614 else 12615 if (imm == 8) { 12616 assign( hi64r, mkU64(0) ); 12617 assign( lo64r, mkexpr(hi64) ); 12618 } 12619 else 12620 if (imm > 8) { 12621 assign( hi64r, mkU64(0) ); 12622 assign( lo64r, binop( Iop_Shr64, 12623 mkexpr(hi64), 12624 mkU8( 8*(imm-8) ) )); 12625 } else { 12626 assign( hi64r, binop( Iop_Shr64, 12627 mkexpr(hi64), 12628 mkU8(8 * imm) )); 12629 assign( lo64r, 12630 binop( Iop_Or64, 12631 binop(Iop_Shr64, mkexpr(lo64), 12632 mkU8(8 * imm)), 12633 binop(Iop_Shl64, mkexpr(hi64), 12634 mkU8(8 * (8 - imm)) ) 12635 ) 12636 ); 12637 } 12638 12639 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 12640 putXMMReg(reg, mkexpr(dV)); 12641 goto decode_success; 12642 } 12643 12644 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 12645 if (have66noF2noF3(pfx) && sz == 2 12646 && insn[0] == 0x0F && insn[1] == 0x73 12647 && epartIsReg(insn[2]) 12648 && gregLO3ofRM(insn[2]) == 2) { 12649 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 ); 12650 goto decode_success; 12651 } 12652 12653 /* 66 0F D3 = PSRLQ by E */ 12654 if (have66noF2noF3(pfx) && sz == 2 12655 && insn[0] == 0x0F && insn[1] == 0xD3) { 12656 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 ); 12657 goto decode_success; 12658 } 12659 12660 /* 66 0F 71 /2 ib = PSRLW by immediate */ 12661 if (have66noF2noF3(pfx) && sz == 2 12662 && insn[0] == 0x0F && insn[1] == 0x71 12663 && epartIsReg(insn[2]) 12664 && gregLO3ofRM(insn[2]) == 2) { 12665 delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 ); 12666 goto decode_success; 12667 } 12668 12669 /* 66 0F D1 = PSRLW by E */ 12670 if (have66noF2noF3(pfx) && sz == 2 12671 && insn[0] == 0x0F && insn[1] == 0xD1) { 12672 delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 ); 12673 goto decode_success; 12674 } 12675 12676 /* 66 0F F8 = PSUBB */ 12677 if (have66noF2noF3(pfx) && sz == 2 12678 && insn[0] == 0x0F && insn[1] == 0xF8) { 12679 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12680 "psubb", Iop_Sub8x16, False ); 12681 goto decode_success; 12682 } 12683 12684 /* 66 0F FA = PSUBD */ 12685 if (have66noF2noF3(pfx) && sz == 2 12686 && insn[0] == 0x0F && insn[1] == 0xFA) { 12687 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12688 "psubd", Iop_Sub32x4, False ); 12689 goto decode_success; 12690 } 12691 12692 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 12693 /* 0F FB = PSUBQ -- sub 64x1 */ 12694 if (haveNo66noF2noF3(pfx) && sz == 4 12695 && insn[0] == 0x0F && insn[1] == 0xFB) { 12696 do_MMX_preamble(); 12697 delta = dis_MMXop_regmem_to_reg ( 12698 vbi, pfx, 
delta+2, insn[1], "psubq", False ); 12699 goto decode_success; 12700 } 12701 12702 /* 66 0F FB = PSUBQ */ 12703 if (have66noF2noF3(pfx) && sz == 2 12704 && insn[0] == 0x0F && insn[1] == 0xFB) { 12705 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12706 "psubq", Iop_Sub64x2, False ); 12707 goto decode_success; 12708 } 12709 12710 /* 66 0F F9 = PSUBW */ 12711 if (have66noF2noF3(pfx) && sz == 2 12712 && insn[0] == 0x0F && insn[1] == 0xF9) { 12713 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12714 "psubw", Iop_Sub16x8, False ); 12715 goto decode_success; 12716 } 12717 12718 /* 66 0F E8 = PSUBSB */ 12719 if (have66noF2noF3(pfx) && sz == 2 12720 && insn[0] == 0x0F && insn[1] == 0xE8) { 12721 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12722 "psubsb", Iop_QSub8Sx16, False ); 12723 goto decode_success; 12724 } 12725 12726 /* 66 0F E9 = PSUBSW */ 12727 if (have66noF2noF3(pfx) && sz == 2 12728 && insn[0] == 0x0F && insn[1] == 0xE9) { 12729 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12730 "psubsw", Iop_QSub16Sx8, False ); 12731 goto decode_success; 12732 } 12733 12734 /* 66 0F D8 = PSUBSB */ 12735 if (have66noF2noF3(pfx) && sz == 2 12736 && insn[0] == 0x0F && insn[1] == 0xD8) { 12737 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12738 "psubusb", Iop_QSub8Ux16, False ); 12739 goto decode_success; 12740 } 12741 12742 /* 66 0F D9 = PSUBSW */ 12743 if (have66noF2noF3(pfx) && sz == 2 12744 && insn[0] == 0x0F && insn[1] == 0xD9) { 12745 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12746 "psubusw", Iop_QSub16Ux8, False ); 12747 goto decode_success; 12748 } 12749 12750 /* 66 0F 68 = PUNPCKHBW */ 12751 if (have66noF2noF3(pfx) && sz == 2 12752 && insn[0] == 0x0F && insn[1] == 0x68) { 12753 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12754 "punpckhbw", 12755 Iop_InterleaveHI8x16, True ); 12756 goto decode_success; 12757 } 12758 12759 /* 66 0F 6A = PUNPCKHDQ */ 12760 if (have66noF2noF3(pfx) && sz == 2 12761 && insn[0] == 0x0F && insn[1] == 0x6A) { 12762 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12763 "punpckhdq", 12764 Iop_InterleaveHI32x4, True ); 12765 goto decode_success; 12766 } 12767 12768 /* 66 0F 6D = PUNPCKHQDQ */ 12769 if (have66noF2noF3(pfx) && sz == 2 12770 && insn[0] == 0x0F && insn[1] == 0x6D) { 12771 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12772 "punpckhqdq", 12773 Iop_InterleaveHI64x2, True ); 12774 goto decode_success; 12775 } 12776 12777 /* 66 0F 69 = PUNPCKHWD */ 12778 if (have66noF2noF3(pfx) && sz == 2 12779 && insn[0] == 0x0F && insn[1] == 0x69) { 12780 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12781 "punpckhwd", 12782 Iop_InterleaveHI16x8, True ); 12783 goto decode_success; 12784 } 12785 12786 /* 66 0F 60 = PUNPCKLBW */ 12787 if (have66noF2noF3(pfx) && sz == 2 12788 && insn[0] == 0x0F && insn[1] == 0x60) { 12789 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12790 "punpcklbw", 12791 Iop_InterleaveLO8x16, True ); 12792 goto decode_success; 12793 } 12794 12795 /* 66 0F 62 = PUNPCKLDQ */ 12796 if (have66noF2noF3(pfx) && sz == 2 12797 && insn[0] == 0x0F && insn[1] == 0x62) { 12798 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12799 "punpckldq", 12800 Iop_InterleaveLO32x4, True ); 12801 goto decode_success; 12802 } 12803 12804 /* 66 0F 6C = PUNPCKLQDQ */ 12805 if (have66noF2noF3(pfx) && sz == 2 12806 && insn[0] == 0x0F && insn[1] == 0x6C) { 12807 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12808 "punpcklqdq", 12809 Iop_InterleaveLO64x2, True ); 12810 goto decode_success; 12811 } 12812 12813 /* 66 0F 61 = PUNPCKLWD */ 12814 if (have66noF2noF3(pfx) && sz == 2 12815 && insn[0] 
== 0x0F && insn[1] == 0x61) { 12816 delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 12817 "punpcklwd", 12818 Iop_InterleaveLO16x8, True ); 12819 goto decode_success; 12820 } 12821 12822 /* 66 0F EF = PXOR */ 12823 if (have66noF2noF3(pfx) && sz == 2 12824 && insn[0] == 0x0F && insn[1] == 0xEF) { 12825 delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 ); 12826 goto decode_success; 12827 } 12828 12829 //.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ 12830 //.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE 12831 //.. //-- && (!epartIsReg(insn[2])) 12832 //.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { 12833 //.. //-- Bool store = gregOfRM(insn[2]) == 0; 12834 //.. //-- vg_assert(sz == 4); 12835 //.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); 12836 //.. //-- t1 = LOW24(pair); 12837 //.. //-- eip += 2+HI8(pair); 12838 //.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, 12839 //.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], 12840 //.. //-- Lit16, (UShort)insn[2], 12841 //.. //-- TempReg, t1 ); 12842 //.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); 12843 //.. //-- goto decode_success; 12844 //.. //-- } 12845 12846 /* 0F AE /7 = CLFLUSH -- flush cache line */ 12847 if (haveNo66noF2noF3(pfx) && sz == 4 12848 && insn[0] == 0x0F && insn[1] == 0xAE 12849 && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) { 12850 12851 /* This is something of a hack. We need to know the size of the 12852 cache line containing addr. Since we don't (easily), assume 12853 256 on the basis that no real cache would have a line that 12854 big. It's safe to invalidate more stuff than we need, just 12855 inefficient. */ 12856 ULong lineszB = 256ULL; 12857 12858 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12859 delta += 2+alen; 12860 12861 /* Round addr down to the start of the containing block. */ 12862 stmt( IRStmt_Put( 12863 OFFB_TISTART, 12864 binop( Iop_And64, 12865 mkexpr(addr), 12866 mkU64( ~(lineszB-1) ))) ); 12867 12868 stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) ); 12869 12870 irsb->jumpkind = Ijk_TInval; 12871 irsb->next = mkU64(guest_RIP_bbstart+delta); 12872 dres.whatNext = Dis_StopHere; 12873 12874 DIP("clflush %s\n", dis_buf); 12875 goto decode_success; 12876 } 12877 12878 /* ---------------------------------------------------- */ 12879 /* --- end of the SSE/SSE2 decoder. --- */ 12880 /* ---------------------------------------------------- */ 12881 12882 /* ---------------------------------------------------- */ 12883 /* --- start of the SSE3 decoder. --- */ 12884 /* ---------------------------------------------------- */ 12885 12886 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 12887 duplicating some lanes (2:2:0:0). */ 12888 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 12889 duplicating some lanes (3:3:1:1). */ 12890 if (haveF3no66noF2(pfx) && sz == 4 12891 && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) { 12892 IRTemp s3, s2, s1, s0; 12893 IRTemp sV = newTemp(Ity_V128); 12894 Bool isH = insn[1] == 0x16; 12895 s3 = s2 = s1 = s0 = IRTemp_INVALID; 12896 12897 modrm = insn[2]; 12898 if (epartIsReg(modrm)) { 12899 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12900 DIP("movs%cdup %s,%s\n", isH ? 
'h' : 'l', 12901 nameXMMReg(eregOfRexRM(pfx,modrm)), 12902 nameXMMReg(gregOfRexRM(pfx,modrm))); 12903 delta += 2+1; 12904 } else { 12905 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12906 gen_SEGV_if_not_16_aligned( addr ); 12907 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12908 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 12909 dis_buf, 12910 nameXMMReg(gregOfRexRM(pfx,modrm))); 12911 delta += 2+alen; 12912 } 12913 12914 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 12915 putXMMReg( gregOfRexRM(pfx,modrm), 12916 isH ? mk128from32s( s3, s3, s1, s1 ) 12917 : mk128from32s( s2, s2, s0, s0 ) ); 12918 goto decode_success; 12919 } 12920 12921 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 12922 duplicating some lanes (0:1:0:1). */ 12923 if (haveF2no66noF3(pfx) 12924 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) 12925 && insn[0] == 0x0F && insn[1] == 0x12) { 12926 IRTemp sV = newTemp(Ity_V128); 12927 IRTemp d0 = newTemp(Ity_I64); 12928 12929 modrm = insn[2]; 12930 if (epartIsReg(modrm)) { 12931 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12932 DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12933 nameXMMReg(gregOfRexRM(pfx,modrm))); 12934 delta += 2+1; 12935 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 12936 } else { 12937 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12938 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 12939 DIP("movddup %s,%s\n", dis_buf, 12940 nameXMMReg(gregOfRexRM(pfx,modrm))); 12941 delta += 2+alen; 12942 } 12943 12944 putXMMReg( gregOfRexRM(pfx,modrm), 12945 binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 12946 goto decode_success; 12947 } 12948 12949 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 12950 if (haveF2no66noF3(pfx) && sz == 4 12951 && insn[0] == 0x0F && insn[1] == 0xD0) { 12952 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 12953 IRTemp eV = newTemp(Ity_V128); 12954 IRTemp gV = newTemp(Ity_V128); 12955 IRTemp addV = newTemp(Ity_V128); 12956 IRTemp subV = newTemp(Ity_V128); 12957 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 12958 12959 modrm = insn[2]; 12960 if (epartIsReg(modrm)) { 12961 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12962 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12963 nameXMMReg(gregOfRexRM(pfx,modrm))); 12964 delta += 2+1; 12965 } else { 12966 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 12967 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 12968 DIP("addsubps %s,%s\n", dis_buf, 12969 nameXMMReg(gregOfRexRM(pfx,modrm))); 12970 delta += 2+alen; 12971 } 12972 12973 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 12974 12975 assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) ); 12976 assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) ); 12977 12978 breakup128to32s( addV, &a3, &a2, &a1, &a0 ); 12979 breakup128to32s( subV, &s3, &s2, &s1, &s0 ); 12980 12981 putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 )); 12982 goto decode_success; 12983 } 12984 12985 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm).
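      As a reference model (a sketch of the intended lane semantics,
      not the IR emitted below): with G = g1:g0 and E = e1:e0,
         res0 = g0 - e0   (low lane subtracts)
         res1 = g1 + e1   (high lane adds)
      The code computes full add and sub vectors and then recombines
      hi64(addV) with lo64(subV).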
*/ 12986 if (have66noF2noF3(pfx) && sz == 2 12987 && insn[0] == 0x0F && insn[1] == 0xD0) { 12988 IRTemp eV = newTemp(Ity_V128); 12989 IRTemp gV = newTemp(Ity_V128); 12990 IRTemp addV = newTemp(Ity_V128); 12991 IRTemp subV = newTemp(Ity_V128); 12992 IRTemp a1 = newTemp(Ity_I64); 12993 IRTemp s0 = newTemp(Ity_I64); 12994 12995 modrm = insn[2]; 12996 if (epartIsReg(modrm)) { 12997 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 12998 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12999 nameXMMReg(gregOfRexRM(pfx,modrm))); 13000 delta += 2+1; 13001 } else { 13002 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13003 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13004 DIP("addsubpd %s,%s\n", dis_buf, 13005 nameXMMReg(gregOfRexRM(pfx,modrm))); 13006 delta += 2+alen; 13007 } 13008 13009 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13010 13011 assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) ); 13012 assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) ); 13013 13014 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 13015 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 13016 13017 putXMMReg( gregOfRexRM(pfx,modrm), 13018 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 13019 goto decode_success; 13020 } 13021 13022 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 13023 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 13024 if (haveF2no66noF3(pfx) && sz == 4 13025 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 13026 IRTemp e3, e2, e1, e0, g3, g2, g1, g0; 13027 IRTemp eV = newTemp(Ity_V128); 13028 IRTemp gV = newTemp(Ity_V128); 13029 IRTemp leftV = newTemp(Ity_V128); 13030 IRTemp rightV = newTemp(Ity_V128); 13031 Bool isAdd = insn[1] == 0x7C; 13032 HChar* str = isAdd ? "add" : "sub"; 13033 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 13034 13035 modrm = insn[2]; 13036 if (epartIsReg(modrm)) { 13037 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13038 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13039 nameXMMReg(gregOfRexRM(pfx,modrm))); 13040 delta += 2+1; 13041 } else { 13042 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13043 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13044 DIP("h%sps %s,%s\n", str, dis_buf, 13045 nameXMMReg(gregOfRexRM(pfx,modrm))); 13046 delta += 2+alen; 13047 } 13048 13049 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13050 13051 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 13052 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 13053 13054 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 13055 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 13056 13057 putXMMReg( gregOfRexRM(pfx,modrm), 13058 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 13059 mkexpr(leftV), mkexpr(rightV) ) ); 13060 goto decode_success; 13061 } 13062 13063 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 13064 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 13065 if (have66noF2noF3(pfx) && sz == 2 13066 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 13067 IRTemp e1 = newTemp(Ity_I64); 13068 IRTemp e0 = newTemp(Ity_I64); 13069 IRTemp g1 = newTemp(Ity_I64); 13070 IRTemp g0 = newTemp(Ity_I64); 13071 IRTemp eV = newTemp(Ity_V128); 13072 IRTemp gV = newTemp(Ity_V128); 13073 IRTemp leftV = newTemp(Ity_V128); 13074 IRTemp rightV = newTemp(Ity_V128); 13075 Bool isAdd = insn[1] == 0x7C; 13076 HChar* str = isAdd ? 
"add" : "sub"; 13077 13078 modrm = insn[2]; 13079 if (epartIsReg(modrm)) { 13080 assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13081 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13082 nameXMMReg(gregOfRexRM(pfx,modrm))); 13083 delta += 2+1; 13084 } else { 13085 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13086 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 13087 DIP("h%spd %s,%s\n", str, dis_buf, 13088 nameXMMReg(gregOfRexRM(pfx,modrm))); 13089 delta += 2+alen; 13090 } 13091 13092 assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13093 13094 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 13095 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 13096 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 13097 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 13098 13099 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 13100 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 13101 13102 putXMMReg( gregOfRexRM(pfx,modrm), 13103 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 13104 mkexpr(leftV), mkexpr(rightV) ) ); 13105 goto decode_success; 13106 } 13107 13108 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 13109 if (haveF2no66noF3(pfx) && sz == 4 13110 && insn[0] == 0x0F && insn[1] == 0xF0) { 13111 modrm = insn[2]; 13112 if (epartIsReg(modrm)) { 13113 goto decode_failure; 13114 } else { 13115 addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); 13116 putXMMReg( gregOfRexRM(pfx,modrm), 13117 loadLE(Ity_V128, mkexpr(addr)) ); 13118 DIP("lddqu %s,%s\n", dis_buf, 13119 nameXMMReg(gregOfRexRM(pfx,modrm))); 13120 delta += 2+alen; 13121 } 13122 goto decode_success; 13123 } 13124 13125 /* ---------------------------------------------------- */ 13126 /* --- end of the SSE3 decoder. --- */ 13127 /* ---------------------------------------------------- */ 13128 13129 /* ---------------------------------------------------- */ 13130 /* --- start of the SSSE3 decoder. 
--- */ 13131 /* ---------------------------------------------------- */ 13132 13133 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 13134 Unsigned Bytes (MMX) */ 13135 if (haveNo66noF2noF3(pfx) 13136 && sz == 4 13137 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 13138 IRTemp sV = newTemp(Ity_I64); 13139 IRTemp dV = newTemp(Ity_I64); 13140 IRTemp sVoddsSX = newTemp(Ity_I64); 13141 IRTemp sVevensSX = newTemp(Ity_I64); 13142 IRTemp dVoddsZX = newTemp(Ity_I64); 13143 IRTemp dVevensZX = newTemp(Ity_I64); 13144 13145 modrm = insn[3]; 13146 do_MMX_preamble(); 13147 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13148 13149 if (epartIsReg(modrm)) { 13150 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13151 delta += 3+1; 13152 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13153 nameMMXReg(gregLO3ofRM(modrm))); 13154 } else { 13155 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13156 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13157 delta += 3+alen; 13158 DIP("pmaddubsw %s,%s\n", dis_buf, 13159 nameMMXReg(gregLO3ofRM(modrm))); 13160 } 13161 13162 /* compute dV unsigned x sV signed */ 13163 assign( sVoddsSX, 13164 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 13165 assign( sVevensSX, 13166 binop(Iop_SarN16x4, 13167 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 13168 mkU8(8)) ); 13169 assign( dVoddsZX, 13170 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 13171 assign( dVevensZX, 13172 binop(Iop_ShrN16x4, 13173 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 13174 mkU8(8)) ); 13175 13176 putMMXReg( 13177 gregLO3ofRM(modrm), 13178 binop(Iop_QAdd16Sx4, 13179 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 13180 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 13181 ) 13182 ); 13183 goto decode_success; 13184 } 13185 13186 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 13187 Unsigned Bytes (XMM) */ 13188 if (have66noF2noF3(pfx) 13189 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13190 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 13191 IRTemp sV = newTemp(Ity_V128); 13192 IRTemp dV = newTemp(Ity_V128); 13193 IRTemp sVoddsSX = newTemp(Ity_V128); 13194 IRTemp sVevensSX = newTemp(Ity_V128); 13195 IRTemp dVoddsZX = newTemp(Ity_V128); 13196 IRTemp dVevensZX = newTemp(Ity_V128); 13197 13198 modrm = insn[3]; 13199 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13200 13201 if (epartIsReg(modrm)) { 13202 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13203 delta += 3+1; 13204 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13205 nameXMMReg(gregOfRexRM(pfx,modrm))); 13206 } else { 13207 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13208 gen_SEGV_if_not_16_aligned( addr ); 13209 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13210 delta += 3+alen; 13211 DIP("pmaddubsw %s,%s\n", dis_buf, 13212 nameXMMReg(gregOfRexRM(pfx,modrm))); 13213 } 13214 13215 /* compute dV unsigned x sV signed */ 13216 assign( sVoddsSX, 13217 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 13218 assign( sVevensSX, 13219 binop(Iop_SarN16x8, 13220 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 13221 mkU8(8)) ); 13222 assign( dVoddsZX, 13223 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 13224 assign( dVevensZX, 13225 binop(Iop_ShrN16x8, 13226 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 13227 mkU8(8)) ); 13228 13229 putXMMReg( 13230 gregOfRexRM(pfx,modrm), 13231 binop(Iop_QAdd16Sx8, 13232 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 13233 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 13234 ) 13235 ); 13236 goto decode_success; 13237 
} 13238 13239 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 13240 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 13241 mmx) and G to G (mmx). */ 13242 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 13243 mmx) and G to G (mmx). */ 13244 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 13245 to G (mmx). */ 13246 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 13247 to G (mmx). */ 13248 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 13249 to G (mmx). */ 13250 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 13251 to G (mmx). */ 13252 13253 if (haveNo66noF2noF3(pfx) 13254 && sz == 4 13255 && insn[0] == 0x0F && insn[1] == 0x38 13256 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 13257 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 13258 HChar* str = "???"; 13259 IROp opV64 = Iop_INVALID; 13260 IROp opCatO = Iop_CatOddLanes16x4; 13261 IROp opCatE = Iop_CatEvenLanes16x4; 13262 IRTemp sV = newTemp(Ity_I64); 13263 IRTemp dV = newTemp(Ity_I64); 13264 13265 modrm = insn[3]; 13266 13267 switch (insn[2]) { 13268 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 13269 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 13270 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 13271 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 13272 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 13273 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 13274 default: vassert(0); 13275 } 13276 if (insn[2] == 0x02 || insn[2] == 0x06) { 13277 opCatO = Iop_InterleaveHI32x2; 13278 opCatE = Iop_InterleaveLO32x2; 13279 } 13280 13281 do_MMX_preamble(); 13282 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13283 13284 if (epartIsReg(modrm)) { 13285 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13286 delta += 3+1; 13287 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13288 nameMMXReg(gregLO3ofRM(modrm))); 13289 } else { 13290 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13291 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13292 delta += 3+alen; 13293 DIP("ph%s %s,%s\n", str, dis_buf, 13294 nameMMXReg(gregLO3ofRM(modrm))); 13295 } 13296 13297 putMMXReg( 13298 gregLO3ofRM(modrm), 13299 binop(opV64, 13300 binop(opCatE,mkexpr(sV),mkexpr(dV)), 13301 binop(opCatO,mkexpr(sV),mkexpr(dV)) 13302 ) 13303 ); 13304 goto decode_success; 13305 } 13306 13307 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 13308 xmm) and G to G (xmm). */ 13309 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 13310 xmm) and G to G (xmm). */ 13311 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 13312 G to G (xmm). */ 13313 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 13314 G to G (xmm). */ 13315 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 13316 G to G (xmm). */ 13317 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 13318 G to G (xmm). 
*/ 13319 13320 if (have66noF2noF3(pfx) 13321 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13322 && insn[0] == 0x0F && insn[1] == 0x38 13323 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 13324 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 13325 HChar* str = "???"; 13326 IROp opV64 = Iop_INVALID; 13327 IROp opCatO = Iop_CatOddLanes16x4; 13328 IROp opCatE = Iop_CatEvenLanes16x4; 13329 IRTemp sV = newTemp(Ity_V128); 13330 IRTemp dV = newTemp(Ity_V128); 13331 IRTemp sHi = newTemp(Ity_I64); 13332 IRTemp sLo = newTemp(Ity_I64); 13333 IRTemp dHi = newTemp(Ity_I64); 13334 IRTemp dLo = newTemp(Ity_I64); 13335 13336 modrm = insn[3]; 13337 13338 switch (insn[2]) { 13339 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 13340 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 13341 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 13342 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 13343 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 13344 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 13345 default: vassert(0); 13346 } 13347 if (insn[2] == 0x02 || insn[2] == 0x06) { 13348 opCatO = Iop_InterleaveHI32x2; 13349 opCatE = Iop_InterleaveLO32x2; 13350 } 13351 13352 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13353 13354 if (epartIsReg(modrm)) { 13355 assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) ); 13356 DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13357 nameXMMReg(gregOfRexRM(pfx,modrm))); 13358 delta += 3+1; 13359 } else { 13360 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13361 gen_SEGV_if_not_16_aligned( addr ); 13362 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13363 DIP("ph%s %s,%s\n", str, dis_buf, 13364 nameXMMReg(gregOfRexRM(pfx,modrm))); 13365 delta += 3+alen; 13366 } 13367 13368 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13369 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13370 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13371 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13372 13373 /* This isn't a particularly efficient way to compute the 13374 result, but at least it avoids a proliferation of IROps, 13375 hence avoids complicating all the backends.
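      For each 128-bit input the even-numbered lanes are gathered
      with opCatE and the odd-numbered lanes with opCatO, so one
      vertical op over the two gathers yields the across-lanes
      result.  A sketch for phaddw (reference semantics, not the
      exact IR built below):
         hi64(res) = Add16x4( [s6 s4 s2 s0], [s7 s5 s3 s1] )
         lo64(res) = Add16x4( [d6 d4 d2 d0], [d7 d5 d3 d1] )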
*/ 13376 putXMMReg( 13377 gregOfRexRM(pfx,modrm), 13378 binop(Iop_64HLtoV128, 13379 binop(opV64, 13380 binop(opCatE,mkexpr(sHi),mkexpr(sLo)), 13381 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) 13382 ), 13383 binop(opV64, 13384 binop(opCatE,mkexpr(dHi),mkexpr(dLo)), 13385 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) 13386 ) 13387 ) 13388 ); 13389 goto decode_success; 13390 } 13391 13392 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale 13393 (MMX) */ 13394 if (haveNo66noF2noF3(pfx) 13395 && sz == 4 13396 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 13397 IRTemp sV = newTemp(Ity_I64); 13398 IRTemp dV = newTemp(Ity_I64); 13399 13400 modrm = insn[3]; 13401 do_MMX_preamble(); 13402 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13403 13404 if (epartIsReg(modrm)) { 13405 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13406 delta += 3+1; 13407 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13408 nameMMXReg(gregLO3ofRM(modrm))); 13409 } else { 13410 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13411 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13412 delta += 3+alen; 13413 DIP("pmulhrsw %s,%s\n", dis_buf, 13414 nameMMXReg(gregLO3ofRM(modrm))); 13415 } 13416 13417 putMMXReg( 13418 gregLO3ofRM(modrm), 13419 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) 13420 ); 13421 goto decode_success; 13422 } 13423 13424 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and 13425 Scale (XMM) */ 13426 if (have66noF2noF3(pfx) 13427 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13428 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { 13429 IRTemp sV = newTemp(Ity_V128); 13430 IRTemp dV = newTemp(Ity_V128); 13431 IRTemp sHi = newTemp(Ity_I64); 13432 IRTemp sLo = newTemp(Ity_I64); 13433 IRTemp dHi = newTemp(Ity_I64); 13434 IRTemp dLo = newTemp(Ity_I64); 13435 13436 modrm = insn[3]; 13437 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13438 13439 if (epartIsReg(modrm)) { 13440 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13441 delta += 3+1; 13442 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13443 nameXMMReg(gregOfRexRM(pfx,modrm))); 13444 } else { 13445 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13446 gen_SEGV_if_not_16_aligned( addr ); 13447 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13448 delta += 3+alen; 13449 DIP("pmulhrsw %s,%s\n", dis_buf, 13450 nameXMMReg(gregOfRexRM(pfx,modrm))); 13451 } 13452 13453 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13454 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13455 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13456 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13457 13458 putXMMReg( 13459 gregOfRexRM(pfx,modrm), 13460 binop(Iop_64HLtoV128, 13461 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 13462 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 13463 ) 13464 ); 13465 goto decode_success; 13466 } 13467 13468 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ 13469 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ 13470 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ 13471 if (haveNo66noF2noF3(pfx) 13472 && sz == 4 13473 && insn[0] == 0x0F && insn[1] == 0x38 13474 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 13475 IRTemp sV = newTemp(Ity_I64); 13476 IRTemp dV = newTemp(Ity_I64); 13477 HChar* str = "???"; 13478 Int laneszB = 0; 13479 13480 switch (insn[2]) { 13481 case 0x08: laneszB = 1; str = "b"; break; 13482 case 0x09: laneszB = 2; str = "w"; break; 13483 case 0x0A: laneszB = 4; str = "d"; break; 13484 default: vassert(0); 13485 } 13486 13487 modrm =
insn[3]; 13488 do_MMX_preamble(); 13489 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13490 13491 if (epartIsReg(modrm)) { 13492 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13493 delta += 3+1; 13494 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13495 nameMMXReg(gregLO3ofRM(modrm))); 13496 } else { 13497 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13498 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13499 delta += 3+alen; 13500 DIP("psign%s %s,%s\n", str, dis_buf, 13501 nameMMXReg(gregLO3ofRM(modrm))); 13502 } 13503 13504 putMMXReg( 13505 gregLO3ofRM(modrm), 13506 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 13507 ); 13508 goto decode_success; 13509 } 13510 13511 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 13512 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 13513 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */ 13514 if (have66noF2noF3(pfx) 13515 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13516 && insn[0] == 0x0F && insn[1] == 0x38 13517 && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { 13518 IRTemp sV = newTemp(Ity_V128); 13519 IRTemp dV = newTemp(Ity_V128); 13520 IRTemp sHi = newTemp(Ity_I64); 13521 IRTemp sLo = newTemp(Ity_I64); 13522 IRTemp dHi = newTemp(Ity_I64); 13523 IRTemp dLo = newTemp(Ity_I64); 13524 HChar* str = "???"; 13525 Int laneszB = 0; 13526 13527 switch (insn[2]) { 13528 case 0x08: laneszB = 1; str = "b"; break; 13529 case 0x09: laneszB = 2; str = "w"; break; 13530 case 0x0A: laneszB = 4; str = "d"; break; 13531 default: vassert(0); 13532 } 13533 13534 modrm = insn[3]; 13535 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13536 13537 if (epartIsReg(modrm)) { 13538 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13539 delta += 3+1; 13540 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13541 nameXMMReg(gregOfRexRM(pfx,modrm))); 13542 } else { 13543 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13544 gen_SEGV_if_not_16_aligned( addr ); 13545 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13546 delta += 3+alen; 13547 DIP("psign%s %s,%s\n", str, dis_buf, 13548 nameXMMReg(gregOfRexRM(pfx,modrm))); 13549 } 13550 13551 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13552 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13553 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13554 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13555 13556 putXMMReg( 13557 gregOfRexRM(pfx,modrm), 13558 binop(Iop_64HLtoV128, 13559 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 13560 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 13561 ) 13562 ); 13563 goto decode_success; 13564 } 13565 13566 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 13567 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ 13568 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 13569 if (haveNo66noF2noF3(pfx) 13570 && sz == 4 13571 && insn[0] == 0x0F && insn[1] == 0x38 13572 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 13573 IRTemp sV = newTemp(Ity_I64); 13574 HChar* str = "???"; 13575 Int laneszB = 0; 13576 13577 switch (insn[2]) { 13578 case 0x1C: laneszB = 1; str = "b"; break; 13579 case 0x1D: laneszB = 2; str = "w"; break; 13580 case 0x1E: laneszB = 4; str = "d"; break; 13581 default: vassert(0); 13582 } 13583 13584 modrm = insn[3]; 13585 do_MMX_preamble(); 13586 13587 if (epartIsReg(modrm)) { 13588 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13589 delta += 3+1; 13590 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 13591 nameMMXReg(gregLO3ofRM(modrm))); 13592 }
else { 13593 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13594 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13595 delta += 3+alen; 13596 DIP("pabs%s %s,%s\n", str, dis_buf, 13597 nameMMXReg(gregLO3ofRM(modrm))); 13598 } 13599 13600 putMMXReg( 13601 gregLO3ofRM(modrm), 13602 dis_PABS_helper( mkexpr(sV), laneszB ) 13603 ); 13604 goto decode_success; 13605 } 13606 13607 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ 13608 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ 13609 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ 13610 if (have66noF2noF3(pfx) 13611 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13612 && insn[0] == 0x0F && insn[1] == 0x38 13613 && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { 13614 IRTemp sV = newTemp(Ity_V128); 13615 IRTemp sHi = newTemp(Ity_I64); 13616 IRTemp sLo = newTemp(Ity_I64); 13617 HChar* str = "???"; 13618 Int laneszB = 0; 13619 13620 switch (insn[2]) { 13621 case 0x1C: laneszB = 1; str = "b"; break; 13622 case 0x1D: laneszB = 2; str = "w"; break; 13623 case 0x1E: laneszB = 4; str = "d"; break; 13624 default: vassert(0); 13625 } 13626 13627 modrm = insn[3]; 13628 13629 if (epartIsReg(modrm)) { 13630 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13631 delta += 3+1; 13632 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 13633 nameXMMReg(gregOfRexRM(pfx,modrm))); 13634 } else { 13635 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13636 gen_SEGV_if_not_16_aligned( addr ); 13637 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13638 delta += 3+alen; 13639 DIP("pabs%s %s,%s\n", str, dis_buf, 13640 nameXMMReg(gregOfRexRM(pfx,modrm))); 13641 } 13642 13643 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13644 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13645 13646 putXMMReg( 13647 gregOfRexRM(pfx,modrm), 13648 binop(Iop_64HLtoV128, 13649 dis_PABS_helper( mkexpr(sHi), laneszB ), 13650 dis_PABS_helper( mkexpr(sLo), laneszB ) 13651 ) 13652 ); 13653 goto decode_success; 13654 } 13655 13656 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 13657 if (haveNo66noF2noF3(pfx) && sz == 4 13658 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 13659 IRTemp sV = newTemp(Ity_I64); 13660 IRTemp dV = newTemp(Ity_I64); 13661 IRTemp res = newTemp(Ity_I64); 13662 13663 modrm = insn[3]; 13664 do_MMX_preamble(); 13665 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13666 13667 if (epartIsReg(modrm)) { 13668 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13669 d64 = (Long)insn[3+1]; 13670 delta += 3+1+1; 13671 DIP("palignr $%d,%s,%s\n", (Int)d64, 13672 nameMMXReg(eregLO3ofRM(modrm)), 13673 nameMMXReg(gregLO3ofRM(modrm))); 13674 } else { 13675 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 13676 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13677 d64 = (Long)insn[3+alen]; 13678 delta += 3+alen+1; 13679 DIP("palignr $%d,%s,%s\n", (Int)d64, 13680 dis_buf, 13681 nameMMXReg(gregLO3ofRM(modrm))); 13682 } 13683 13684 if (d64 == 0) { 13685 assign( res, mkexpr(sV) ); 13686 } 13687 else if (d64 >= 1 && d64 <= 7) { 13688 assign(res, 13689 binop(Iop_Or64, 13690 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), 13691 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) 13692 ))); 13693 } 13694 else if (d64 == 8) { 13695 assign( res, mkexpr(dV) ); 13696 } 13697 else if (d64 >= 9 && d64 <= 15) { 13698 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); 13699 } 13700 else if (d64 >= 16 && d64 <= 255) { 13701 assign( res, mkU64(0) ); 13702 } 13703 else 13704 vassert(0); 13705 13706 putMMXReg(
gregLO3ofRM(modrm), mkexpr(res) ); 13707 goto decode_success; 13708 } 13709 13710 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 13711 if (have66noF2noF3(pfx) 13712 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13713 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { 13714 IRTemp sV = newTemp(Ity_V128); 13715 IRTemp dV = newTemp(Ity_V128); 13716 IRTemp sHi = newTemp(Ity_I64); 13717 IRTemp sLo = newTemp(Ity_I64); 13718 IRTemp dHi = newTemp(Ity_I64); 13719 IRTemp dLo = newTemp(Ity_I64); 13720 IRTemp rHi = newTemp(Ity_I64); 13721 IRTemp rLo = newTemp(Ity_I64); 13722 13723 modrm = insn[3]; 13724 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13725 13726 if (epartIsReg(modrm)) { 13727 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13728 d64 = (Long)insn[3+1]; 13729 delta += 3+1+1; 13730 DIP("palignr $%d,%s,%s\n", (Int)d64, 13731 nameXMMReg(eregOfRexRM(pfx,modrm)), 13732 nameXMMReg(gregOfRexRM(pfx,modrm))); 13733 } else { 13734 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 13735 gen_SEGV_if_not_16_aligned( addr ); 13736 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13737 d64 = (Long)insn[3+alen]; 13738 delta += 3+alen+1; 13739 DIP("palignr $%d,%s,%s\n", (Int)d64, 13740 dis_buf, 13741 nameXMMReg(gregOfRexRM(pfx,modrm))); 13742 } 13743 13744 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13745 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13746 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13747 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13748 13749 if (d64 == 0) { 13750 assign( rHi, mkexpr(sHi) ); 13751 assign( rLo, mkexpr(sLo) ); 13752 } 13753 else if (d64 >= 1 && d64 <= 7) { 13754 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) ); 13755 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) ); 13756 } 13757 else if (d64 == 8) { 13758 assign( rHi, mkexpr(dLo) ); 13759 assign( rLo, mkexpr(sHi) ); 13760 } 13761 else if (d64 >= 9 && d64 <= 15) { 13762 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) ); 13763 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) ); 13764 } 13765 else if (d64 == 16) { 13766 assign( rHi, mkexpr(dHi) ); 13767 assign( rLo, mkexpr(dLo) ); 13768 } 13769 else if (d64 >= 17 && d64 <= 23) { 13770 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) ); 13771 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) ); 13772 } 13773 else if (d64 == 24) { 13774 assign( rHi, mkU64(0) ); 13775 assign( rLo, mkexpr(dHi) ); 13776 } 13777 else if (d64 >= 25 && d64 <= 31) { 13778 assign( rHi, mkU64(0) ); 13779 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) ); 13780 } 13781 else if (d64 >= 32 && d64 <= 255) { 13782 assign( rHi, mkU64(0) ); 13783 assign( rLo, mkU64(0) ); 13784 } 13785 else 13786 vassert(0); 13787 13788 putXMMReg( 13789 gregOfRexRM(pfx,modrm), 13790 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 13791 ); 13792 goto decode_success; 13793 } 13794 13795 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 13796 if (haveNo66noF2noF3(pfx) 13797 && sz == 4 13798 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 13799 IRTemp sV = newTemp(Ity_I64); 13800 IRTemp dV = newTemp(Ity_I64); 13801 13802 modrm = insn[3]; 13803 do_MMX_preamble(); 13804 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 13805 13806 if (epartIsReg(modrm)) { 13807 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13808 delta += 3+1; 13809 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13810 nameMMXReg(gregLO3ofRM(modrm))); 13811 } else { 13812 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13813 assign( sV, loadLE(Ity_I64, 
mkexpr(addr)) ); 13814 delta += 3+alen; 13815 DIP("pshufb %s,%s\n", dis_buf, 13816 nameMMXReg(gregLO3ofRM(modrm))); 13817 } 13818 13819 putMMXReg( 13820 gregLO3ofRM(modrm), 13821 binop( 13822 Iop_And64, 13823 /* permute the lanes */ 13824 binop( 13825 Iop_Perm8x8, 13826 mkexpr(dV), 13827 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 13828 ), 13829 /* mask off lanes which have (index & 0x80) == 0x80 */ 13830 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 13831 ) 13832 ); 13833 goto decode_success; 13834 } 13835 13836 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 13837 if (have66noF2noF3(pfx) 13838 && (sz == 2 || /*redundant REX.W*/ sz == 8) 13839 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 13840 IRTemp sV = newTemp(Ity_V128); 13841 IRTemp dV = newTemp(Ity_V128); 13842 IRTemp sHi = newTemp(Ity_I64); 13843 IRTemp sLo = newTemp(Ity_I64); 13844 IRTemp dHi = newTemp(Ity_I64); 13845 IRTemp dLo = newTemp(Ity_I64); 13846 IRTemp rHi = newTemp(Ity_I64); 13847 IRTemp rLo = newTemp(Ity_I64); 13848 IRTemp sevens = newTemp(Ity_I64); 13849 IRTemp mask0x80hi = newTemp(Ity_I64); 13850 IRTemp mask0x80lo = newTemp(Ity_I64); 13851 IRTemp maskBit3hi = newTemp(Ity_I64); 13852 IRTemp maskBit3lo = newTemp(Ity_I64); 13853 IRTemp sAnd7hi = newTemp(Ity_I64); 13854 IRTemp sAnd7lo = newTemp(Ity_I64); 13855 IRTemp permdHi = newTemp(Ity_I64); 13856 IRTemp permdLo = newTemp(Ity_I64); 13857 13858 modrm = insn[3]; 13859 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13860 13861 if (epartIsReg(modrm)) { 13862 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13863 delta += 3+1; 13864 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13865 nameXMMReg(gregOfRexRM(pfx,modrm))); 13866 } else { 13867 addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 13868 gen_SEGV_if_not_16_aligned( addr ); 13869 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13870 delta += 3+alen; 13871 DIP("pshufb %s,%s\n", dis_buf, 13872 nameXMMReg(gregOfRexRM(pfx,modrm))); 13873 } 13874 13875 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 13876 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 13877 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 13878 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 13879 13880 assign( sevens, mkU64(0x0707070707070707ULL) ); 13881 13882 /* 13883 mask0x80hi = Not(SarN8x8(sHi,7)) 13884 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 13885 sAnd7hi = And(sHi,sevens) 13886 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 13887 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 13888 rHi = And(permdHi,mask0x80hi) 13889 */ 13890 assign( 13891 mask0x80hi, 13892 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 13893 13894 assign( 13895 maskBit3hi, 13896 binop(Iop_SarN8x8, 13897 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 13898 mkU8(7))); 13899 13900 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 13901 13902 assign( 13903 permdHi, 13904 binop( 13905 Iop_Or64, 13906 binop(Iop_And64, 13907 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 13908 mkexpr(maskBit3hi)), 13909 binop(Iop_And64, 13910 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 13911 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 13912 13913 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 13914 13915 /* And the same for the lower half of the result. What fun. 
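      Reference semantics for the whole insn, per result byte i in
      0..15 (a sketch of what the expression tree implements, not the
      IR itself):
         res[i] = (sV[i] & 0x80) ? 0 : dV[ sV[i] & 15 ]
      maskBit3{hi,lo} chooses between Perm8x8 over dHi and over dLo,
      which supplies the cross-half "& 15" indexing, and
      mask0x80{hi,lo} does the top-bit zeroing.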
*/ 13916 13917 assign( 13918 mask0x80lo, 13919 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 13920 13921 assign( 13922 maskBit3lo, 13923 binop(Iop_SarN8x8, 13924 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 13925 mkU8(7))); 13926 13927 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 13928 13929 assign( 13930 permdLo, 13931 binop( 13932 Iop_Or64, 13933 binop(Iop_And64, 13934 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 13935 mkexpr(maskBit3lo)), 13936 binop(Iop_And64, 13937 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 13938 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 13939 13940 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 13941 13942 putXMMReg( 13943 gregOfRexRM(pfx,modrm), 13944 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 13945 ); 13946 goto decode_success; 13947 } 13948 13949 /* ---------------------------------------------------- */ 13950 /* --- end of the SSSE3 decoder. --- */ 13951 /* ---------------------------------------------------- */ 13952 13953 /* ---------------------------------------------------- */ 13954 /* --- start of the SSE4 decoder --- */ 13955 /* ---------------------------------------------------- */ 13956 13957 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8 13958 Blend Packed Double Precision Floating-Point Values (XMM) */ 13959 if ( have66noF2noF3( pfx ) 13960 && sz == 2 13961 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0D ) { 13962 13963 Int imm8; 13964 UShort imm8_mask_16; 13965 13966 IRTemp dst_vec = newTemp(Ity_V128); 13967 IRTemp src_vec = newTemp(Ity_V128); 13968 IRTemp imm8_mask = newTemp(Ity_V128); 13969 13970 modrm = insn[3]; 13971 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 13972 13973 if ( epartIsReg( modrm ) ) { 13974 imm8 = (Int)insn[4]; 13975 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 13976 delta += 3+1+1; 13977 DIP( "blendpd $%d, %s,%s\n", imm8, 13978 nameXMMReg( eregOfRexRM(pfx, modrm) ), 13979 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 13980 } else { 13981 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 13982 1/* imm8 is 1 byte after the amode */ ); 13983 gen_SEGV_if_not_16_aligned( addr ); 13984 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 13985 imm8 = (Int)insn[2+alen+1]; 13986 delta += 3+alen+1; 13987 DIP( "blendpd $%d, %s,%s\n", 13988 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 13989 } 13990 13991 switch( imm8 & 3 ) { 13992 case 0: imm8_mask_16 = 0x0000; break; 13993 case 1: imm8_mask_16 = 0x00FF; break; 13994 case 2: imm8_mask_16 = 0xFF00; break; 13995 case 3: imm8_mask_16 = 0xFFFF; break; 13996 default: vassert(0); break; 13997 } 13998 assign( imm8_mask, mkV128( imm8_mask_16 ) ); 13999 14000 putXMMReg( gregOfRexRM(pfx, modrm), 14001 binop( Iop_OrV128, 14002 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ), 14003 binop( Iop_AndV128, mkexpr(dst_vec), 14004 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 14005 14006 goto decode_success; 14007 } 14008 14009 14010 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8 14011 Blend Packed Single Precision Floating-Point Values (XMM) */ 14012 if ( have66noF2noF3( pfx ) 14013 && sz == 2 14014 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0C ) { 14015 14016 Int imm8; 14017 IRTemp dst_vec = newTemp(Ity_V128); 14018 IRTemp src_vec = newTemp(Ity_V128); 14019 14020 modrm = insn[3]; 14021 14022 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14023 14024 if ( epartIsReg( modrm ) ) { 14025 imm8 = (Int)insn[3+1]; 14026 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 
14027 delta += 3+1+1; 14028 DIP( "blendps $%d, %s,%s\n", imm8, 14029 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14030 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14031 } else { 14032 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14033 1/* imm8 is 1 byte after the amode */ ); 14034 gen_SEGV_if_not_16_aligned( addr ); 14035 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14036 imm8 = (Int)insn[3+alen]; 14037 delta += 3+alen+1; 14038 DIP( "blendps $%d, %s,%s\n", 14039 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14040 } 14041 14042 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 0x0F0F, 14043 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 0xF0F0, 0xF0FF, 14044 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF }; 14045 IRTemp imm8_mask = newTemp(Ity_V128); 14046 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) ); 14047 14048 putXMMReg( gregOfRexRM(pfx, modrm), 14049 binop( Iop_OrV128, 14050 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ), 14051 binop( Iop_AndV128, mkexpr(dst_vec), 14052 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 14053 14054 goto decode_success; 14055 } 14056 14057 14058 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8 14059 Blend Packed Words (XMM) */ 14060 if ( have66noF2noF3( pfx ) 14061 && sz == 2 14062 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0E ) { 14063 14064 Int imm8; 14065 IRTemp dst_vec = newTemp(Ity_V128); 14066 IRTemp src_vec = newTemp(Ity_V128); 14067 14068 modrm = insn[3]; 14069 14070 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14071 14072 if ( epartIsReg( modrm ) ) { 14073 imm8 = (Int)insn[3+1]; 14074 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14075 delta += 3+1+1; 14076 DIP( "pblendw $%d, %s,%s\n", imm8, 14077 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14078 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14079 } else { 14080 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14081 1/* imm8 is 1 byte after the amode */ ); 14082 gen_SEGV_if_not_16_aligned( addr ); 14083 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14084 imm8 = (Int)insn[3+alen]; 14085 delta += 3+alen+1; 14086 DIP( "pblendw $%d, %s,%s\n", 14087 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14088 } 14089 14090 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each 14091 bit in imm8.
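      A worked example (illustrative, not from the docs): imm8 = 0xA5
      = 10100101b gives imm16 = 1100110000110011b = 0xCC33, so each
      set bit of imm8 selects one whole 16-bit lane (two bytes) of
      src_vec in the mkV128 mask built below.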
*/ 14092 Int i; 14093 UShort imm16 = 0; 14094 for (i = 0; i < 8; i++) { 14095 if (imm8 & (1 << i)) 14096 imm16 |= (3 << (2*i)); 14097 } 14098 IRTemp imm16_mask = newTemp(Ity_V128); 14099 assign( imm16_mask, mkV128( imm16 )); 14100 14101 putXMMReg( gregOfRexRM(pfx, modrm), 14102 binop( Iop_OrV128, 14103 binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm16_mask) ), 14104 binop( Iop_AndV128, mkexpr(dst_vec), 14105 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) ); 14106 14107 goto decode_success; 14108 } 14109 14110 14111 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 14112 * Carry-less multiplication of selected XMM quadwords into XMM 14113 * registers (a.k.a multiplication of polynomials over GF(2)) 14114 */ 14115 if ( have66noF2noF3( pfx ) 14116 && sz == 2 14117 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x44 ) { 14118 14119 Int imm8; 14120 IRTemp svec = newTemp(Ity_V128); 14121 IRTemp dvec = newTemp(Ity_V128); 14122 14123 modrm = insn[3]; 14124 14125 assign( dvec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14126 14127 if ( epartIsReg( modrm ) ) { 14128 imm8 = (Int)insn[4]; 14129 assign( svec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14130 delta += 3+1+1; 14131 DIP( "pclmulqdq $%d, %s,%s\n", imm8, 14132 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14133 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14134 } else { 14135 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14136 1/* imm8 is 1 byte after the amode */ ); 14137 gen_SEGV_if_not_16_aligned( addr ); 14138 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) ); 14139 imm8 = (Int)insn[2+alen+1]; 14140 delta += 3+alen+1; 14141 DIP( "pclmulqdq $%d, %s,%s\n", 14142 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14143 } 14144 14145 t0 = newTemp(Ity_I64); 14146 t1 = newTemp(Ity_I64); 14147 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, mkexpr(dvec))); 14148 assign(t1, unop((imm8&16) ? 
Iop_V128HIto64 : Iop_V128to64, mkexpr(svec))); 14149 14150 t2 = newTemp(Ity_I64); 14151 t3 = newTemp(Ity_I64); 14152 14153 IRExpr** args; 14154 14155 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0)); 14156 assign(t2, 14157 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 14158 &amd64g_calculate_pclmul, args)); 14159 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1)); 14160 assign(t3, 14161 mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 14162 &amd64g_calculate_pclmul, args)); 14163 14164 IRTemp res = newTemp(Ity_V128); 14165 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2))); 14166 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14167 14168 goto decode_success; 14169 } 14170 14171 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 14172 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 14173 if ( have66noF2noF3( pfx ) 14174 && sz == 2 14175 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x41 ) { 14176 14177 Int imm8; 14178 IRTemp src_vec = newTemp(Ity_V128); 14179 IRTemp dst_vec = newTemp(Ity_V128); 14180 IRTemp and_vec = newTemp(Ity_V128); 14181 IRTemp sum_vec = newTemp(Ity_V128); 14182 14183 modrm = insn[3]; 14184 14185 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14186 14187 if ( epartIsReg( modrm ) ) { 14188 imm8 = (Int)insn[4]; 14189 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14190 delta += 3+1+1; 14191 DIP( "dppd $%d, %s,%s\n", imm8, 14192 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14193 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14194 } else { 14195 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14196 1/* imm8 is 1 byte after the amode */ ); 14197 gen_SEGV_if_not_16_aligned( addr ); 14198 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14199 imm8 = (Int)insn[2+alen+1]; 14200 delta += 3+alen+1; 14201 DIP( "dppd $%d, %s,%s\n", 14202 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14203 } 14204 14205 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 14206 14207 assign( and_vec, binop( Iop_AndV128, 14208 binop( Iop_Mul64Fx2, 14209 mkexpr(dst_vec), mkexpr(src_vec) ), 14210 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 14211 14212 assign( sum_vec, binop( Iop_Add64F0x2, 14213 binop( Iop_InterleaveHI64x2, 14214 mkexpr(and_vec), mkexpr(and_vec) ), 14215 binop( Iop_InterleaveLO64x2, 14216 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 14217 14218 putXMMReg( gregOfRexRM( pfx, modrm ), 14219 binop( Iop_AndV128, 14220 binop( Iop_InterleaveLO64x2, 14221 mkexpr(sum_vec), mkexpr(sum_vec) ), 14222 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 14223 14224 goto decode_success; 14225 } 14226 14227 14228 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 14229 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 14230 if ( have66noF2noF3( pfx ) 14231 && sz == 2 14232 && insn[0] == 0x0F 14233 && insn[1] == 0x3A 14234 && insn[2] == 0x40 ) { 14235 14236 Int imm8; 14237 IRTemp xmm1_vec = newTemp(Ity_V128); 14238 IRTemp xmm2_vec = newTemp(Ity_V128); 14239 IRTemp tmp_prod_vec = newTemp(Ity_V128); 14240 IRTemp prod_vec = newTemp(Ity_V128); 14241 IRTemp sum_vec = newTemp(Ity_V128); 14242 IRTemp v3, v2, v1, v0; 14243 v3 = v2 = v1 = v0 = IRTemp_INVALID; 14244 14245 modrm = insn[3]; 14246 14247 assign( xmm1_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14248 14249 if ( epartIsReg( modrm ) ) { 14250 imm8 = (Int)insn[4]; 14251 assign( xmm2_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14252 delta += 3+1+1; 14253 DIP( "dpps $%d, %s,%s\n", imm8, 14254 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14255 nameXMMReg( 
gregOfRexRM(pfx, modrm) ) ); 14256 } else { 14257 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14258 1/* imm8 is 1 byte after the amode */ ); 14259 gen_SEGV_if_not_16_aligned( addr ); 14260 assign( xmm2_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 14261 imm8 = (Int)insn[2+alen+1]; 14262 delta += 3+alen+1; 14263 DIP( "dpps $%d, %s,%s\n", 14264 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14265 } 14266 14267 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 14268 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 14269 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF }; 14270 14271 assign( tmp_prod_vec, 14272 binop( Iop_AndV128, 14273 binop( Iop_Mul32Fx4, mkexpr(xmm1_vec), mkexpr(xmm2_vec) ), 14274 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 14275 breakup128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 14276 assign( prod_vec, mk128from32s( v3, v1, v2, v0 ) ); 14277 14278 assign( sum_vec, binop( Iop_Add32Fx4, 14279 binop( Iop_InterleaveHI32x4, 14280 mkexpr(prod_vec), mkexpr(prod_vec) ), 14281 binop( Iop_InterleaveLO32x4, 14282 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 14283 14284 putXMMReg( gregOfRexRM(pfx, modrm), 14285 binop( Iop_AndV128, 14286 binop( Iop_Add32Fx4, 14287 binop( Iop_InterleaveHI32x4, 14288 mkexpr(sum_vec), mkexpr(sum_vec) ), 14289 binop( Iop_InterleaveLO32x4, 14290 mkexpr(sum_vec), mkexpr(sum_vec) ) ), 14291 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 14292 14293 goto decode_success; 14294 } 14295 14296 14297 /* 66 0F 3A 21 /r ib = INSERTPS xmm1, xmm2/m32, imm8 14298 Insert Packed Single Precision Floating-Point Value (XMM) */ 14299 if ( have66noF2noF3( pfx ) 14300 && sz == 2 14301 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x21 ) { 14302 14303 Int imm8; 14304 Int imm8_count_s; 14305 Int imm8_count_d; 14306 Int imm8_zmask; 14307 IRTemp dstVec = newTemp(Ity_V128); 14308 IRTemp srcDWord = newTemp(Ity_I32); 14309 14310 modrm = insn[3]; 14311 14312 assign( dstVec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 14313 14314 if ( epartIsReg( modrm ) ) { 14315 IRTemp src_vec = newTemp(Ity_V128); 14316 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14317 14318 IRTemp src_lane_0 = IRTemp_INVALID; 14319 IRTemp src_lane_1 = IRTemp_INVALID; 14320 IRTemp src_lane_2 = IRTemp_INVALID; 14321 IRTemp src_lane_3 = IRTemp_INVALID; 14322 breakup128to32s( src_vec, 14323 &src_lane_3, &src_lane_2, &src_lane_1, &src_lane_0 ); 14324 14325 imm8 = (Int)insn[4]; 14326 imm8_count_s = ((imm8 >> 6) & 3); 14327 switch( imm8_count_s ) { 14328 case 0: assign( srcDWord, mkexpr(src_lane_0) ); break; 14329 case 1: assign( srcDWord, mkexpr(src_lane_1) ); break; 14330 case 2: assign( srcDWord, mkexpr(src_lane_2) ); break; 14331 case 3: assign( srcDWord, mkexpr(src_lane_3) ); break; 14332 default: vassert(0); break; 14333 } 14334 14335 delta += 3+1+1; 14336 DIP( "insertps $%d, %s,%s\n", imm8, 14337 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14338 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14339 } else { 14340 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 14341 1/* const imm8 is 1 byte after the amode */ ); 14342 assign( srcDWord, loadLE( Ity_I32, mkexpr(addr) ) ); 14343 imm8 = (Int)insn[2+alen+1]; 14344 imm8_count_s = 0; 14345 delta += 3+alen+1; 14346 DIP( "insertps $%d, %s,%s\n", 14347 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14348 } 14349 14350 IRTemp dst_lane_0 = IRTemp_INVALID; 14351 IRTemp dst_lane_1 = IRTemp_INVALID; 14352 IRTemp dst_lane_2 = IRTemp_INVALID; 14353 IRTemp dst_lane_3 = IRTemp_INVALID; 14354 breakup128to32s( dstVec, 14355 &dst_lane_3, &dst_lane_2, 
&dst_lane_1, &dst_lane_0 ); 14356 14357 imm8_count_d = ((imm8 >> 4) & 3); 14358 switch( imm8_count_d ) { 14359 case 0: dst_lane_0 = srcDWord; break; 14360 case 1: dst_lane_1 = srcDWord; break; 14361 case 2: dst_lane_2 = srcDWord; break; 14362 case 3: dst_lane_3 = srcDWord; break; 14363 default: vassert(0); break; 14364 } 14365 14366 imm8_zmask = (imm8 & 15); 14367 IRTemp zero_32 = newTemp(Ity_I32); 14368 assign( zero_32, mkU32(0) ); 14369 14370 IRExpr* ire_vec_128 = mk128from32s( 14371 ((imm8_zmask & 8) == 8) ? zero_32 : dst_lane_3, 14372 ((imm8_zmask & 4) == 4) ? zero_32 : dst_lane_2, 14373 ((imm8_zmask & 2) == 2) ? zero_32 : dst_lane_1, 14374 ((imm8_zmask & 1) == 1) ? zero_32 : dst_lane_0 ); 14375 14376 putXMMReg( gregOfRexRM(pfx, modrm), ire_vec_128 ); 14377 14378 goto decode_success; 14379 } 14380 14381 14382 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8 14383 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */ 14384 if ( have66noF2noF3( pfx ) 14385 && sz == 2 14386 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x14 ) { 14387 14388 Int imm8; 14389 IRTemp xmm_vec = newTemp(Ity_V128); 14390 IRTemp sel_lane = newTemp(Ity_I32); 14391 IRTemp shr_lane = newTemp(Ity_I32); 14392 14393 modrm = insn[3]; 14394 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14395 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14396 14397 if ( epartIsReg( modrm ) ) { 14398 imm8 = (Int)insn[3+1]; 14399 } else { 14400 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14401 imm8 = (Int)insn[3+alen]; 14402 } 14403 switch( (imm8 >> 2) & 3 ) { 14404 case 0: assign( sel_lane, mkexpr(t0) ); break; 14405 case 1: assign( sel_lane, mkexpr(t1) ); break; 14406 case 2: assign( sel_lane, mkexpr(t2) ); break; 14407 case 3: assign( sel_lane, mkexpr(t3) ); break; 14408 default: vassert(0); 14409 } 14410 assign( shr_lane, 14411 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 14412 14413 if ( epartIsReg( modrm ) ) { 14414 putIReg64( eregOfRexRM(pfx,modrm), 14415 unop( Iop_32Uto64, 14416 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 14417 14418 delta += 3+1+1; 14419 DIP( "pextrb $%d, %s,%s\n", imm8, 14420 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14421 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14422 } else { 14423 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 14424 delta += 3+alen+1; 14425 DIP( "$%d, pextrb %s,%s\n", 14426 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14427 } 14428 14429 goto decode_success; 14430 } 14431 14432 14433 /* 66 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8 14434 Extract Doubleword int from xmm reg and store in gen.reg or mem. 
(XMM) 14435 Note that this insn has the same opcodes as PEXTRQ, but 14436 here the REX.W bit is _not_ present */ 14437 if ( have66noF2noF3( pfx ) 14438 && sz == 2 /* REX.W is _not_ present */ 14439 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) { 14440 14441 Int imm8_10; 14442 IRTemp xmm_vec = newTemp(Ity_V128); 14443 IRTemp src_dword = newTemp(Ity_I32); 14444 14445 modrm = insn[3]; 14446 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14447 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14448 14449 if ( epartIsReg( modrm ) ) { 14450 imm8_10 = (Int)(insn[3+1] & 3); 14451 } else { 14452 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14453 imm8_10 = (Int)(insn[3+alen] & 3); 14454 } 14455 14456 switch ( imm8_10 ) { 14457 case 0: assign( src_dword, mkexpr(t0) ); break; 14458 case 1: assign( src_dword, mkexpr(t1) ); break; 14459 case 2: assign( src_dword, mkexpr(t2) ); break; 14460 case 3: assign( src_dword, mkexpr(t3) ); break; 14461 default: vassert(0); 14462 } 14463 14464 if ( epartIsReg( modrm ) ) { 14465 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 14466 delta += 3+1+1; 14467 DIP( "pextrd $%d, %s,%s\n", imm8_10, 14468 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14469 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 14470 } else { 14471 storeLE( mkexpr(addr), mkexpr(src_dword) ); 14472 delta += 3+alen+1; 14473 DIP( "pextrd $%d, %s,%s\n", 14474 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14475 } 14476 14477 goto decode_success; 14478 } 14479 14480 14481 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8 14482 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM) 14483 Note that this insn has the same opcodes as PEXTRD, but 14484 here the REX.W bit is present */ 14485 if ( have66noF2noF3( pfx ) 14486 && sz == 8 /* REX.W is present */ 14487 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) { 14488 14489 Int imm8_0; 14490 IRTemp xmm_vec = newTemp(Ity_V128); 14491 IRTemp src_qword = newTemp(Ity_I64); 14492 14493 modrm = insn[3]; 14494 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14495 14496 if ( epartIsReg( modrm ) ) { 14497 imm8_0 = (Int)(insn[3+1] & 1); 14498 } else { 14499 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14500 imm8_0 = (Int)(insn[3+alen] & 1); 14501 } 14502 switch ( imm8_0 ) { 14503 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); break; 14504 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); break; 14505 default: vassert(0); 14506 } 14507 14508 if ( epartIsReg( modrm ) ) { 14509 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) ); 14510 delta += 3+1+1; 14511 DIP( "pextrq $%d, %s,%s\n", imm8_0, 14512 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14513 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14514 } else { 14515 storeLE( mkexpr(addr), mkexpr(src_qword) ); 14516 delta += 3+alen+1; 14517 DIP( "pextrq $%d, %s,%s\n", 14518 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14519 } 14520 14521 goto decode_success; 14522 } 14523 14524 14525 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8 14526 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. 
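      Only bits 2:0 of the immediate are used: they select one of the
      eight 16-bit lanes (the "& 7" below); a register destination
      gets the word zero-extended to 64 bits.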
(XMM) */ 14527 if ( have66noF2noF3( pfx ) 14528 && sz == 2 14529 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x15 ) { 14530 14531 Int imm8_20; 14532 IRTemp xmm_vec = newTemp(Ity_V128); 14533 IRTemp src_word = newTemp(Ity_I16); 14534 14535 modrm = insn[3]; 14536 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14537 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14538 14539 if ( epartIsReg( modrm ) ) { 14540 imm8_20 = (Int)(insn[3+1] & 7); 14541 } else { 14542 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14543 imm8_20 = (Int)(insn[3+alen] & 7); 14544 } 14545 14546 switch ( imm8_20 ) { 14547 case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break; 14548 case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break; 14549 case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break; 14550 case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break; 14551 case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break; 14552 case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break; 14553 case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break; 14554 case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break; 14555 default: vassert(0); 14556 } 14557 14558 if ( epartIsReg( modrm ) ) { 14559 putIReg64( eregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(src_word)) ); 14560 delta += 3+1+1; 14561 DIP( "pextrw $%d, %s,%s\n", imm8_20, 14562 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14563 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 14564 } else { 14565 storeLE( mkexpr(addr), mkexpr(src_word) ); 14566 delta += 3+alen+1; 14567 DIP( "pextrw $%d, %s,%s\n", 14568 imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14569 } 14570 14571 goto decode_success; 14572 } 14573 14574 14575 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 14576 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 14577 if ( have66noF2noF3( pfx ) 14578 && sz == 8 /* REX.W is present */ 14579 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) { 14580 14581 Int imm8_0; 14582 IRTemp src_elems = newTemp(Ity_I64); 14583 IRTemp src_vec = newTemp(Ity_V128); 14584 14585 modrm = insn[3]; 14586 14587 if ( epartIsReg( modrm ) ) { 14588 imm8_0 = (Int)(insn[3+1] & 1); 14589 assign( src_elems, getIReg64( eregOfRexRM(pfx,modrm) ) ); 14590 delta += 3+1+1; 14591 DIP( "pinsrq $%d, %s,%s\n", imm8_0, 14592 nameIReg64( eregOfRexRM(pfx, modrm) ), 14593 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14594 } else { 14595 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14596 imm8_0 = (Int)(insn[3+alen] & 1); 14597 assign( src_elems, loadLE( Ity_I64, mkexpr(addr) ) ); 14598 delta += 3+alen+1; 14599 DIP( "pinsrq $%d, %s,%s\n", 14600 imm8_0, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14601 } 14602 14603 UShort mask = 0; 14604 if ( imm8_0 == 0 ) { 14605 mask = 0xFF00; 14606 assign( src_vec, binop( Iop_64HLtoV128, mkU64(0), mkexpr(src_elems) ) ); 14607 } else { 14608 mask = 0x00FF; 14609 assign( src_vec, binop( Iop_64HLtoV128, mkexpr(src_elems), mkU64(0) ) ); 14610 } 14611 14612 putXMMReg( gregOfRexRM(pfx, modrm), 14613 binop( Iop_OrV128, mkexpr(src_vec), 14614 binop( Iop_AndV128, 14615 getXMMReg( gregOfRexRM(pfx, modrm) ), 14616 mkV128(mask) ) ) ); 14617 14618 goto decode_success; 14619 } 14620 14621 14622 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 14623 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 14624 if ( have66noF2noF3( pfx ) 14625 && sz == 2 /* REX.W is NOT present */ 14626 && insn[0] == 0x0F && insn[1] == 0x3A && 
insn[2] == 0x22 ) { 14627 14628 Int imm8_10; 14629 IRTemp src_elems = newTemp(Ity_I32); 14630 IRTemp src_vec = newTemp(Ity_V128); 14631 IRTemp z32 = newTemp(Ity_I32); 14632 14633 modrm = insn[3]; 14634 14635 if ( epartIsReg( modrm ) ) { 14636 imm8_10 = (Int)(insn[3+1] & 3); 14637 assign( src_elems, getIReg32( eregOfRexRM(pfx,modrm) ) ); 14638 delta += 3+1+1; 14639 DIP( "pinsrd $%d, %s,%s\n", imm8_10, 14640 nameIReg32( eregOfRexRM(pfx, modrm) ), 14641 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14642 } else { 14643 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14644 imm8_10 = (Int)(insn[3+alen] & 3); 14645 assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) ); 14646 delta += 3+alen+1; 14647 DIP( "pinsrd $%d, %s,%s\n", 14648 imm8_10, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14649 } 14650 14651 assign(z32, mkU32(0)); 14652 14653 UShort mask = 0; 14654 switch (imm8_10) { 14655 case 3: mask = 0x0FFF; 14656 assign(src_vec, mk128from32s(src_elems, z32, z32, z32)); 14657 break; 14658 case 2: mask = 0xF0FF; 14659 assign(src_vec, mk128from32s(z32, src_elems, z32, z32)); 14660 break; 14661 case 1: mask = 0xFF0F; 14662 assign(src_vec, mk128from32s(z32, z32, src_elems, z32)); 14663 break; 14664 case 0: mask = 0xFFF0; 14665 assign(src_vec, mk128from32s(z32, z32, z32, src_elems)); 14666 break; 14667 default: vassert(0); 14668 } 14669 14670 putXMMReg( gregOfRexRM(pfx, modrm), 14671 binop( Iop_OrV128, mkexpr(src_vec), 14672 binop( Iop_AndV128, 14673 getXMMReg( gregOfRexRM(pfx, modrm) ), 14674 mkV128(mask) ) ) ); 14675 14676 goto decode_success; 14677 } 14678 14679 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 14680 Extract byte from r32/m8 and insert into xmm1 */ 14681 if ( have66noF2noF3( pfx ) 14682 && sz == 2 14683 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x20 ) { 14684 14685 Int imm8; 14686 IRTemp new8 = newTemp(Ity_I64); 14687 14688 modrm = insn[3]; 14689 14690 if ( epartIsReg( modrm ) ) { 14691 imm8 = (Int)(insn[3+1] & 0xF); 14692 assign( new8, binop(Iop_And64, 14693 unop(Iop_32Uto64, 14694 getIReg32(eregOfRexRM(pfx,modrm))), 14695 mkU64(0xFF))); 14696 delta += 3+1+1; 14697 DIP( "pinsrb $%d,%s,%s\n", imm8, 14698 nameIReg32( eregOfRexRM(pfx, modrm) ), 14699 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14700 } else { 14701 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14702 imm8 = (Int)(insn[3+alen] & 0xF); 14703 assign( new8, unop(Iop_8Uto64, loadLE( Ity_I8, mkexpr(addr) ))); 14704 delta += 3+alen+1; 14705 DIP( "pinsrb $%d,%s,%s\n", 14706 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14707 } 14708 14709 // Create a V128 value which has the selected byte in the 14710 // specified lane, and zeroes everywhere else. 14711 IRTemp tmp128 = newTemp(Ity_V128); 14712 IRTemp halfshift = newTemp(Ity_I64); 14713 assign(halfshift, binop(Iop_Shl64, 14714 mkexpr(new8), mkU8(8 * (imm8 & 7)))); 14715 vassert(imm8 >= 0 && imm8 <= 15); 14716 if (imm8 < 8) { 14717 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 14718 } else { 14719 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 14720 } 14721 14722 UShort mask = ~(1 << imm8); 14723 14724 putXMMReg( gregOfRexRM(pfx, modrm), 14725 binop( Iop_OrV128, 14726 mkexpr(tmp128), 14727 binop( Iop_AndV128, 14728 getXMMReg( gregOfRexRM(pfx, modrm) ), 14729 mkV128(mask) ) ) ); 14730 14731 goto decode_success; 14732 } 14733 14734 14735 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract 14736 float from xmm reg and store in gen.reg or mem. 
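For example, "extractps $2, %xmm1, %eax" copies bits 95:64 of
      %xmm1 (the third 32-bit lane) to the destination as raw bits; no
      float-to-int conversion is involved.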
This is 14737 identical to PEXTRD, except that REX.W appears to be ignored. 14738 */ 14739 if ( have66noF2noF3( pfx ) 14740 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14741 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x17 ) { 14742 14743 Int imm8_10; 14744 IRTemp xmm_vec = newTemp(Ity_V128); 14745 IRTemp src_dword = newTemp(Ity_I32); 14746 14747 modrm = insn[3]; 14748 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 14749 breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 14750 14751 if ( epartIsReg( modrm ) ) { 14752 imm8_10 = (Int)(insn[3+1] & 3); 14753 } else { 14754 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 ); 14755 imm8_10 = (Int)(insn[3+alen] & 3); 14756 } 14757 14758 switch ( imm8_10 ) { 14759 case 0: assign( src_dword, mkexpr(t0) ); break; 14760 case 1: assign( src_dword, mkexpr(t1) ); break; 14761 case 2: assign( src_dword, mkexpr(t2) ); break; 14762 case 3: assign( src_dword, mkexpr(t3) ); break; 14763 default: vassert(0); 14764 } 14765 14766 if ( epartIsReg( modrm ) ) { 14767 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 14768 delta += 3+1+1; 14769 DIP( "extractps $%d, %s,%s\n", imm8_10, 14770 nameXMMReg( gregOfRexRM(pfx, modrm) ), 14771 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 14772 } else { 14773 storeLE( mkexpr(addr), mkexpr(src_dword) ); 14774 delta += 3+alen+1; 14775 DIP( "extractps $%d, %s,%s\n", 14776 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 14777 } 14778 14779 goto decode_success; 14780 } 14781 14782 14783 /* 66 0F 38 37 = PCMPGTQ 14784 64x2 comparison (signed, presumably; the Intel docs don't say :-) 14785 */ 14786 if ( have66noF2noF3( pfx ) && sz == 2 14787 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x37) { 14788 /* FIXME: this needs an alignment check */ 14789 delta = dis_SSEint_E_to_G( vbi, pfx, delta+3, 14790 "pcmpgtq", Iop_CmpGT64Sx2, False ); 14791 goto decode_success; 14792 } 14793 14794 /* 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128 14795 Maximum of Packed Signed Double Word Integers (XMM) 14796 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128 14797 Minimum of Packed Signed Double Word Integers (XMM) */ 14798 if ( have66noF2noF3( pfx ) && sz == 2 14799 && insn[0] == 0x0F && insn[1] == 0x38 14800 && (insn[2] == 0x3D || insn[2] == 0x39)) { 14801 /* FIXME: this needs an alignment check */ 14802 Bool isMAX = insn[2] == 0x3D; 14803 delta = dis_SSEint_E_to_G( 14804 vbi, pfx, delta+3, 14805 isMAX ? "pmaxsd" : "pminsd", 14806 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4, 14807 False 14808 ); 14809 goto decode_success; 14810 } 14811 14812 /* 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128 14813 Maximum of Packed Unsigned Doubleword Integers (XMM) 14814 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128 14815 Minimum of Packed Unsigned Doubleword Integers (XMM) */ 14816 if ( have66noF2noF3( pfx ) && sz == 2 14817 && insn[0] == 0x0F && insn[1] == 0x38 14818 && (insn[2] == 0x3F || insn[2] == 0x3B)) { 14819 /* FIXME: this needs an alignment check */ 14820 Bool isMAX = insn[2] == 0x3F; 14821 delta = dis_SSEint_E_to_G( 14822 vbi, pfx, delta+3, 14823 isMAX ? "pmaxud" : "pminud", 14824 isMAX ? 
Iop_Max32Ux4 : Iop_Min32Ux4, 14825 False 14826 ); 14827 goto decode_success; 14828 } 14829 14830 /* 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128 14831 Maximum of Packed Unsigned Word Integers (XMM) 14832 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128 14833 Minimum of Packed Unsigned Word Integers (XMM) 14834 */ 14835 if ( have66noF2noF3( pfx ) && sz == 2 14836 && insn[0] == 0x0F && insn[1] == 0x38 14837 && (insn[2] == 0x3E || insn[2] == 0x3A)) { 14838 /* FIXME: this needs an alignment check */ 14839 Bool isMAX = insn[2] == 0x3E; 14840 delta = dis_SSEint_E_to_G( 14841 vbi, pfx, delta+3, 14842 isMAX ? "pmaxuw" : "pminuw", 14843 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8, 14844 False 14845 ); 14846 goto decode_success; 14847 } 14848 14849 /* 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 14850 8Sx16 (signed) max 14851 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 14852 8Sx16 (signed) min 14853 */ 14854 if ( have66noF2noF3( pfx ) && sz == 2 14855 && insn[0] == 0x0F && insn[1] == 0x38 14856 && (insn[2] == 0x3C || insn[2] == 0x38)) { 14857 /* FIXME: this needs an alignment check */ 14858 Bool isMAX = insn[2] == 0x3C; 14859 delta = dis_SSEint_E_to_G( 14860 vbi, pfx, delta+3, 14861 isMAX ? "pmaxsb" : "pminsb", 14862 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16, 14863 False 14864 ); 14865 goto decode_success; 14866 } 14867 14868 /* 66 0f 38 20 /r = PMOVSXBW xmm1, xmm2/m64 14869 Packed Move with Sign Extend from Byte to Word (XMM) */ 14870 if ( have66noF2noF3( pfx ) 14871 && sz == 2 14872 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x20 ) { 14873 14874 modrm = insn[3]; 14875 14876 IRTemp srcVec = newTemp(Ity_V128); 14877 14878 if ( epartIsReg( modrm ) ) { 14879 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14880 delta += 3+1; 14881 DIP( "pmovsxbw %s,%s\n", 14882 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14883 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14884 } else { 14885 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 14886 assign( srcVec, 14887 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 14888 delta += 3+alen; 14889 DIP( "pmovsxbw %s,%s\n", 14890 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14891 } 14892 14893 putXMMReg( gregOfRexRM(pfx, modrm), 14894 binop( Iop_SarN16x8, 14895 binop( Iop_ShlN16x8, 14896 binop( Iop_InterleaveLO8x16, 14897 IRExpr_Const( IRConst_V128(0) ), 14898 mkexpr(srcVec) ), 14899 mkU8(8) ), 14900 mkU8(8) ) ); 14901 14902 goto decode_success; 14903 } 14904 14905 14906 /* 66 0f 38 21 /r = PMOVSXBD xmm1, xmm2/m32 14907 Packed Move with Sign Extend from Byte to DWord (XMM) */ 14908 if ( have66noF2noF3( pfx ) 14909 && sz == 2 14910 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x21 ) { 14911 14912 modrm = insn[3]; 14913 14914 IRTemp srcVec = newTemp(Ity_V128); 14915 14916 if ( epartIsReg( modrm ) ) { 14917 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14918 delta += 3+1; 14919 DIP( "pmovsxbd %s,%s\n", 14920 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14921 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14922 } else { 14923 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 14924 assign( srcVec, 14925 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 14926 delta += 3+alen; 14927 DIP( "pmovsxbd %s,%s\n", 14928 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14929 } 14930 14931 IRTemp zeroVec = newTemp(Ity_V128); 14932 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 14933 14934 putXMMReg( gregOfRexRM(pfx, modrm), 14935 binop( Iop_SarN32x4, 14936 binop( Iop_ShlN32x4, 14937 binop( Iop_InterleaveLO8x16, 14938 mkexpr(zeroVec), 14939 binop( Iop_InterleaveLO8x16, 
14940 mkexpr(zeroVec), 14941 mkexpr(srcVec) ) ), 14942 mkU8(24) ), mkU8(24) ) ); 14943 14944 goto decode_success; 14945 } 14946 14947 14948 /* 66 0f 38 22 /r = PMOVSXBQ xmm1, xmm2/m16 14949 Packed Move with Sign Extend from Byte to QWord (XMM) */ 14950 if ( have66noF2noF3(pfx) 14951 && sz == 2 14952 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x22 ) { 14953 14954 modrm = insn[3]; 14955 14956 IRTemp srcBytes = newTemp(Ity_I16); 14957 14958 if ( epartIsReg(modrm) ) { 14959 assign( srcBytes, getXMMRegLane16( eregOfRexRM(pfx, modrm), 0 ) ); 14960 delta += 3+1; 14961 DIP( "pmovsxbq %s,%s\n", 14962 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14963 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14964 } else { 14965 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 14966 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) ); 14967 delta += 3+alen; 14968 DIP( "pmovsxbq %s,%s\n", 14969 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 14970 } 14971 14972 putXMMReg( gregOfRexRM( pfx, modrm ), 14973 binop( Iop_64HLtoV128, 14974 unop( Iop_8Sto64, 14975 unop( Iop_16HIto8, 14976 mkexpr(srcBytes) ) ), 14977 unop( Iop_8Sto64, 14978 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) ); 14979 14980 goto decode_success; 14981 } 14982 14983 14984 /* 66 0f 38 23 /r = PMOVSXWD xmm1, xmm2/m64 14985 Packed Move with Sign Extend from Word to DWord (XMM) */ 14986 if ( have66noF2noF3( pfx ) 14987 && sz == 2 14988 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x23 ) { 14989 14990 modrm = insn[3]; 14991 14992 IRTemp srcVec = newTemp(Ity_V128); 14993 14994 if ( epartIsReg(modrm) ) { 14995 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 14996 delta += 3+1; 14997 DIP( "pmovsxwd %s,%s\n", 14998 nameXMMReg( eregOfRexRM(pfx, modrm) ), 14999 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15000 } else { 15001 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15002 assign( srcVec, 15003 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15004 delta += 3+alen; 15005 DIP( "pmovsxwd %s,%s\n", 15006 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15007 } 15008 15009 putXMMReg( gregOfRexRM(pfx, modrm), 15010 binop( Iop_SarN32x4, 15011 binop( Iop_ShlN32x4, 15012 binop( Iop_InterleaveLO16x8, 15013 IRExpr_Const( IRConst_V128(0) ), 15014 mkexpr(srcVec) ), 15015 mkU8(16) ), 15016 mkU8(16) ) ); 15017 15018 goto decode_success; 15019 } 15020 15021 15022 /* 66 0f 38 24 /r = PMOVSXWQ xmm1, xmm2/m32 15023 Packed Move with Sign Extend from Word to QWord (XMM) */ 15024 if ( have66noF2noF3( pfx ) 15025 && sz == 2 15026 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x24 ) { 15027 15028 modrm = insn[3]; 15029 15030 IRTemp srcBytes = newTemp(Ity_I32); 15031 15032 if ( epartIsReg( modrm ) ) { 15033 assign( srcBytes, getXMMRegLane32( eregOfRexRM(pfx, modrm), 0 ) ); 15034 delta += 3+1; 15035 DIP( "pmovsxwq %s,%s\n", 15036 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15037 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15038 } else { 15039 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15040 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 15041 delta += 3+alen; 15042 DIP( "pmovsxwq %s,%s\n", 15043 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15044 } 15045 15046 putXMMReg( gregOfRexRM( pfx, modrm ), 15047 binop( Iop_64HLtoV128, 15048 unop( Iop_16Sto64, 15049 unop( Iop_32HIto16, mkexpr(srcBytes) ) ), 15050 unop( Iop_16Sto64, 15051 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) ); 15052 15053 goto decode_success; 15054 } 15055 15056 15057 /* 66 0f 38 25 /r = PMOVSXDQ xmm1, xmm2/m64 15058 Packed Move with Sign Extend from Double Word 
to Quad Word (XMM) */ 15059 if ( have66noF2noF3( pfx ) 15060 && sz == 2 15061 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x25 ) { 15062 15063 modrm = insn[3]; 15064 15065 IRTemp srcBytes = newTemp(Ity_I64); 15066 15067 if ( epartIsReg(modrm) ) { 15068 assign( srcBytes, getXMMRegLane64( eregOfRexRM(pfx, modrm), 0 ) ); 15069 delta += 3+1; 15070 DIP( "pmovsxdq %s,%s\n", 15071 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15072 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15073 } else { 15074 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15075 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) ); 15076 delta += 3+alen; 15077 DIP( "pmovsxdq %s,%s\n", 15078 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15079 } 15080 15081 putXMMReg( gregOfRexRM(pfx, modrm), 15082 binop( Iop_64HLtoV128, 15083 unop( Iop_32Sto64, 15084 unop( Iop_64HIto32, mkexpr(srcBytes) ) ), 15085 unop( Iop_32Sto64, 15086 unop( Iop_64to32, mkexpr(srcBytes) ) ) ) ); 15087 15088 goto decode_success; 15089 } 15090 15091 15092 /* 66 0f 38 30 /r = PMOVZXBW xmm1, xmm2/m64 15093 Packed Move with Zero Extend from Byte to Word (XMM) */ 15094 if ( have66noF2noF3(pfx) 15095 && sz == 2 15096 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x30 ) { 15097 15098 modrm = insn[3]; 15099 15100 IRTemp srcVec = newTemp(Ity_V128); 15101 15102 if ( epartIsReg(modrm) ) { 15103 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15104 delta += 3+1; 15105 DIP( "pmovzxbw %s,%s\n", 15106 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15107 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15108 } else { 15109 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15110 assign( srcVec, 15111 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15112 delta += 3+alen; 15113 DIP( "pmovzxbw %s,%s\n", 15114 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15115 } 15116 15117 putXMMReg( gregOfRexRM(pfx, modrm), 15118 binop( Iop_InterleaveLO8x16, 15119 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 15120 15121 goto decode_success; 15122 } 15123 15124 15125 /* 66 0f 38 31 /r = PMOVZXBD xmm1, xmm2/m32 15126 Packed Move with Zero Extend from Byte to DWord (XMM) */ 15127 if ( have66noF2noF3( pfx ) 15128 && sz == 2 15129 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x31 ) { 15130 15131 modrm = insn[3]; 15132 15133 IRTemp srcVec = newTemp(Ity_V128); 15134 15135 if ( epartIsReg(modrm) ) { 15136 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15137 delta += 3+1; 15138 DIP( "pmovzxbd %s,%s\n", 15139 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15140 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15141 } else { 15142 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15143 assign( srcVec, 15144 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 15145 delta += 3+alen; 15146 DIP( "pmovzxbd %s,%s\n", 15147 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15148 } 15149 15150 IRTemp zeroVec = newTemp(Ity_V128); 15151 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15152 15153 putXMMReg( gregOfRexRM( pfx, modrm ), 15154 binop( Iop_InterleaveLO8x16, 15155 mkexpr(zeroVec), 15156 binop( Iop_InterleaveLO8x16, 15157 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 15158 15159 goto decode_success; 15160 } 15161 15162 15163 /* 66 0f 38 32 /r = PMOVZXBQ xmm1, xmm2/m16 15164 Packed Move with Zero Extend from Byte to QWord (XMM) */ 15165 if ( have66noF2noF3( pfx ) 15166 && sz == 2 15167 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x32 ) { 15168 15169 modrm = insn[3]; 15170 15171 IRTemp srcVec = newTemp(Ity_V128); 15172 15173 if ( epartIsReg(modrm) ) { 
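/* Note: whichever branch is taken, only the low two bytes
            [b1,b0] of srcVec are used.  The three InterleaveLO8x16s
            against a zero vector (after this if/else) each double the
            element width by inserting zero bytes: bytes -> words ->
            dwords -> qwords, so the final V128 value is
            [ 0..0 b1 | 0..0 b0 ]. */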
15174 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15175 delta += 3+1; 15176 DIP( "pmovzxbq %s,%s\n", 15177 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15178 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15179 } else { 15180 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15181 assign( srcVec, 15182 unop( Iop_32UtoV128, 15183 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ) ) ) ); 15184 delta += 3+alen; 15185 DIP( "pmovzxbq %s,%s\n", 15186 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15187 } 15188 15189 IRTemp zeroVec = newTemp(Ity_V128); 15190 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15191 15192 putXMMReg( gregOfRexRM( pfx, modrm ), 15193 binop( Iop_InterleaveLO8x16, 15194 mkexpr(zeroVec), 15195 binop( Iop_InterleaveLO8x16, 15196 mkexpr(zeroVec), 15197 binop( Iop_InterleaveLO8x16, 15198 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 15199 15200 goto decode_success; 15201 } 15202 15203 15204 /* 66 0f 38 33 /r = PMOVZXWD xmm1, xmm2/m64 15205 Packed Move with Zero Extend from Word to DWord (XMM) */ 15206 if ( have66noF2noF3( pfx ) 15207 && sz == 2 15208 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x33 ) { 15209 15210 modrm = insn[3]; 15211 15212 IRTemp srcVec = newTemp(Ity_V128); 15213 15214 if ( epartIsReg(modrm) ) { 15215 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15216 delta += 3+1; 15217 DIP( "pmovzxwd %s,%s\n", 15218 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15219 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15220 } else { 15221 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15222 assign( srcVec, 15223 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15224 delta += 3+alen; 15225 DIP( "pmovzxwd %s,%s\n", 15226 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15227 } 15228 15229 putXMMReg( gregOfRexRM(pfx, modrm), 15230 binop( Iop_InterleaveLO16x8, 15231 IRExpr_Const( IRConst_V128(0) ), 15232 mkexpr(srcVec) ) ); 15233 15234 goto decode_success; 15235 } 15236 15237 15238 /* 66 0f 38 34 /r = PMOVZXWQ xmm1, xmm2/m32 15239 Packed Move with Zero Extend from Word to QWord (XMM) */ 15240 if ( have66noF2noF3( pfx ) 15241 && sz == 2 15242 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x34 ) { 15243 15244 modrm = insn[3]; 15245 15246 IRTemp srcVec = newTemp(Ity_V128); 15247 15248 if ( epartIsReg( modrm ) ) { 15249 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15250 delta += 3+1; 15251 DIP( "pmovzxwq %s,%s\n", 15252 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15253 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15254 } else { 15255 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15256 assign( srcVec, 15257 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 15258 delta += 3+alen; 15259 DIP( "pmovzxwq %s,%s\n", 15260 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15261 } 15262 15263 IRTemp zeroVec = newTemp( Ity_V128 ); 15264 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 15265 15266 putXMMReg( gregOfRexRM( pfx, modrm ), 15267 binop( Iop_InterleaveLO16x8, 15268 mkexpr(zeroVec), 15269 binop( Iop_InterleaveLO16x8, 15270 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 15271 15272 goto decode_success; 15273 } 15274 15275 15276 /* 66 0f 38 35 /r = PMOVZXDQ xmm1, xmm2/m64 15277 Packed Move with Zero Extend from DWord to QWord (XMM) */ 15278 if ( have66noF2noF3( pfx ) 15279 && sz == 2 15280 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x35 ) { 15281 15282 modrm = insn[3]; 15283 15284 IRTemp srcVec = newTemp(Ity_V128); 15285 15286 if ( epartIsReg(modrm) ) { 15287 assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15288 
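/* Note: only the low two 32-bit lanes of srcVec matter; the
            single InterleaveLO32x4 against a zero vector below
            zero-extends each of them to 64 bits. */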
delta += 3+1; 15289 DIP( "pmovzxdq %s,%s\n", 15290 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15291 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15292 } else { 15293 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15294 assign( srcVec, 15295 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 15296 delta += 3+alen; 15297 DIP( "pmovzxdq %s,%s\n", 15298 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15299 } 15300 15301 putXMMReg( gregOfRexRM(pfx, modrm), 15302 binop( Iop_InterleaveLO32x4, 15303 IRExpr_Const( IRConst_V128(0) ), 15304 mkexpr(srcVec) ) ); 15305 15306 goto decode_success; 15307 } 15308 15309 15310 /* 66 0f 38 40 /r = PMULLD xmm1, xmm2/m128 15311 32x4 integer multiply from xmm2/m128 to xmm1 */ 15312 if ( have66noF2noF3( pfx ) 15313 && sz == 2 15314 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x40 ) { 15315 15316 modrm = insn[3]; 15317 15318 IRTemp argL = newTemp(Ity_V128); 15319 IRTemp argR = newTemp(Ity_V128); 15320 15321 if ( epartIsReg(modrm) ) { 15322 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 15323 delta += 3+1; 15324 DIP( "pmulld %s,%s\n", 15325 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15326 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15327 } else { 15328 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15329 gen_SEGV_if_not_16_aligned( addr ); 15330 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 15331 delta += 3+alen; 15332 DIP( "pmulld %s,%s\n", 15333 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15334 } 15335 15336 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 15337 15338 putXMMReg( gregOfRexRM(pfx, modrm), 15339 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) ); 15340 15341 goto decode_success; 15342 } 15343 15344 15345 /* F3 0F B8 = POPCNT{W,L,Q} 15346 Count the number of 1 bits in a register 15347 */ 15348 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 15349 && insn[0] == 0x0F && insn[1] == 0xB8) { 15350 vassert(sz == 2 || sz == 4 || sz == 8); 15351 /*IRType*/ ty = szToITy(sz); 15352 IRTemp src = newTemp(ty); 15353 modrm = insn[2]; 15354 if (epartIsReg(modrm)) { 15355 assign(src, getIRegE(sz, pfx, modrm)); 15356 delta += 2+1; 15357 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15358 nameIRegG(sz, pfx, modrm)); 15359 } else { 15360 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0); 15361 assign(src, loadLE(ty, mkexpr(addr))); 15362 delta += 2+alen; 15363 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf, 15364 nameIRegG(sz, pfx, modrm)); 15365 } 15366 15367 IRTemp result = gen_POPCOUNT(ty, src); 15368 putIRegG(sz, pfx, modrm, mkexpr(result)); 15369 15370 // Update flags. This is pretty lame .. perhaps can do better 15371 // if this turns out to be performance critical. 15372 // O S A C P are cleared. Z is set if SRC == 0. 15373 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15374 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15375 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15376 stmt( IRStmt_Put( OFFB_CC_DEP1, 15377 binop(Iop_Shl64, 15378 unop(Iop_1Uto64, 15379 binop(Iop_CmpEQ64, 15380 widenUto64(mkexpr(src)), 15381 mkU64(0))), 15382 mkU8(AMD64G_CC_SHIFT_Z)))); 15383 15384 goto decode_success; 15385 } 15386 15387 15388 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 15389 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 15390 */ 15391 if (have66noF2noF3(pfx) 15392 && sz == 2 15393 && insn[0] == 0x0F && insn[1] == 0x3A 15394 && (insn[2] == 0x0B || insn[2] == 0x0A)) { 15395 15396 Bool isD = insn[2] == 0x0B; 15397 IRTemp src = newTemp(isD ? 
Ity_F64 : Ity_F32); 15398 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 15399 Int imm = 0; 15400 15401 modrm = insn[3]; 15402 15403 if (epartIsReg(modrm)) { 15404 assign( src, 15405 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 15406 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 15407 imm = insn[3+1]; 15408 if (imm & ~7) goto decode_failure; 15409 delta += 3+1+1; 15410 DIP( "rounds%c $%d,%s,%s\n", 15411 isD ? 'd' : 's', 15412 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 15413 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15414 } else { 15415 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15416 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 15417 imm = insn[3+alen]; 15418 if (imm & ~7) goto decode_failure; 15419 delta += 3+alen+1; 15420 DIP( "rounds%c $%d,%s,%s\n", 15421 isD ? 'd' : 's', 15422 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15423 } 15424 15425 /* (imm & 3) contains an Intel-encoded rounding mode. Because 15426 that encoding is the same as the encoding for IRRoundingMode, 15427 we can use that value directly in the IR as a rounding 15428 mode. */ 15429 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 15430 (imm & 4) ? get_sse_roundingmode() 15431 : mkU32(imm & 3), 15432 mkexpr(src)) ); 15433 15434 if (isD) 15435 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 15436 else 15437 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 15438 15439 goto decode_success; 15440 } 15441 15442 15443 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */ 15444 if (have66noF2noF3(pfx) 15445 && sz == 2 15446 && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x09) { 15447 15448 IRTemp src0 = newTemp(Ity_F64); 15449 IRTemp src1 = newTemp(Ity_F64); 15450 IRTemp res0 = newTemp(Ity_F64); 15451 IRTemp res1 = newTemp(Ity_F64); 15452 IRTemp rm = newTemp(Ity_I32); 15453 Int imm = 0; 15454 15455 modrm = insn[3]; 15456 15457 if (epartIsReg(modrm)) { 15458 assign( src0, 15459 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) ); 15460 assign( src1, 15461 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) ); 15462 imm = insn[3+1]; 15463 if (imm & ~7) goto decode_failure; 15464 delta += 3+1+1; 15465 DIP( "roundpd $%d,%s,%s\n", 15466 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 15467 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15468 } else { 15469 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15470 gen_SEGV_if_not_16_aligned(addr); 15471 assign( src0, loadLE(Ity_F64, 15472 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 15473 assign( src1, loadLE(Ity_F64, 15474 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 15475 imm = insn[3+alen]; 15476 if (imm & ~7) goto decode_failure; 15477 delta += 3+alen+1; 15478 DIP( "roundpd $%d,%s,%s\n", 15479 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15480 } 15481 15482 /* (imm & 3) contains an Intel-encoded rounding mode. Because 15483 that encoding is the same as the encoding for IRRoundingMode, 15484 we can use that value directly in the IR as a rounding 15485 mode. */ 15486 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3));

      assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
      assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );

      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );

      goto decode_success;
   }


   /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
   if (have66noF2noF3(pfx)
       && sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x08) {

      IRTemp src0 = newTemp(Ity_F32);
      IRTemp src1 = newTemp(Ity_F32);
      IRTemp src2 = newTemp(Ity_F32);
      IRTemp src3 = newTemp(Ity_F32);
      IRTemp res0 = newTemp(Ity_F32);
      IRTemp res1 = newTemp(Ity_F32);
      IRTemp res2 = newTemp(Ity_F32);
      IRTemp res3 = newTemp(Ity_F32);
      IRTemp rm   = newTemp(Ity_I32);
      Int    imm  = 0;

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( src0,
                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
         assign( src1,
                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
         assign( src2,
                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
         assign( src3,
                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
         imm = insn[3+1];
         if (imm & ~7) goto decode_failure;
         delta += 3+1+1;
         DIP( "roundps $%d,%s,%s\n",
              imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      } else {
         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned(addr);
         assign( src0, loadLE(Ity_F32,
                              binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
         assign( src1, loadLE(Ity_F32,
                              binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
         assign( src2, loadLE(Ity_F32,
                              binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
         assign( src3, loadLE(Ity_F32,
                              binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
         imm = insn[3+alen];
         if (imm & ~7) goto decode_failure;
         delta += 3+alen+1;
         DIP( "roundps $%d,%s,%s\n",
              imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      }

      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
         that encoding is the same as the encoding for IRRoundingMode,
         we can use that value directly in the IR as a rounding
         mode. */
      assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));

      assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
      assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
      assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
      assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );

      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );

      goto decode_success;
   }


   /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
      which we can only decode if we're sure this is an AMD CPU that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently.
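      In particular, lzcnt of zero is defined to return the operand
      width in bits and to set CF, whereas bsr of zero sets ZF and
      leaves its destination undefined; the flags computation below
      follows the lzcnt convention.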
*/ 15573 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 15574 && insn[0] == 0x0F && insn[1] == 0xBD 15575 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) { 15576 vassert(sz == 2 || sz == 4 || sz == 8); 15577 /*IRType*/ ty = szToITy(sz); 15578 IRTemp src = newTemp(ty); 15579 modrm = insn[2]; 15580 if (epartIsReg(modrm)) { 15581 assign(src, getIRegE(sz, pfx, modrm)); 15582 delta += 2+1; 15583 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15584 nameIRegG(sz, pfx, modrm)); 15585 } else { 15586 addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0); 15587 assign(src, loadLE(ty, mkexpr(addr))); 15588 delta += 2+alen; 15589 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 15590 nameIRegG(sz, pfx, modrm)); 15591 } 15592 15593 IRTemp res = gen_LZCNT(ty, src); 15594 putIRegG(sz, pfx, modrm, mkexpr(res)); 15595 15596 // Update flags. This is pretty lame .. perhaps can do better 15597 // if this turns out to be performance critical. 15598 // O S A P are cleared. Z is set if RESULT == 0. 15599 // C is set if SRC is zero. 15600 IRTemp src64 = newTemp(Ity_I64); 15601 IRTemp res64 = newTemp(Ity_I64); 15602 assign(src64, widenUto64(mkexpr(src))); 15603 assign(res64, widenUto64(mkexpr(res))); 15604 15605 IRTemp oszacp = newTemp(Ity_I64); 15606 assign( 15607 oszacp, 15608 binop(Iop_Or64, 15609 binop(Iop_Shl64, 15610 unop(Iop_1Uto64, 15611 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 15612 mkU8(AMD64G_CC_SHIFT_Z)), 15613 binop(Iop_Shl64, 15614 unop(Iop_1Uto64, 15615 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 15616 mkU8(AMD64G_CC_SHIFT_C)) 15617 ) 15618 ); 15619 15620 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15621 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15622 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15623 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 15624 15625 goto decode_success; 15626 } 15627 15628 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1 15629 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1 15630 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1 15631 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1 15632 (selected special cases that actually occur in glibc, 15633 not by any means a complete implementation.) 15634 */ 15635 if (have66noF2noF3(pfx) 15636 && sz == 2 15637 && insn[0] == 0x0F && insn[1] == 0x3A 15638 && (insn[2] >= 0x60 && insn[2] <= 0x63)) { 15639 15640 UInt isISTRx = insn[2] & 2; 15641 UInt isxSTRM = (insn[2] & 1) ^ 1; 15642 UInt regNoL = 0; 15643 UInt regNoR = 0; 15644 UChar imm = 0; 15645 15646 /* This is a nasty kludge. We need to pass 2 x V128 to the 15647 helper (which is clean). Since we can't do that, use a dirty 15648 helper to compute the results directly from the XMM regs in 15649 the guest state. That means for the memory case, we need to 15650 move the left operand into a pseudo-register (XMM16, let's 15651 call it). */ 15652 modrm = insn[3]; 15653 if (epartIsReg(modrm)) { 15654 regNoL = eregOfRexRM(pfx, modrm); 15655 regNoR = gregOfRexRM(pfx, modrm); 15656 imm = insn[3+1]; 15657 delta += 3+1+1; 15658 } else { 15659 regNoL = 16; /* use XMM16 as an intermediary */ 15660 regNoR = gregOfRexRM(pfx, modrm); 15661 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15662 /* No alignment check; I guess that makes sense, given that 15663 these insns are for dealing with C style strings. 
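Presumably callers such as glibc's SSE4.2 string routines also
         take care that the 16-byte load cannot stray onto an unmapped
         page beyond the string's terminator.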
*/ 15664 stmt( IRStmt_Put( OFFB_XMM16, loadLE(Ity_V128, mkexpr(addr)) )); 15665 imm = insn[3+alen]; 15666 delta += 3+alen+1; 15667 } 15668 15669 /* Now we know the XMM reg numbers for the operands, and the 15670 immediate byte. Is it one we can actually handle? Throw out 15671 any cases for which the helper function has not been 15672 verified. */ 15673 switch (imm) { 15674 case 0x00: 15675 case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12: 15676 case 0x1A: case 0x3A: case 0x44: case 0x4A: 15677 break; 15678 default: 15679 goto decode_failure; 15680 } 15681 15682 /* Who ya gonna call? Presumably not Ghostbusters. */ 15683 void* fn = &amd64g_dirtyhelper_PCMPxSTRx; 15684 HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx"; 15685 15686 /* Round up the arguments. Note that this is a kludge -- the 15687 use of mkU64 rather than mkIRExpr_HWord implies the 15688 assumption that the host's word size is 64-bit. */ 15689 UInt gstOffL = regNoL == 16 ? OFFB_XMM16 : xmmGuestRegOffset(regNoL); 15690 UInt gstOffR = xmmGuestRegOffset(regNoR); 15691 15692 IRExpr* opc4_and_imm = mkU64((insn[2] << 8) | (imm & 0xFF)); 15693 IRExpr* gstOffLe = mkU64(gstOffL); 15694 IRExpr* gstOffRe = mkU64(gstOffR); 15695 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8); 15696 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8); 15697 IRExpr** args 15698 = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN ); 15699 15700 IRTemp resT = newTemp(Ity_I64); 15701 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args ); 15702 /* It's not really a dirty call, but we can't use the clean 15703 helper mechanism here for the very lame reason that we can't 15704 pass 2 x V128s by value to a helper, nor get one back. Hence 15705 this roundabout scheme. */ 15706 d->needsBBP = True; 15707 d->nFxState = 2; 15708 d->fxState[0].fx = Ifx_Read; 15709 d->fxState[0].offset = gstOffL; 15710 d->fxState[0].size = sizeof(U128); 15711 d->fxState[1].fx = Ifx_Read; 15712 d->fxState[1].offset = gstOffR; 15713 d->fxState[1].size = sizeof(U128); 15714 if (isxSTRM) { 15715 /* Declare that the helper writes XMM0. */ 15716 d->nFxState = 3; 15717 d->fxState[2].fx = Ifx_Write; 15718 d->fxState[2].offset = xmmGuestRegOffset(0); 15719 d->fxState[2].size = sizeof(U128); 15720 } 15721 15722 stmt( IRStmt_Dirty(d) ); 15723 15724 /* Now resT[15:0] holds the new OSZACP values, so the condition 15725 codes must be updated. And for a xSTRI case, resT[31:16] 15726 holds the new ECX value, so stash that too. */ 15727 if (!isxSTRM) { 15728 putIReg64(R_RCX, binop(Iop_And64, 15729 binop(Iop_Shr64, mkexpr(resT), mkU8(16)), 15730 mkU64(0xFFFF))); 15731 } 15732 15733 stmt( IRStmt_Put( 15734 OFFB_CC_DEP1, 15735 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)) 15736 )); 15737 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15738 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15739 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15740 15741 if (regNoL == 16) { 15742 DIP("pcmp%cstr%c $%x,%s,%s\n", 15743 isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 15744 (UInt)imm, dis_buf, nameXMMReg(regNoR)); 15745 } else { 15746 DIP("pcmp%cstr%c $%x,%s,%s\n", 15747 isISTRx ? 'i' : 'e', isxSTRM ? 
'm' : 'i', 15748 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR)); 15749 } 15750 15751 goto decode_success; 15752 } 15753 15754 15755 /* 66 0f 38 17 /r = PTEST xmm1, xmm2/m128 15756 Logical compare (set ZF and CF from AND/ANDN of the operands) */ 15757 if (have66noF2noF3( pfx ) && sz == 2 15758 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) { 15759 modrm = insn[3]; 15760 IRTemp vecE = newTemp(Ity_V128); 15761 IRTemp vecG = newTemp(Ity_V128); 15762 15763 if ( epartIsReg(modrm) ) { 15764 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 15765 delta += 3+1; 15766 DIP( "ptest %s,%s\n", 15767 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15768 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15769 } else { 15770 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15771 gen_SEGV_if_not_16_aligned( addr ); 15772 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 15773 delta += 3+alen; 15774 DIP( "ptest %s,%s\n", 15775 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15776 } 15777 15778 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 15779 15780 /* Set Z=1 iff (vecE & vecG) == 0 15781 Set C=1 iff (vecE & not vecG) == 0 15782 */ 15783 15784 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 15785 IRTemp andV = newTemp(Ity_V128); 15786 IRTemp andnV = newTemp(Ity_V128); 15787 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG))); 15788 assign(andnV, binop(Iop_AndV128, 15789 mkexpr(vecE), 15790 binop(Iop_XorV128, mkexpr(vecG), 15791 mkV128(0xFFFF)))); 15792 15793 /* The same, but reduced to 64-bit values, by or-ing the top 15794 and bottom 64-bits together. It relies on this trick: 15795 15796 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence 15797 15798 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly 15799 InterleaveHI64x2([a,b],[a,b]) == [a,a] 15800 15801 and so the OR of the above 2 exprs produces 15802 [a OR b, a OR b], from which we simply take the lower half. 15803 */ 15804 IRTemp and64 = newTemp(Ity_I64); 15805 IRTemp andn64 = newTemp(Ity_I64); 15806 15807 assign( 15808 and64, 15809 unop(Iop_V128to64, 15810 binop(Iop_OrV128, 15811 binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)), 15812 binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV)) 15813 ) 15814 ) 15815 ); 15816 15817 assign( 15818 andn64, 15819 unop(Iop_V128to64, 15820 binop(Iop_OrV128, 15821 binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)), 15822 binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV)) 15823 ) 15824 ) 15825 ); 15826 15827 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can 15828 slice out the Z and C bits conveniently. We use the standard 15829 trick all-zeroes -> all-zeroes, anything-else -> all-ones 15830 done by "(x | -x) >>s (word-size - 1)". 15831 */ 15832 IRTemp z64 = newTemp(Ity_I64); 15833 IRTemp c64 = newTemp(Ity_I64); 15834 assign(z64, 15835 unop(Iop_Not64, 15836 binop(Iop_Sar64, 15837 binop(Iop_Or64, 15838 binop(Iop_Sub64, mkU64(0), mkexpr(and64)), 15839 mkexpr(and64) 15840 ), 15841 mkU8(63))) 15842 ); 15843 15844 assign(c64, 15845 unop(Iop_Not64, 15846 binop(Iop_Sar64, 15847 binop(Iop_Or64, 15848 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)), 15849 mkexpr(andn64) 15850 ), 15851 mkU8(63))) 15852 ); 15853 15854 /* And finally, slice out the Z and C flags and set the flags 15855 thunk to COPY for them. OSAP are set to zero. 
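As a worked example: if and64 is nonzero and andn64 is zero,
      then z64 is all-zeroes and c64 is all-ones, so newOSZACP below
      comes out as exactly AMD64G_CC_MASK_C.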
*/ 15856 IRTemp newOSZACP = newTemp(Ity_I64); 15857 assign(newOSZACP, 15858 binop(Iop_Or64, 15859 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)), 15860 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C)) 15861 ) 15862 ); 15863 15864 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP))); 15865 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15866 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15867 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15868 15869 goto decode_success; 15870 } 15871 15872 /* 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran) 15873 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran) 15874 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran) 15875 Blend at various granularities, with XMM0 (implicit operand) 15876 providing the controlling mask. 15877 */ 15878 if (have66noF2noF3(pfx) && sz == 2 15879 && insn[0] == 0x0F && insn[1] == 0x38 15880 && (insn[2] == 0x15 || insn[2] == 0x14 || insn[2] == 0x10)) { 15881 modrm = insn[3]; 15882 15883 HChar* nm = NULL; 15884 UInt gran = 0; 15885 IROp opSAR = Iop_INVALID; 15886 switch (insn[2]) { 15887 case 0x15: 15888 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2; 15889 break; 15890 case 0x14: 15891 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4; 15892 break; 15893 case 0x10: 15894 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16; 15895 break; 15896 } 15897 vassert(nm); 15898 15899 IRTemp vecE = newTemp(Ity_V128); 15900 IRTemp vecG = newTemp(Ity_V128); 15901 IRTemp vec0 = newTemp(Ity_V128); 15902 15903 if ( epartIsReg(modrm) ) { 15904 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 15905 delta += 3+1; 15906 DIP( "%s %s,%s\n", nm, 15907 nameXMMReg( eregOfRexRM(pfx, modrm) ), 15908 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15909 } else { 15910 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15911 gen_SEGV_if_not_16_aligned( addr ); 15912 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 15913 delta += 3+alen; 15914 DIP( "%s %s,%s\n", nm, 15915 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 15916 } 15917 15918 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 15919 assign(vec0, getXMMReg(0)); 15920 15921 /* Now the tricky bit is to convert vec0 into a suitable mask, 15922 by copying the most significant bit of each lane into all 15923 positions in the lane. */ 15924 IRTemp sh = newTemp(Ity_I8); 15925 assign(sh, mkU8(8 * gran - 1)); 15926 15927 IRTemp mask = newTemp(Ity_V128); 15928 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh))); 15929 15930 IRTemp notmask = newTemp(Ity_V128); 15931 assign(notmask, unop(Iop_NotV128, mkexpr(mask))); 15932 15933 IRExpr* res = binop(Iop_OrV128, 15934 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)), 15935 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))); 15936 putXMMReg(gregOfRexRM(pfx, modrm), res); 15937 15938 goto decode_success; 15939 } 15940 15941 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok) 15942 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32 15943 The decoding on this is a bit unusual. 15944 */ 15945 if (haveF2noF3(pfx) 15946 && insn[0] == 0x0F && insn[1] == 0x38 15947 && (insn[2] == 0xF1 15948 || (insn[2] == 0xF0 && !have66(pfx)))) { 15949 modrm = insn[3]; 15950 15951 if (insn[2] == 0xF0) 15952 sz = 1; 15953 else 15954 vassert(sz == 2 || sz == 4 || sz == 8); 15955 15956 IRType tyE = szToITy(sz); 15957 IRTemp valE = newTemp(tyE); 15958 15959 if (epartIsReg(modrm)) { 15960 assign(valE, getIRegE(sz, pfx, modrm)); 15961 delta += 3+1; 15962 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm), 15963 nameIRegG(1==getRexW(pfx) ? 
8 : 4 ,pfx, modrm)); 15964 } else { 15965 addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 ); 15966 assign(valE, loadLE(tyE, mkexpr(addr))); 15967 delta += 3+alen; 15968 DIP("crc32b %s,%s\n", dis_buf, 15969 nameIRegG(1==getRexW(pfx) ? 8 : 4 ,pfx, modrm)); 15970 } 15971 15972 /* Somewhat funny getting/putting of the crc32 value, in order 15973 to ensure that it turns into 64-bit gets and puts. However, 15974 mask off the upper 32 bits so as to not get memcheck false 15975 +ves around the helper call. */ 15976 IRTemp valG0 = newTemp(Ity_I64); 15977 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm), 15978 mkU64(0xFFFFFFFF))); 15979 15980 HChar* nm = NULL; 15981 void* fn = NULL; 15982 switch (sz) { 15983 case 1: nm = "amd64g_calc_crc32b"; 15984 fn = &amd64g_calc_crc32b; break; 15985 case 2: nm = "amd64g_calc_crc32w"; 15986 fn = &amd64g_calc_crc32w; break; 15987 case 4: nm = "amd64g_calc_crc32l"; 15988 fn = &amd64g_calc_crc32l; break; 15989 case 8: nm = "amd64g_calc_crc32q"; 15990 fn = &amd64g_calc_crc32q; break; 15991 } 15992 vassert(nm && fn); 15993 IRTemp valG1 = newTemp(Ity_I64); 15994 assign(valG1, 15995 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn, 15996 mkIRExprVec_2(mkexpr(valG0), 15997 widenUto64(mkexpr(valE))))); 15998 15999 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1))); 16000 goto decode_success; 16001 } 16002 16003 /* ---------------------------------------------------- */ 16004 /* --- end of the SSE4 decoder --- */ 16005 /* ---------------------------------------------------- */ 16006 16007 /*after_sse_decoders:*/ 16008 16009 /* Get the primary opcode. */ 16010 opc = getUChar(delta); delta++; 16011 16012 /* We get here if the current insn isn't SSE, or this CPU doesn't 16013 support SSE. */ 16014 16015 switch (opc) { 16016 16017 /* ------------------------ Control flow --------------- */ 16018 16019 case 0xC2: /* RET imm16 */ 16020 if (have66orF2orF3(pfx)) goto decode_failure; 16021 d64 = getUDisp16(delta); 16022 delta += 2; 16023 dis_ret(vbi, d64); 16024 dres.whatNext = Dis_StopHere; 16025 DIP("ret %lld\n", d64); 16026 break; 16027 16028 case 0xC3: /* RET */ 16029 if (have66orF2(pfx)) goto decode_failure; 16030 /* F3 is acceptable on AMD. */ 16031 dis_ret(vbi, 0); 16032 dres.whatNext = Dis_StopHere; 16033 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 16034 break; 16035 16036 case 0xE8: /* CALL J4 */ 16037 if (haveF2orF3(pfx)) goto decode_failure; 16038 d64 = getSDisp32(delta); delta += 4; 16039 d64 += (guest_RIP_bbstart+delta); 16040 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 16041 t1 = newTemp(Ity_I64); 16042 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 16043 putIReg64(R_RSP, mkexpr(t1)); 16044 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 16045 t2 = newTemp(Ity_I64); 16046 assign(t2, mkU64((Addr64)d64)); 16047 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 16048 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 16049 /* follow into the call target. */ 16050 dres.whatNext = Dis_ResteerU; 16051 dres.continueAt = d64; 16052 } else { 16053 jmp_lit(Ijk_Call,d64); 16054 dres.whatNext = Dis_StopHere; 16055 } 16056 DIP("call 0x%llx\n",d64); 16057 break; 16058 16059 //.. //-- case 0xC8: /* ENTER */ 16060 //.. //-- d32 = getUDisp16(eip); eip += 2; 16061 //.. //-- abyte = getUChar(delta); delta++; 16062 //.. //-- 16063 //.. //-- vg_assert(sz == 4); 16064 //.. //-- vg_assert(abyte == 0); 16065 //.. //-- 16066 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); 16067 //.. 
//-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); 16068 //.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); 16069 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 16070 //.. //-- uLiteral(cb, sz); 16071 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 16072 //.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 16073 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); 16074 //.. //-- if (d32) { 16075 //.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 16076 //.. //-- uLiteral(cb, d32); 16077 //.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 16078 //.. //-- } 16079 //.. //-- DIP("enter 0x%x, 0x%x", d32, abyte); 16080 //.. //-- break; 16081 16082 case 0xC9: /* LEAVE */ 16083 /* In 64-bit mode this defaults to a 64-bit operand size. There 16084 is no way to encode a 32-bit variant. Hence sz==4 but we do 16085 it as if sz=8. */ 16086 if (sz != 4) 16087 goto decode_failure; 16088 t1 = newTemp(Ity_I64); 16089 t2 = newTemp(Ity_I64); 16090 assign(t1, getIReg64(R_RBP)); 16091 /* First PUT RSP looks redundant, but need it because RSP must 16092 always be up-to-date for Memcheck to work... */ 16093 putIReg64(R_RSP, mkexpr(t1)); 16094 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 16095 putIReg64(R_RBP, mkexpr(t2)); 16096 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 16097 DIP("leave\n"); 16098 break; 16099 16100 //.. //-- /* ---------------- Misc weird-ass insns --------------- */ 16101 //.. //-- 16102 //.. //-- case 0x27: /* DAA */ 16103 //.. //-- case 0x2F: /* DAS */ 16104 //.. //-- t1 = newTemp(cb); 16105 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1); 16106 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ 16107 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16108 //.. //-- uWiden(cb, 1, False); 16109 //.. //-- uInstr0(cb, CALLM_S, 0); 16110 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16111 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16112 //.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) ); 16113 //.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO); 16114 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16115 //.. //-- uInstr0(cb, CALLM_E, 0); 16116 //.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL); 16117 //.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n"); 16118 //.. //-- break; 16119 //.. //-- 16120 //.. //-- case 0x37: /* AAA */ 16121 //.. //-- case 0x3F: /* AAS */ 16122 //.. //-- t1 = newTemp(cb); 16123 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); 16124 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */ 16125 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16126 //.. //-- uWiden(cb, 2, False); 16127 //.. //-- uInstr0(cb, CALLM_S, 0); 16128 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16129 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16130 //.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) ); 16131 //.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty); 16132 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16133 //.. //-- uInstr0(cb, CALLM_E, 0); 16134 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); 16135 //.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n"); 16136 //.. //-- break; 16137 //.. //-- 16138 //.. //-- case 0xD4: /* AAM */ 16139 //.. //-- case 0xD5: /* AAD */ 16140 //.. //-- d32 = getUChar(delta); delta++; 16141 //.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !"); 16142 //.. //-- t1 = newTemp(cb); 16143 //.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); 16144 //.. 
//-- /* Widen %AX to 32 bits, so it's all defined when we push it. */ 16145 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1); 16146 //.. //-- uWiden(cb, 2, False); 16147 //.. //-- uInstr0(cb, CALLM_S, 0); 16148 //.. //-- uInstr1(cb, PUSH, 4, TempReg, t1); 16149 //.. //-- uInstr1(cb, CALLM, 0, Lit16, 16150 //.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) ); 16151 //.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty); 16152 //.. //-- uInstr1(cb, POP, 4, TempReg, t1); 16153 //.. //-- uInstr0(cb, CALLM_E, 0); 16154 //.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); 16155 //.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n"); 16156 //.. //-- break; 16157 16158 /* ------------------------ CWD/CDQ -------------------- */ 16159 16160 case 0x98: /* CBW */ 16161 if (haveF2orF3(pfx)) goto decode_failure; 16162 if (sz == 8) { 16163 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 16164 DIP(/*"cdqe\n"*/"cltq"); 16165 break; 16166 } 16167 if (sz == 4) { 16168 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 16169 DIP("cwtl\n"); 16170 break; 16171 } 16172 if (sz == 2) { 16173 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 16174 DIP("cbw\n"); 16175 break; 16176 } 16177 goto decode_failure; 16178 16179 case 0x99: /* CWD/CDQ/CQO */ 16180 if (haveF2orF3(pfx)) goto decode_failure; 16181 vassert(sz == 2 || sz == 4 || sz == 8); 16182 ty = szToITy(sz); 16183 putIRegRDX( sz, 16184 binop(mkSizedOp(ty,Iop_Sar8), 16185 getIRegRAX(sz), 16186 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 16187 DIP(sz == 2 ? "cwd\n" 16188 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 16189 : "cqo\n")); 16190 break; 16191 16192 /* ------------------------ FPU ops -------------------- */ 16193 16194 case 0x9E: /* SAHF */ 16195 codegen_SAHF(); 16196 DIP("sahf\n"); 16197 break; 16198 16199 case 0x9F: /* LAHF */ 16200 codegen_LAHF(); 16201 DIP("lahf\n"); 16202 break; 16203 16204 case 0x9B: /* FWAIT */ 16205 /* ignore? */ 16206 DIP("fwait\n"); 16207 break; 16208 16209 case 0xD8: 16210 case 0xD9: 16211 case 0xDA: 16212 case 0xDB: 16213 case 0xDC: 16214 case 0xDD: 16215 case 0xDE: 16216 case 0xDF: { 16217 Bool redundantREXWok = False; 16218 16219 if (haveF2orF3(pfx)) 16220 goto decode_failure; 16221 16222 /* kludge to tolerate redundant rex.w prefixes (should do this 16223 properly one day) */ 16224 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 16225 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 16226 redundantREXWok = True; 16227 16228 if ( (sz == 4 16229 || (sz == 8 && redundantREXWok)) 16230 && haveNo66noF2noF3(pfx)) { 16231 Long delta0 = delta; 16232 Bool decode_OK = False; 16233 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 16234 if (!decode_OK) { 16235 delta = delta0; 16236 goto decode_failure; 16237 } 16238 break; 16239 } else { 16240 goto decode_failure; 16241 } 16242 } 16243 16244 /* ------------------------ INT ------------------------ */ 16245 16246 case 0xCC: /* INT 3 */ 16247 jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta); 16248 dres.whatNext = Dis_StopHere; 16249 DIP("int $0x3\n"); 16250 break; 16251 16252 case 0xCD: { /* INT imm8 */ 16253 IRJumpKind jk = Ijk_Boring; 16254 if (have66orF2orF3(pfx)) goto decode_failure; 16255 d64 = getUChar(delta); delta++; 16256 switch (d64) { 16257 case 32: jk = Ijk_Sys_int32; break; 16258 default: goto decode_failure; 16259 } 16260 guest_RIP_next_mustcheck = True; 16261 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 16262 jmp_lit(jk, guest_RIP_next_assumed); 16263 /* It's important that all ArchRegs carry their up-to-date value 16264 at this point. 
So we declare an end-of-block here, which
      forces any TempRegs caching ArchRegs to be flushed. */
      dres.whatNext = Dis_StopHere;
      DIP("int $0x%02x\n", (UInt)d64);
      break;
   }

   /* ------------------------ Jcond, byte offset --------- */

   case 0xEB: /* Jb (jump, byte offset) */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4)
         goto decode_failure; /* JRS added 2004 July 11 */
      d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
      delta++;
      if (resteerOkFn(callback_opaque,d64)) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = d64;
      } else {
         jmp_lit(Ijk_Boring,d64);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp-8 0x%llx\n", d64);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4)
         goto decode_failure; /* JRS added 2004 July 11 */
      d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      if (resteerOkFn(callback_opaque,d64)) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = d64;
      } else {
         jmp_lit(Ijk_Boring,d64);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp 0x%llx\n", d64);
      break;

   case 0x70: /* JOb (jump overflow) */
   case 0x71: /* JNOb (jump no overflow) */
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
    { Long jmpDelta;
      HChar* comment = "";
      if (haveF2orF3(pfx)) goto decode_failure;
      jmpDelta = getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
      delta++;
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, d64) ) {
         /* Speculation: assume this backward branch is taken.  So we
            need to emit a side-exit to the insn following this one,
            on the negation of the condition, and continue at the
            branch target address (d64).  If we wind up back at the
            first instruction of the trace, just stop; it's better to
            let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit(
                  mk_amd64g_calculate_condition(
                     (AMD64Condcode)(1 ^ (opc - 0x70))),
                  Ijk_Boring,
                  IRConst_U64(guest_RIP_bbstart+delta) ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = d64;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
         /* Speculation: assume this forward branch is not taken.  So
            we need to emit a side-exit to d64 (the dest) and continue
            disassembling at the insn immediately following this
            one.
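            (The side exit is guarded by the un-negated condition, so
            a run which does take the branch still leaves the trace at
            d64.)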
*/ 16358 stmt( IRStmt_Exit( 16359 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 16360 Ijk_Boring, 16361 IRConst_U64(d64) ) ); 16362 dres.whatNext = Dis_ResteerC; 16363 dres.continueAt = guest_RIP_bbstart+delta; 16364 comment = "(assumed not taken)"; 16365 } 16366 else { 16367 /* Conservative default translation - end the block at this 16368 point. */ 16369 jcc_01( (AMD64Condcode)(opc - 0x70), 16370 guest_RIP_bbstart+delta, 16371 d64 ); 16372 dres.whatNext = Dis_StopHere; 16373 } 16374 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); 16375 break; 16376 } 16377 16378 case 0xE3: 16379 /* JRCXZ or JECXZ, depending address size override. */ 16380 if (have66orF2orF3(pfx)) goto decode_failure; 16381 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 16382 delta++; 16383 if (haveASO(pfx)) { 16384 /* 32-bit */ 16385 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 16386 unop(Iop_32Uto64, getIReg32(R_RCX)), 16387 mkU64(0)), 16388 Ijk_Boring, 16389 IRConst_U64(d64)) 16390 ); 16391 DIP("jecxz 0x%llx\n", d64); 16392 } else { 16393 /* 64-bit */ 16394 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 16395 getIReg64(R_RCX), 16396 mkU64(0)), 16397 Ijk_Boring, 16398 IRConst_U64(d64)) 16399 ); 16400 DIP("jrcxz 0x%llx\n", d64); 16401 } 16402 break; 16403 16404 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 16405 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 16406 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 16407 { /* The docs say this uses rCX as a count depending on the 16408 address size override, not the operand one. */ 16409 IRExpr* zbit = NULL; 16410 IRExpr* count = NULL; 16411 IRExpr* cond = NULL; 16412 HChar* xtra = NULL; 16413 16414 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 16415 /* So at this point we've rejected any variants which appear to 16416 be governed by the usual operand-size modifiers. Hence only 16417 the address size prefix can have an effect. It changes the 16418 size from 64 (default) to 32. */ 16419 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 16420 delta++; 16421 if (haveASO(pfx)) { 16422 /* 64to32 of 64-bit get is merely a get-put improvement 16423 trick. */ 16424 putIReg32(R_RCX, binop(Iop_Sub32, 16425 unop(Iop_64to32, getIReg64(R_RCX)), 16426 mkU32(1))); 16427 } else { 16428 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 16429 } 16430 16431 /* This is correct, both for 32- and 64-bit versions. If we're 16432 doing a 32-bit dec and the result is zero then the default 16433 zero extension rule will cause the upper 32 bits to be zero 16434 too. Hence a 64-bit check against zero is OK. */ 16435 count = getIReg64(R_RCX); 16436 cond = binop(Iop_CmpNE64, count, mkU64(0)); 16437 switch (opc) { 16438 case 0xE2: 16439 xtra = ""; 16440 break; 16441 case 0xE1: 16442 xtra = "e"; 16443 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 16444 cond = mkAnd1(cond, zbit); 16445 break; 16446 case 0xE0: 16447 xtra = "ne"; 16448 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 16449 cond = mkAnd1(cond, zbit); 16450 break; 16451 default: 16452 vassert(0); 16453 } 16454 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) ); 16455 16456 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? 
"l" : "", d64); 16457 break; 16458 } 16459 16460 /* ------------------------ IMUL ----------------------- */ 16461 16462 case 0x69: /* IMUL Iv, Ev, Gv */ 16463 if (haveF2orF3(pfx)) goto decode_failure; 16464 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 16465 break; 16466 case 0x6B: /* IMUL Ib, Ev, Gv */ 16467 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 16468 break; 16469 16470 /* ------------------------ MOV ------------------------ */ 16471 16472 case 0x88: /* MOV Gb,Eb */ 16473 if (haveF2orF3(pfx)) goto decode_failure; 16474 delta = dis_mov_G_E(vbi, pfx, 1, delta); 16475 break; 16476 16477 case 0x89: /* MOV Gv,Ev */ 16478 if (haveF2orF3(pfx)) goto decode_failure; 16479 delta = dis_mov_G_E(vbi, pfx, sz, delta); 16480 break; 16481 16482 case 0x8A: /* MOV Eb,Gb */ 16483 if (haveF2orF3(pfx)) goto decode_failure; 16484 delta = dis_mov_E_G(vbi, pfx, 1, delta); 16485 break; 16486 16487 case 0x8B: /* MOV Ev,Gv */ 16488 if (haveF2orF3(pfx)) goto decode_failure; 16489 delta = dis_mov_E_G(vbi, pfx, sz, delta); 16490 break; 16491 16492 case 0x8D: /* LEA M,Gv */ 16493 if (haveF2orF3(pfx)) goto decode_failure; 16494 if (sz != 4 && sz != 8) 16495 goto decode_failure; 16496 modrm = getUChar(delta); 16497 if (epartIsReg(modrm)) 16498 goto decode_failure; 16499 /* NOTE! this is the one place where a segment override prefix 16500 has no effect on the address calculation. Therefore we clear 16501 any segment override bits in pfx. */ 16502 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 16503 delta += alen; 16504 /* This is a hack. But it isn't clear that really doing the 16505 calculation at 32 bits is really worth it. Hence for leal, 16506 do the full 64-bit calculation and then truncate it. */ 16507 putIRegG( sz, pfx, modrm, 16508 sz == 4 16509 ? unop(Iop_64to32, mkexpr(addr)) 16510 : mkexpr(addr) 16511 ); 16512 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 16513 nameIRegG(sz,pfx,modrm)); 16514 break; 16515 16516 //.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ 16517 //.. delta = dis_mov_Sw_Ew(sorb, sz, delta); 16518 //.. break; 16519 //.. 16520 //.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ 16521 //.. delta = dis_mov_Ew_Sw(sorb, delta); 16522 //.. break; 16523 16524 case 0xA0: /* MOV Ob,AL */ 16525 if (have66orF2orF3(pfx)) goto decode_failure; 16526 sz = 1; 16527 /* Fall through ... */ 16528 case 0xA1: /* MOV Ov,eAX */ 16529 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 16530 goto decode_failure; 16531 d64 = getDisp64(delta); 16532 delta += 8; 16533 ty = szToITy(sz); 16534 addr = newTemp(Ity_I64); 16535 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 16536 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 16537 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 16538 segRegTxt(pfx), d64, 16539 nameIRegRAX(sz)); 16540 break; 16541 16542 case 0xA2: /* MOV AL,Ob */ 16543 if (have66orF2orF3(pfx)) goto decode_failure; 16544 sz = 1; 16545 /* Fall through ... 
*/ 16546 case 0xA3: /* MOV eAX,Ov */ 16547 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 16548 goto decode_failure; 16549 d64 = getDisp64(delta); 16550 delta += 8; 16551 ty = szToITy(sz); 16552 addr = newTemp(Ity_I64); 16553 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 16554 storeLE( mkexpr(addr), getIRegRAX(sz) ); 16555 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 16556 segRegTxt(pfx), d64); 16557 break; 16558 16559 /* XXXX be careful here with moves to AH/BH/CH/DH */ 16560 case 0xB0: /* MOV imm,AL */ 16561 case 0xB1: /* MOV imm,CL */ 16562 case 0xB2: /* MOV imm,DL */ 16563 case 0xB3: /* MOV imm,BL */ 16564 case 0xB4: /* MOV imm,AH */ 16565 case 0xB5: /* MOV imm,CH */ 16566 case 0xB6: /* MOV imm,DH */ 16567 case 0xB7: /* MOV imm,BH */ 16568 if (haveF2orF3(pfx)) goto decode_failure; 16569 d64 = getUChar(delta); 16570 delta += 1; 16571 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 16572 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 16573 break; 16574 16575 case 0xB8: /* MOV imm,eAX */ 16576 case 0xB9: /* MOV imm,eCX */ 16577 case 0xBA: /* MOV imm,eDX */ 16578 case 0xBB: /* MOV imm,eBX */ 16579 case 0xBC: /* MOV imm,eSP */ 16580 case 0xBD: /* MOV imm,eBP */ 16581 case 0xBE: /* MOV imm,eSI */ 16582 case 0xBF: /* MOV imm,eDI */ 16583 /* This is the one-and-only place where 64-bit literals are 16584 allowed in the instruction stream. */ 16585 if (haveF2orF3(pfx)) goto decode_failure; 16586 if (sz == 8) { 16587 d64 = getDisp64(delta); 16588 delta += 8; 16589 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 16590 DIP("movabsq $%lld,%s\n", (Long)d64, 16591 nameIRegRexB(8,pfx,opc-0xB8)); 16592 } else { 16593 d64 = getSDisp(imin(4,sz),delta); 16594 delta += imin(4,sz); 16595 putIRegRexB(sz, pfx, opc-0xB8, 16596 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16597 DIP("mov%c $%lld,%s\n", nameISize(sz), 16598 (Long)d64, 16599 nameIRegRexB(sz,pfx,opc-0xB8)); 16600 } 16601 break; 16602 16603 case 0xC6: /* MOV Ib,Eb */ 16604 sz = 1; 16605 goto do_Mov_I_E; 16606 case 0xC7: /* MOV Iv,Ev */ 16607 goto do_Mov_I_E; 16608 16609 do_Mov_I_E: 16610 if (haveF2orF3(pfx)) goto decode_failure; 16611 modrm = getUChar(delta); 16612 if (epartIsReg(modrm)) { 16613 delta++; /* mod/rm byte */ 16614 d64 = getSDisp(imin(4,sz),delta); 16615 delta += imin(4,sz); 16616 putIRegE(sz, pfx, modrm, 16617 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16618 DIP("mov%c $%lld, %s\n", nameISize(sz), 16619 (Long)d64, 16620 nameIRegE(sz,pfx,modrm)); 16621 } else { 16622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 16623 /*xtra*/imin(4,sz) ); 16624 delta += alen; 16625 d64 = getSDisp(imin(4,sz),delta); 16626 delta += imin(4,sz); 16627 storeLE(mkexpr(addr), 16628 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 16629 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 16630 } 16631 break; 16632 16633 /* ------------------------ MOVx ------------------------ */ 16634 16635 case 0x63: /* MOVSX */ 16636 if (haveF2orF3(pfx)) goto decode_failure; 16637 if (haveREX(pfx) && 1==getRexW(pfx)) { 16638 vassert(sz == 8); 16639 /* movsx r/m32 to r64 */ 16640 modrm = getUChar(delta); 16641 if (epartIsReg(modrm)) { 16642 delta++; 16643 putIRegG(8, pfx, modrm, 16644 unop(Iop_32Sto64, 16645 getIRegE(4, pfx, modrm))); 16646 DIP("movslq %s,%s\n", 16647 nameIRegE(4, pfx, modrm), 16648 nameIRegG(8, pfx, modrm)); 16649 break; 16650 } else { 16651 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 16652 delta += alen; 16653 putIRegG(8, pfx, modrm, 16654 unop(Iop_32Sto64, 16655 loadLE(Ity_I32, mkexpr(addr)))); 16656 DIP("movslq 
%s,%s\n", dis_buf, 16657 nameIRegG(8, pfx, modrm)); 16658 break; 16659 } 16660 } else { 16661 goto decode_failure; 16662 } 16663 16664 /* ------------------------ opl imm, A ----------------- */ 16665 16666 case 0x04: /* ADD Ib, AL */ 16667 if (haveF2orF3(pfx)) goto decode_failure; 16668 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 16669 break; 16670 case 0x05: /* ADD Iv, eAX */ 16671 if (haveF2orF3(pfx)) goto decode_failure; 16672 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); 16673 break; 16674 16675 case 0x0C: /* OR Ib, AL */ 16676 if (haveF2orF3(pfx)) goto decode_failure; 16677 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 16678 break; 16679 case 0x0D: /* OR Iv, eAX */ 16680 if (haveF2orF3(pfx)) goto decode_failure; 16681 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 16682 break; 16683 16684 case 0x14: /* ADC Ib, AL */ 16685 if (haveF2orF3(pfx)) goto decode_failure; 16686 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 16687 break; 16688 case 0x15: /* ADC Iv, eAX */ 16689 if (haveF2orF3(pfx)) goto decode_failure; 16690 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 16691 break; 16692 16693 case 0x1C: /* SBB Ib, AL */ 16694 if (haveF2orF3(pfx)) goto decode_failure; 16695 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 16696 break; 16697 case 0x1D: /* SBB Iv, eAX */ 16698 if (haveF2orF3(pfx)) goto decode_failure; 16699 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 16700 break; 16701 16702 case 0x24: /* AND Ib, AL */ 16703 if (haveF2orF3(pfx)) goto decode_failure; 16704 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 16705 break; 16706 case 0x25: /* AND Iv, eAX */ 16707 if (haveF2orF3(pfx)) goto decode_failure; 16708 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 16709 break; 16710 16711 case 0x2C: /* SUB Ib, AL */ 16712 if (haveF2orF3(pfx)) goto decode_failure; 16713 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 16714 break; 16715 case 0x2D: /* SUB Iv, eAX */ 16716 if (haveF2orF3(pfx)) goto decode_failure; 16717 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 16718 break; 16719 16720 case 0x34: /* XOR Ib, AL */ 16721 if (haveF2orF3(pfx)) goto decode_failure; 16722 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 16723 break; 16724 case 0x35: /* XOR Iv, eAX */ 16725 if (haveF2orF3(pfx)) goto decode_failure; 16726 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 16727 break; 16728 16729 case 0x3C: /* CMP Ib, AL */ 16730 if (haveF2orF3(pfx)) goto decode_failure; 16731 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 16732 break; 16733 case 0x3D: /* CMP Iv, eAX */ 16734 if (haveF2orF3(pfx)) goto decode_failure; 16735 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 16736 break; 16737 16738 case 0xA8: /* TEST Ib, AL */ 16739 if (haveF2orF3(pfx)) goto decode_failure; 16740 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 16741 break; 16742 case 0xA9: /* TEST Iv, eAX */ 16743 if (haveF2orF3(pfx)) goto decode_failure; 16744 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 16745 break; 16746 16747 /* ------------------------ opl Ev, Gv ----------------- */ 16748 16749 case 0x02: /* ADD Eb,Gb */ 16750 if (haveF2orF3(pfx)) goto decode_failure; 16751 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 16752 break; 16753 case 0x03: /* ADD Ev,Gv */ 16754 if (haveF2orF3(pfx)) goto 
decode_failure; 16755 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 16756 break; 16757 16758 case 0x0A: /* OR Eb,Gb */ 16759 if (haveF2orF3(pfx)) goto decode_failure; 16760 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 16761 break; 16762 case 0x0B: /* OR Ev,Gv */ 16763 if (haveF2orF3(pfx)) goto decode_failure; 16764 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 16765 break; 16766 16767 case 0x12: /* ADC Eb,Gb */ 16768 if (haveF2orF3(pfx)) goto decode_failure; 16769 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 16770 break; 16771 case 0x13: /* ADC Ev,Gv */ 16772 if (haveF2orF3(pfx)) goto decode_failure; 16773 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 16774 break; 16775 16776 case 0x1A: /* SBB Eb,Gb */ 16777 if (haveF2orF3(pfx)) goto decode_failure; 16778 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 16779 break; 16780 case 0x1B: /* SBB Ev,Gv */ 16781 if (haveF2orF3(pfx)) goto decode_failure; 16782 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 16783 break; 16784 16785 case 0x22: /* AND Eb,Gb */ 16786 if (haveF2orF3(pfx)) goto decode_failure; 16787 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 16788 break; 16789 case 0x23: /* AND Ev,Gv */ 16790 if (haveF2orF3(pfx)) goto decode_failure; 16791 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 16792 break; 16793 16794 case 0x2A: /* SUB Eb,Gb */ 16795 if (haveF2orF3(pfx)) goto decode_failure; 16796 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 16797 break; 16798 case 0x2B: /* SUB Ev,Gv */ 16799 if (haveF2orF3(pfx)) goto decode_failure; 16800 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 16801 break; 16802 16803 case 0x32: /* XOR Eb,Gb */ 16804 if (haveF2orF3(pfx)) goto decode_failure; 16805 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 16806 break; 16807 case 0x33: /* XOR Ev,Gv */ 16808 if (haveF2orF3(pfx)) goto decode_failure; 16809 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 16810 break; 16811 16812 case 0x3A: /* CMP Eb,Gb */ 16813 if (haveF2orF3(pfx)) goto decode_failure; 16814 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 16815 break; 16816 case 0x3B: /* CMP Ev,Gv */ 16817 if (haveF2orF3(pfx)) goto decode_failure; 16818 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 16819 break; 16820 16821 case 0x84: /* TEST Eb,Gb */ 16822 if (haveF2orF3(pfx)) goto decode_failure; 16823 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 16824 break; 16825 case 0x85: /* TEST Ev,Gv */ 16826 if (haveF2orF3(pfx)) goto decode_failure; 16827 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 16828 break; 16829 16830 /* ------------------------ opl Gv, Ev ----------------- */ 16831 16832 case 0x00: /* ADD Gb,Eb */ 16833 if (haveF2orF3(pfx)) goto decode_failure; 16834 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 16835 break; 16836 case 0x01: /* ADD Gv,Ev */ 16837 if (haveF2orF3(pfx)) goto decode_failure; 16838 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 16839 break; 16840 16841 case 0x08: /* OR Gb,Eb */ 16842 if (haveF2orF3(pfx)) goto decode_failure; 16843 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 16844 break; 
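   /* (Illustrative note only, not part of the decoder.)  Decode of
      this whole opl family is uniform.  For example, "or %cl,%al"
      encodes as 0x08 0xC8: opcode 0x08 selects the byte-sized
      G-to-E form, and modrm 0xC8 (mod=3, reg=001=CL, rm=000=AL)
      names the operands.  dis_op2_G_E then reads both registers,
      combines them with Iop_Or8, writes the result back to AL, and
      records what it needs in the flags thunk (CC_OP/CC_DEP1/
      CC_DEP2) so rflags can be computed lazily later. */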
16845 case 0x09: /* OR Gv,Ev */ 16846 if (haveF2orF3(pfx)) goto decode_failure; 16847 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 16848 break; 16849 16850 case 0x10: /* ADC Gb,Eb */ 16851 if (haveF2orF3(pfx)) goto decode_failure; 16852 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 16853 break; 16854 case 0x11: /* ADC Gv,Ev */ 16855 if (haveF2orF3(pfx)) goto decode_failure; 16856 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 16857 break; 16858 16859 case 0x18: /* SBB Gb,Eb */ 16860 if (haveF2orF3(pfx)) goto decode_failure; 16861 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 16862 break; 16863 case 0x19: /* SBB Gv,Ev */ 16864 if (haveF2orF3(pfx)) goto decode_failure; 16865 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 16866 break; 16867 16868 case 0x20: /* AND Gb,Eb */ 16869 if (haveF2orF3(pfx)) goto decode_failure; 16870 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 16871 break; 16872 case 0x21: /* AND Gv,Ev */ 16873 if (haveF2orF3(pfx)) goto decode_failure; 16874 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 16875 break; 16876 16877 case 0x28: /* SUB Gb,Eb */ 16878 if (haveF2orF3(pfx)) goto decode_failure; 16879 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 16880 break; 16881 case 0x29: /* SUB Gv,Ev */ 16882 if (haveF2orF3(pfx)) goto decode_failure; 16883 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 16884 break; 16885 16886 case 0x30: /* XOR Gb,Eb */ 16887 if (haveF2orF3(pfx)) goto decode_failure; 16888 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 16889 break; 16890 case 0x31: /* XOR Gv,Ev */ 16891 if (haveF2orF3(pfx)) goto decode_failure; 16892 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 16893 break; 16894 16895 case 0x38: /* CMP Gb,Eb */ 16896 if (haveF2orF3(pfx)) goto decode_failure; 16897 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 16898 break; 16899 case 0x39: /* CMP Gv,Ev */ 16900 if (haveF2orF3(pfx)) goto decode_failure; 16901 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 16902 break; 16903 16904 /* ------------------------ POP ------------------------ */ 16905 16906 case 0x58: /* POP eAX */ 16907 case 0x59: /* POP eCX */ 16908 case 0x5A: /* POP eDX */ 16909 case 0x5B: /* POP eBX */ 16910 case 0x5D: /* POP eBP */ 16911 case 0x5E: /* POP eSI */ 16912 case 0x5F: /* POP eDI */ 16913 case 0x5C: /* POP eSP */ 16914 if (haveF2orF3(pfx)) goto decode_failure; 16915 vassert(sz == 2 || sz == 4 || sz == 8); 16916 if (sz == 4) 16917 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 16918 t1 = newTemp(szToITy(sz)); 16919 t2 = newTemp(Ity_I64); 16920 assign(t2, getIReg64(R_RSP)); 16921 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 16922 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 16923 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 16924 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 16925 break; 16926 16927 case 0x9D: /* POPF */ 16928 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 16929 So sz==4 actually means sz==8. 
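Note also that only OSZACP, D, ID and AC are actually restored
         from the popped word below; the remaining RFLAGS bits are
         simply ignored.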
*/ 16930 if (haveF2orF3(pfx)) goto decode_failure; 16931 vassert(sz == 2 || sz == 4 || sz == 8); 16932 if (sz == 4) sz = 8; 16933 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 16934 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 16935 assign(t2, getIReg64(R_RSP)); 16936 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 16937 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 16938 /* t1 is the flag word. Mask out everything except OSZACP and 16939 set the flags thunk to AMD64G_CC_OP_COPY. */ 16940 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16941 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16942 stmt( IRStmt_Put( OFFB_CC_DEP1, 16943 binop(Iop_And64, 16944 mkexpr(t1), 16945 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 16946 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 16947 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 16948 ) 16949 ) 16950 ); 16951 16952 /* Also need to set the D flag, which is held in bit 10 of t1. 16953 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */ 16954 stmt( IRStmt_Put( 16955 OFFB_DFLAG, 16956 IRExpr_Mux0X( 16957 unop(Iop_32to8, 16958 unop(Iop_64to32, 16959 binop(Iop_And64, 16960 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 16961 mkU64(1)))), 16962 mkU64(1), 16963 mkU64(0xFFFFFFFFFFFFFFFFULL))) 16964 ); 16965 16966 /* And set the ID flag */ 16967 stmt( IRStmt_Put( 16968 OFFB_IDFLAG, 16969 IRExpr_Mux0X( 16970 unop(Iop_32to8, 16971 unop(Iop_64to32, 16972 binop(Iop_And64, 16973 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 16974 mkU64(1)))), 16975 mkU64(0), 16976 mkU64(1))) 16977 ); 16978 16979 /* And set the AC flag too */ 16980 stmt( IRStmt_Put( 16981 OFFB_ACFLAG, 16982 IRExpr_Mux0X( 16983 unop(Iop_32to8, 16984 unop(Iop_64to32, 16985 binop(Iop_And64, 16986 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 16987 mkU64(1)))), 16988 mkU64(0), 16989 mkU64(1))) 16990 ); 16991 16992 DIP("popf%c\n", nameISize(sz)); 16993 break; 16994 16995 //.. case 0x61: /* POPA */ 16996 //.. /* This is almost certainly wrong for sz==2. So ... */ 16997 //.. if (sz != 4) goto decode_failure; 16998 //.. 16999 //.. /* t5 is the old %ESP value. */ 17000 //.. t5 = newTemp(Ity_I32); 17001 //.. assign( t5, getIReg(4, R_ESP) ); 17002 //.. 17003 //.. /* Reload all the registers, except %esp. */ 17004 //.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); 17005 //.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); 17006 //.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); 17007 //.. putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); 17008 //.. /* ignore saved %ESP */ 17009 //.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); 17010 //.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); 17011 //.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); 17012 //.. 17013 //.. /* and move %ESP back up */ 17014 //.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); 17015 //.. 17016 //.. DIP("pusha%c\n", nameISize(sz)); 17017 //.. break; 17018 17019 case 0x8F: { /* POPQ m64 / POPW m16 */ 17020 Int len; 17021 UChar rm; 17022 /* There is no encoding for 32-bit pop in 64-bit mode. 17023 So sz==4 actually means sz==8. 
*/
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4
              || /* tolerate redundant REX.W, see #210481 */ sz == 8);
      if (sz == 4) sz = 8;
      if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists

      rm = getUChar(delta);

      /* make sure this really is a POP instruction */
      if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
         goto decode_failure;
      /* and that it has the right size */
      vassert(sz == 8);

      t1 = newTemp(Ity_I64);
      t3 = newTemp(Ity_I64);
      assign( t1, getIReg64(R_RSP) );
      assign( t3, loadLE(Ity_I64, mkexpr(t1)) );

      /* Increase RSP; must be done before the STORE.  Intel manual
         says: If the RSP register is used as a base register for
         addressing a destination operand in memory, the POP
         instruction computes the effective address of the operand
         after it increments the RSP register. */
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );

      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), mkexpr(t3) );

      DIP("popq %s\n", dis_buf);

      delta += len;
      break;
   }

//.. //-- case 0x1F: /* POP %DS */
//.. //--    dis_pop_segreg( cb, R_DS, sz ); break;
//.. //-- case 0x07: /* POP %ES */
//.. //--    dis_pop_segreg( cb, R_ES, sz ); break;
//.. //-- case 0x17: /* POP %SS */
//.. //--    dis_pop_segreg( cb, R_SS, sz ); break;

   /* ------------------------ PUSH ----------------------- */

   case 0x50: /* PUSH eAX */
   case 0x51: /* PUSH eCX */
   case 0x52: /* PUSH eDX */
   case 0x53: /* PUSH eBX */
   case 0x55: /* PUSH eBP */
   case 0x56: /* PUSH eSI */
   case 0x57: /* PUSH eDI */
   case 0x54: /* PUSH eSP */
      /* This is the Right Way, in that the value to be pushed is
         established before %rsp is changed, so that pushq %rsp
         correctly pushes the old value. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4 || sz == 8);
      if (sz == 4)
         sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
      ty = sz==2 ? Ity_I16 : Ity_I64;
      t1 = newTemp(ty);
      t2 = newTemp(Ity_I64);
      assign(t1, getIRegRexB(sz, pfx, opc-0x50));
      assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
      putIReg64(R_RSP, mkexpr(t2) );
      storeLE(mkexpr(t2),mkexpr(t1));
      DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
      break;

   case 0x68: /* PUSH Iv */
      if (haveF2orF3(pfx)) goto decode_failure;
      /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
      if (sz == 4) sz = 8;
      d64 = getSDisp(imin(4,sz),delta);
      delta += imin(4,sz);
      goto do_push_I;
   case 0x6A: /* PUSH Ib, sign-extended to sz */
      if (haveF2orF3(pfx)) goto decode_failure;
      /* Note, sz==4 is not possible in 64-bit mode. Hence ...
*/ 17103 if (sz == 4) sz = 8; 17104 d64 = getSDisp8(delta); delta += 1; 17105 goto do_push_I; 17106 do_push_I: 17107 ty = szToITy(sz); 17108 t1 = newTemp(Ity_I64); 17109 t2 = newTemp(ty); 17110 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 17111 putIReg64(R_RSP, mkexpr(t1) ); 17112 /* stop mkU16 asserting if d32 is a negative 16-bit number 17113 (bug #132813) */ 17114 if (ty == Ity_I16) 17115 d64 &= 0xFFFF; 17116 storeLE( mkexpr(t1), mkU(ty,d64) ); 17117 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 17118 break; 17119 17120 case 0x9C: /* PUSHF */ { 17121 /* Note. There is no encoding for a 32-bit pushf in 64-bit 17122 mode. So sz==4 actually means sz==8. */ 17123 /* 24 July 06: has also been seen with a redundant REX prefix, 17124 so must also allow sz==8. */ 17125 if (haveF2orF3(pfx)) goto decode_failure; 17126 vassert(sz == 2 || sz == 4 || sz == 8); 17127 if (sz == 4) sz = 8; 17128 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 17129 17130 t1 = newTemp(Ity_I64); 17131 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 17132 putIReg64(R_RSP, mkexpr(t1) ); 17133 17134 t2 = newTemp(Ity_I64); 17135 assign( t2, mk_amd64g_calculate_rflags_all() ); 17136 17137 /* Patch in the D flag. This can simply be a copy of bit 10 of 17138 baseBlock[OFFB_DFLAG]. */ 17139 t3 = newTemp(Ity_I64); 17140 assign( t3, binop(Iop_Or64, 17141 mkexpr(t2), 17142 binop(Iop_And64, 17143 IRExpr_Get(OFFB_DFLAG,Ity_I64), 17144 mkU64(1<<10))) 17145 ); 17146 17147 /* And patch in the ID flag. */ 17148 t4 = newTemp(Ity_I64); 17149 assign( t4, binop(Iop_Or64, 17150 mkexpr(t3), 17151 binop(Iop_And64, 17152 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 17153 mkU8(21)), 17154 mkU64(1<<21))) 17155 ); 17156 17157 /* And patch in the AC flag too. */ 17158 t5 = newTemp(Ity_I64); 17159 assign( t5, binop(Iop_Or64, 17160 mkexpr(t4), 17161 binop(Iop_And64, 17162 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 17163 mkU8(18)), 17164 mkU64(1<<18))) 17165 ); 17166 17167 /* if sz==2, the stored value needs to be narrowed. */ 17168 if (sz == 2) 17169 storeLE( mkexpr(t1), unop(Iop_32to16, 17170 unop(Iop_64to32,mkexpr(t5))) ); 17171 else 17172 storeLE( mkexpr(t1), mkexpr(t5) ); 17173 17174 DIP("pushf%c\n", nameISize(sz)); 17175 break; 17176 } 17177 17178 //.. case 0x60: /* PUSHA */ 17179 //.. /* This is almost certainly wrong for sz==2. So ... */ 17180 //.. if (sz != 4) goto decode_failure; 17181 //.. 17182 //.. /* This is the Right Way, in that the value to be pushed is 17183 //.. established before %esp is changed, so that pusha 17184 //.. correctly pushes the old %esp value. New value of %esp is 17185 //.. pushed at start. */ 17186 //.. /* t0 is the %ESP value we're going to push. */ 17187 //.. t0 = newTemp(Ity_I32); 17188 //.. assign( t0, getIReg(4, R_ESP) ); 17189 //.. 17190 //.. /* t5 will be the new %ESP value. */ 17191 //.. t5 = newTemp(Ity_I32); 17192 //.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); 17193 //.. 17194 //.. /* Update guest state before prodding memory. */ 17195 //.. putIReg(4, R_ESP, mkexpr(t5)); 17196 //.. 17197 //.. /* Dump all the registers. */ 17198 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); 17199 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); 17200 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); 17201 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); 17202 //.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); 17203 //.. 
storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
//..    storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
//..    storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
//..
//..    DIP("pusha%c\n", nameISize(sz));
//..    break;
//..
//..
//.. //-- case 0x0E: /* PUSH %CS */
//.. //--    dis_push_segreg( cb, R_CS, sz ); break;
//.. //-- case 0x1E: /* PUSH %DS */
//.. //--    dis_push_segreg( cb, R_DS, sz ); break;
//.. //-- case 0x06: /* PUSH %ES */
//.. //--    dis_push_segreg( cb, R_ES, sz ); break;
//.. //-- case 0x16: /* PUSH %SS */
//.. //--    dis_push_segreg( cb, R_SS, sz ); break;
//..
//.. /* ------------------------ SCAS et al ----------------- */
//..
//.. case 0xA4: /* MOVS, no REP prefix */
//.. case 0xA5:
//..    dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
//..    break;
//..
//.. case 0xA6: /* CMPSb, no REP prefix */
//.. //-- case 0xA7:
//..    dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
//..    break;
//.. //--
//.. //--
   case 0xAC: /* LODS, no REP prefix */
   case 0xAD:
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
      break;
//..
//.. case 0xAE: /* SCAS, no REP prefix */
//.. case 0xAF:
//..    dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
//..    break;


   case 0xFC: /* CLD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
      DIP("cld\n");
      break;

   case 0xFD: /* STD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
      DIP("std\n");
      break;

   case 0xF8: /* CLC */
   case 0xF9: /* STC */
   case 0xF5: /* CMC */
      t0 = newTemp(Ity_I64);
      t1 = newTemp(Ity_I64);
      assign( t0, mk_amd64g_calculate_rflags_all() );
      switch (opc) {
         case 0xF8:
            assign( t1, binop(Iop_And64, mkexpr(t0),
                              mkU64(~AMD64G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t1, binop(Iop_Or64, mkexpr(t0),
                              mkU64(AMD64G_CC_MASK_C)));
            DIP("stc\n");
            break;
         case 0xF5:
            assign( t1, binop(Iop_Xor64, mkexpr(t0),
                              mkU64(AMD64G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         default:
            vpanic("disInstr(amd64)(clc/stc/cmc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      break;

//.. /* REPNE prefix insn */
//.. case 0xF2: {
//..    Addr32 eip_orig = guest_eip_bbstart + delta - 1;
//..    vassert(sorb == 0);
//..    abyte = getUChar(delta); delta++;
//..
//..    if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; }
//..    whatNext = Dis_StopHere;
//..
//..    switch (abyte) {
//..    /* According to the Intel manual, "repne movs" should never occur, but
//..     * in practice it has happened, so allow for it here... */
//..    case 0xA4: sz = 1;   /* REPNE MOVS<sz> */
//..         goto decode_failure;
//.. //-- case 0xA5:
//..
// dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig, 17305 //.. // guest_eip_bbstart+delta, "repne movs" ); 17306 //.. // break; 17307 //.. //-- 17308 //.. //-- case 0xA6: sz = 1; /* REPNE CMPS<sz> */ 17309 //.. //-- case 0xA7: 17310 //.. //-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" ); 17311 //.. //-- break; 17312 //.. //-- 17313 //.. case 0xAE: sz = 1; /* REPNE SCAS<sz> */ 17314 //.. case 0xAF: 17315 //.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, 17316 //.. guest_eip_bbstart+delta, "repne scas" ); 17317 //.. break; 17318 //.. 17319 //.. default: 17320 //.. goto decode_failure; 17321 //.. } 17322 //.. break; 17323 //.. } 17324 17325 /* ------ AE: SCAS variants ------ */ 17326 case 0xAE: 17327 case 0xAF: 17328 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 17329 if (haveASO(pfx)) 17330 goto decode_failure; 17331 if (haveF2(pfx) && !haveF3(pfx)) { 17332 if (opc == 0xAE) 17333 sz = 1; 17334 dis_REP_op ( AMD64CondNZ, dis_SCAS, sz, 17335 guest_RIP_curr_instr, 17336 guest_RIP_bbstart+delta, "repne scas", pfx ); 17337 dres.whatNext = Dis_StopHere; 17338 break; 17339 } 17340 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 17341 if (haveASO(pfx)) 17342 goto decode_failure; 17343 if (!haveF2(pfx) && haveF3(pfx)) { 17344 if (opc == 0xAE) 17345 sz = 1; 17346 dis_REP_op ( AMD64CondZ, dis_SCAS, sz, 17347 guest_RIP_curr_instr, 17348 guest_RIP_bbstart+delta, "repe scas", pfx ); 17349 dres.whatNext = Dis_StopHere; 17350 break; 17351 } 17352 /* AE/AF: scasb/scas{w,l,q} */ 17353 if (!haveF2(pfx) && !haveF3(pfx)) { 17354 if (opc == 0xAE) 17355 sz = 1; 17356 dis_string_op( dis_SCAS, sz, "scas", pfx ); 17357 break; 17358 } 17359 goto decode_failure; 17360 17361 /* ------ A6, A7: CMPS variants ------ */ 17362 case 0xA6: 17363 case 0xA7: 17364 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 17365 if (haveASO(pfx)) 17366 goto decode_failure; 17367 if (haveF3(pfx) && !haveF2(pfx)) { 17368 if (opc == 0xA6) 17369 sz = 1; 17370 dis_REP_op ( AMD64CondZ, dis_CMPS, sz, 17371 guest_RIP_curr_instr, 17372 guest_RIP_bbstart+delta, "repe cmps", pfx ); 17373 dres.whatNext = Dis_StopHere; 17374 break; 17375 } 17376 goto decode_failure; 17377 17378 /* ------ AA, AB: STOS variants ------ */ 17379 case 0xAA: 17380 case 0xAB: 17381 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 17382 if (haveASO(pfx)) 17383 goto decode_failure; 17384 if (haveF3(pfx) && !haveF2(pfx)) { 17385 if (opc == 0xAA) 17386 sz = 1; 17387 dis_REP_op ( AMD64CondAlways, dis_STOS, sz, 17388 guest_RIP_curr_instr, 17389 guest_RIP_bbstart+delta, "rep stos", pfx ); 17390 dres.whatNext = Dis_StopHere; 17391 break; 17392 } 17393 /* AA/AB: stosb/stos{w,l,q} */ 17394 if (!haveF3(pfx) && !haveF2(pfx)) { 17395 if (opc == 0xAA) 17396 sz = 1; 17397 dis_string_op( dis_STOS, sz, "stos", pfx ); 17398 break; 17399 } 17400 goto decode_failure; 17401 17402 /* ------ A4, A5: MOVS variants ------ */ 17403 case 0xA4: 17404 case 0xA5: 17405 /* F3 A4: rep movsb */ 17406 if (haveASO(pfx)) 17407 goto decode_failure; 17408 if (haveF3(pfx) && !haveF2(pfx)) { 17409 if (opc == 0xA4) 17410 sz = 1; 17411 dis_REP_op ( AMD64CondAlways, dis_MOVS, sz, 17412 guest_RIP_curr_instr, 17413 guest_RIP_bbstart+delta, "rep movs", pfx ); 17414 dres.whatNext = Dis_StopHere; 17415 break; 17416 } 17417 /* A4: movsb */ 17418 if (!haveF3(pfx) && !haveF2(pfx)) { 17419 if (opc == 0xA4) 17420 sz = 1; 17421 dis_string_op( dis_MOVS, sz, "movs", pfx ); 17422 break; 17423 } 17424 goto decode_failure; 17425 17426 17427 /* ------------------------ XCHG ----------------------- */ 17428 17429 /* XCHG reg,mem 
automatically asserts LOCK# even without a LOCK 17430 prefix. Therefore, surround it with a IRStmt_MBE(Imbe_BusLock) 17431 and IRStmt_MBE(Imbe_BusUnlock) pair. But be careful; if it is 17432 used with an explicit LOCK prefix, we don't want to end up with 17433 two IRStmt_MBE(Imbe_BusLock)s -- one made here and one made by 17434 the generic LOCK logic at the top of disInstr. */ 17435 case 0x86: /* XCHG Gb,Eb */ 17436 sz = 1; 17437 /* Fall through ... */ 17438 case 0x87: /* XCHG Gv,Ev */ 17439 if (haveF2orF3(pfx)) goto decode_failure; 17440 modrm = getUChar(delta); 17441 ty = szToITy(sz); 17442 t1 = newTemp(ty); t2 = newTemp(ty); 17443 if (epartIsReg(modrm)) { 17444 assign(t1, getIRegE(sz, pfx, modrm)); 17445 assign(t2, getIRegG(sz, pfx, modrm)); 17446 putIRegG(sz, pfx, modrm, mkexpr(t1)); 17447 putIRegE(sz, pfx, modrm, mkexpr(t2)); 17448 delta++; 17449 DIP("xchg%c %s, %s\n", 17450 nameISize(sz), nameIRegG(sz, pfx, modrm), 17451 nameIRegE(sz, pfx, modrm)); 17452 } else { 17453 *expect_CAS = True; 17454 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17455 assign( t1, loadLE(ty, mkexpr(addr)) ); 17456 assign( t2, getIRegG(sz, pfx, modrm) ); 17457 casLE( mkexpr(addr), 17458 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 17459 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 17460 delta += alen; 17461 DIP("xchg%c %s, %s\n", nameISize(sz), 17462 nameIRegG(sz, pfx, modrm), dis_buf); 17463 } 17464 break; 17465 17466 case 0x90: /* XCHG eAX,eAX */ 17467 /* detect and handle F3 90 (rep nop) specially */ 17468 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 17469 DIP("rep nop (P4 pause)\n"); 17470 /* "observe" the hint. The Vex client needs to be careful not 17471 to cause very long delays as a result, though. */ 17472 jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta); 17473 dres.whatNext = Dis_StopHere; 17474 break; 17475 } 17476 /* detect and handle NOPs specially */ 17477 if (/* F2/F3 probably change meaning completely */ 17478 !haveF2orF3(pfx) 17479 /* If REX.B is 1, we're not exchanging rAX with itself */ 17480 && getRexB(pfx)==0 ) { 17481 DIP("nop\n"); 17482 break; 17483 } 17484 /* else fall through to normal case. */ 17485 case 0x91: /* XCHG rAX,rCX */ 17486 case 0x92: /* XCHG rAX,rDX */ 17487 case 0x93: /* XCHG rAX,rBX */ 17488 case 0x94: /* XCHG rAX,rSP */ 17489 case 0x95: /* XCHG rAX,rBP */ 17490 case 0x96: /* XCHG rAX,rSI */ 17491 case 0x97: /* XCHG rAX,rDI */ 17492 17493 /* guard against mutancy */ 17494 if (haveF2orF3(pfx)) goto decode_failure; 17495 17496 /* sz == 2 could legitimately happen, but we don't handle it yet */ 17497 if (sz == 2) goto decode_failure; /* awaiting test case */ 17498 17499 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 17500 break; 17501 17502 //.. //-- /* ------------------------ XLAT ----------------------- */ 17503 //.. //-- 17504 //.. //-- case 0xD7: /* XLAT */ 17505 //.. //-- t1 = newTemp(cb); t2 = newTemp(cb); 17506 //.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */ 17507 //.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */ 17508 //.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */ 17509 //.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */ 17510 //.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2); 17511 //.. //-- uWiden(cb, 1, False); 17512 //.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */ 17513 //.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */ 17514 //.. 
//-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */ 17515 //.. //-- 17516 //.. //-- DIP("xlat%c [ebx]\n", nameISize(sz)); 17517 //.. //-- break; 17518 17519 /* ------------------------ IN / OUT ----------------------- */ 17520 17521 case 0xE4: /* IN imm8, AL */ 17522 sz = 1; 17523 t1 = newTemp(Ity_I64); 17524 abyte = getUChar(delta); delta++; 17525 assign(t1, mkU64( abyte & 0xFF )); 17526 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 17527 goto do_IN; 17528 case 0xE5: /* IN imm8, eAX */ 17529 if (!(sz == 2 || sz == 4)) goto decode_failure; 17530 t1 = newTemp(Ity_I64); 17531 abyte = getUChar(delta); delta++; 17532 assign(t1, mkU64( abyte & 0xFF )); 17533 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 17534 goto do_IN; 17535 case 0xEC: /* IN %DX, AL */ 17536 sz = 1; 17537 t1 = newTemp(Ity_I64); 17538 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 17539 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 17540 nameIRegRAX(sz)); 17541 goto do_IN; 17542 case 0xED: /* IN %DX, eAX */ 17543 if (!(sz == 2 || sz == 4)) goto decode_failure; 17544 t1 = newTemp(Ity_I64); 17545 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 17546 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 17547 nameIRegRAX(sz)); 17548 goto do_IN; 17549 do_IN: { 17550 /* At this point, sz indicates the width, and t1 is a 64-bit 17551 value giving port number. */ 17552 IRDirty* d; 17553 if (haveF2orF3(pfx)) goto decode_failure; 17554 vassert(sz == 1 || sz == 2 || sz == 4); 17555 ty = szToITy(sz); 17556 t2 = newTemp(Ity_I64); 17557 d = unsafeIRDirty_1_N( 17558 t2, 17559 0/*regparms*/, 17560 "amd64g_dirtyhelper_IN", 17561 &amd64g_dirtyhelper_IN, 17562 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 17563 ); 17564 /* do the call, dumping the result in t2. */ 17565 stmt( IRStmt_Dirty(d) ); 17566 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 17567 break; 17568 } 17569 17570 case 0xE6: /* OUT AL, imm8 */ 17571 sz = 1; 17572 t1 = newTemp(Ity_I64); 17573 abyte = getUChar(delta); delta++; 17574 assign( t1, mkU64( abyte & 0xFF ) ); 17575 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 17576 goto do_OUT; 17577 case 0xE7: /* OUT eAX, imm8 */ 17578 if (!(sz == 2 || sz == 4)) goto decode_failure; 17579 t1 = newTemp(Ity_I64); 17580 abyte = getUChar(delta); delta++; 17581 assign( t1, mkU64( abyte & 0xFF ) ); 17582 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 17583 goto do_OUT; 17584 case 0xEE: /* OUT AL, %DX */ 17585 sz = 1; 17586 t1 = newTemp(Ity_I64); 17587 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 17588 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 17589 nameIRegRDX(2)); 17590 goto do_OUT; 17591 case 0xEF: /* OUT eAX, %DX */ 17592 if (!(sz == 2 || sz == 4)) goto decode_failure; 17593 t1 = newTemp(Ity_I64); 17594 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 17595 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 17596 nameIRegRDX(2)); 17597 goto do_OUT; 17598 do_OUT: { 17599 /* At this point, sz indicates the width, and t1 is a 64-bit 17600 value giving port number. 
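As with IN above, the port write itself can't be expressed in
         IR, so it is farmed out to the dirty helper
         amd64g_dirtyhelper_OUT, which performs the access at run
         time.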
*/ 17601 IRDirty* d; 17602 if (haveF2orF3(pfx)) goto decode_failure; 17603 vassert(sz == 1 || sz == 2 || sz == 4); 17604 ty = szToITy(sz); 17605 d = unsafeIRDirty_0_N( 17606 0/*regparms*/, 17607 "amd64g_dirtyhelper_OUT", 17608 &amd64g_dirtyhelper_OUT, 17609 mkIRExprVec_3( mkexpr(t1), 17610 widenUto64( getIRegRAX(sz) ), 17611 mkU64(sz) ) 17612 ); 17613 stmt( IRStmt_Dirty(d) ); 17614 break; 17615 } 17616 17617 /* ------------------------ (Grp1 extensions) ---------- */ 17618 17619 case 0x80: /* Grp1 Ib,Eb */ 17620 if (haveF2orF3(pfx)) goto decode_failure; 17621 modrm = getUChar(delta); 17622 am_sz = lengthAMode(pfx,delta); 17623 sz = 1; 17624 d_sz = 1; 17625 d64 = getSDisp8(delta + am_sz); 17626 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17627 break; 17628 17629 case 0x81: /* Grp1 Iv,Ev */ 17630 if (haveF2orF3(pfx)) goto decode_failure; 17631 modrm = getUChar(delta); 17632 am_sz = lengthAMode(pfx,delta); 17633 d_sz = imin(sz,4); 17634 d64 = getSDisp(d_sz, delta + am_sz); 17635 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17636 break; 17637 17638 case 0x83: /* Grp1 Ib,Ev */ 17639 if (haveF2orF3(pfx)) goto decode_failure; 17640 modrm = getUChar(delta); 17641 am_sz = lengthAMode(pfx,delta); 17642 d_sz = 1; 17643 d64 = getSDisp8(delta + am_sz); 17644 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 17645 break; 17646 17647 /* ------------------------ (Grp2 extensions) ---------- */ 17648 17649 case 0xC0: { /* Grp2 Ib,Eb */ 17650 Bool decode_OK = True; 17651 if (haveF2orF3(pfx)) goto decode_failure; 17652 modrm = getUChar(delta); 17653 am_sz = lengthAMode(pfx,delta); 17654 d_sz = 1; 17655 d64 = getUChar(delta + am_sz); 17656 sz = 1; 17657 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17658 mkU8(d64 & 0xFF), NULL, &decode_OK ); 17659 if (!decode_OK) goto decode_failure; 17660 break; 17661 } 17662 case 0xC1: { /* Grp2 Ib,Ev */ 17663 Bool decode_OK = True; 17664 if (haveF2orF3(pfx)) goto decode_failure; 17665 modrm = getUChar(delta); 17666 am_sz = lengthAMode(pfx,delta); 17667 d_sz = 1; 17668 d64 = getUChar(delta + am_sz); 17669 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17670 mkU8(d64 & 0xFF), NULL, &decode_OK ); 17671 if (!decode_OK) goto decode_failure; 17672 break; 17673 } 17674 case 0xD0: { /* Grp2 1,Eb */ 17675 Bool decode_OK = True; 17676 if (haveF2orF3(pfx)) goto decode_failure; 17677 modrm = getUChar(delta); 17678 am_sz = lengthAMode(pfx,delta); 17679 d_sz = 0; 17680 d64 = 1; 17681 sz = 1; 17682 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17683 mkU8(d64), NULL, &decode_OK ); 17684 if (!decode_OK) goto decode_failure; 17685 break; 17686 } 17687 case 0xD1: { /* Grp2 1,Ev */ 17688 Bool decode_OK = True; 17689 if (haveF2orF3(pfx)) goto decode_failure; 17690 modrm = getUChar(delta); 17691 am_sz = lengthAMode(pfx,delta); 17692 d_sz = 0; 17693 d64 = 1; 17694 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17695 mkU8(d64), NULL, &decode_OK ); 17696 if (!decode_OK) goto decode_failure; 17697 break; 17698 } 17699 case 0xD2: { /* Grp2 CL,Eb */ 17700 Bool decode_OK = True; 17701 if (haveF2orF3(pfx)) goto decode_failure; 17702 modrm = getUChar(delta); 17703 am_sz = lengthAMode(pfx,delta); 17704 d_sz = 0; 17705 sz = 1; 17706 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17707 getIRegCL(), "%cl", &decode_OK ); 17708 if (!decode_OK) goto decode_failure; 17709 break; 17710 } 17711 case 0xD3: { /* Grp2 CL,Ev */ 17712 Bool decode_OK = True; 17713 if (haveF2orF3(pfx)) goto 
decode_failure; 17714 modrm = getUChar(delta); 17715 am_sz = lengthAMode(pfx,delta); 17716 d_sz = 0; 17717 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 17718 getIRegCL(), "%cl", &decode_OK ); 17719 if (!decode_OK) goto decode_failure; 17720 break; 17721 } 17722 17723 /* ------------------------ (Grp3 extensions) ---------- */ 17724 17725 case 0xF6: { /* Grp3 Eb */ 17726 Bool decode_OK = True; 17727 if (haveF2orF3(pfx)) goto decode_failure; 17728 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 17729 if (!decode_OK) goto decode_failure; 17730 break; 17731 } 17732 case 0xF7: { /* Grp3 Ev */ 17733 Bool decode_OK = True; 17734 if (haveF2orF3(pfx)) goto decode_failure; 17735 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 17736 if (!decode_OK) goto decode_failure; 17737 break; 17738 } 17739 17740 /* ------------------------ (Grp4 extensions) ---------- */ 17741 17742 case 0xFE: { /* Grp4 Eb */ 17743 Bool decode_OK = True; 17744 if (haveF2orF3(pfx)) goto decode_failure; 17745 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 17746 if (!decode_OK) goto decode_failure; 17747 break; 17748 } 17749 17750 /* ------------------------ (Grp5 extensions) ---------- */ 17751 17752 case 0xFF: { /* Grp5 Ev */ 17753 Bool decode_OK = True; 17754 if (haveF2orF3(pfx)) goto decode_failure; 17755 delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK ); 17756 if (!decode_OK) goto decode_failure; 17757 break; 17758 } 17759 17760 /* ------------------------ Escapes to 2-byte opcodes -- */ 17761 17762 case 0x0F: { 17763 opc = getUChar(delta); delta++; 17764 switch (opc) { 17765 17766 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ 17767 17768 case 0xBA: { /* Grp8 Ib,Ev */ 17769 Bool decode_OK = False; 17770 if (haveF2orF3(pfx)) goto decode_failure; 17771 modrm = getUChar(delta); 17772 am_sz = lengthAMode(pfx,delta); 17773 d64 = getSDisp8(delta + am_sz); 17774 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 17775 &decode_OK ); 17776 if (!decode_OK) 17777 goto decode_failure; 17778 break; 17779 } 17780 17781 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ 17782 17783 case 0xBC: /* BSF Gv,Ev */ 17784 if (haveF2orF3(pfx)) goto decode_failure; 17785 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 17786 break; 17787 case 0xBD: /* BSR Gv,Ev */ 17788 if (haveF2orF3(pfx)) goto decode_failure; 17789 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 17790 break; 17791 17792 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ 17793 17794 case 0xC8: /* BSWAP %eax */ 17795 case 0xC9: 17796 case 0xCA: 17797 case 0xCB: 17798 case 0xCC: 17799 case 0xCD: 17800 case 0xCE: 17801 case 0xCF: /* BSWAP %edi */ 17802 if (haveF2orF3(pfx)) goto decode_failure; 17803 /* According to the AMD64 docs, this insn can have size 4 or 17804 8. 
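(The 16-bit form is documented as having an undefined
            result, so any other size is rejected.)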
*/ 17805 if (sz == 4) { 17806 t1 = newTemp(Ity_I32); 17807 t2 = newTemp(Ity_I32); 17808 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 17809 assign( t2, 17810 binop(Iop_Or32, 17811 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 17812 binop(Iop_Or32, 17813 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 17814 mkU32(0x00FF0000)), 17815 binop(Iop_Or32, 17816 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 17817 mkU32(0x0000FF00)), 17818 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 17819 mkU32(0x000000FF) ) 17820 ))) 17821 ); 17822 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 17823 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 17824 break; 17825 } 17826 else if (sz == 8) { 17827 IRTemp m8 = newTemp(Ity_I64); 17828 IRTemp s8 = newTemp(Ity_I64); 17829 IRTemp m16 = newTemp(Ity_I64); 17830 IRTemp s16 = newTemp(Ity_I64); 17831 IRTemp m32 = newTemp(Ity_I64); 17832 t1 = newTemp(Ity_I64); 17833 t2 = newTemp(Ity_I64); 17834 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 17835 17836 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); 17837 assign( s8, 17838 binop(Iop_Or64, 17839 binop(Iop_Shr64, 17840 binop(Iop_And64,mkexpr(t1),mkexpr(m8)), 17841 mkU8(8)), 17842 binop(Iop_And64, 17843 binop(Iop_Shl64,mkexpr(t1),mkU8(8)), 17844 mkexpr(m8)) 17845 ) 17846 ); 17847 17848 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); 17849 assign( s16, 17850 binop(Iop_Or64, 17851 binop(Iop_Shr64, 17852 binop(Iop_And64,mkexpr(s8),mkexpr(m16)), 17853 mkU8(16)), 17854 binop(Iop_And64, 17855 binop(Iop_Shl64,mkexpr(s8),mkU8(16)), 17856 mkexpr(m16)) 17857 ) 17858 ); 17859 17860 assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); 17861 assign( t2, 17862 binop(Iop_Or64, 17863 binop(Iop_Shr64, 17864 binop(Iop_And64,mkexpr(s16),mkexpr(m32)), 17865 mkU8(32)), 17866 binop(Iop_And64, 17867 binop(Iop_Shl64,mkexpr(s16),mkU8(32)), 17868 mkexpr(m32)) 17869 ) 17870 ); 17871 17872 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 17873 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 17874 break; 17875 } else { 17876 goto decode_failure; 17877 } 17878 17879 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ 17880 17881 /* All of these are possible at sizes 2, 4 and 8, but until a 17882 size 2 test case shows up, only handle sizes 4 and 8. 
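In fact the size checks below accept 2 as well, so this
         restriction appears to have since been relaxed.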
*/ 17883 17884 case 0xA3: /* BT Gv,Ev */ 17885 if (haveF2orF3(pfx)) goto decode_failure; 17886 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 17887 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone ); 17888 break; 17889 case 0xB3: /* BTR Gv,Ev */ 17890 if (haveF2orF3(pfx)) goto decode_failure; 17891 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 17892 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset ); 17893 break; 17894 case 0xAB: /* BTS Gv,Ev */ 17895 if (haveF2orF3(pfx)) goto decode_failure; 17896 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 17897 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet ); 17898 break; 17899 case 0xBB: /* BTC Gv,Ev */ 17900 if (haveF2orF3(pfx)) goto decode_failure; 17901 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 17902 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp ); 17903 break; 17904 17905 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ 17906 17907 case 0x40: 17908 case 0x41: 17909 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 17910 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 17911 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 17912 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 17913 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 17914 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 17915 case 0x48: /* CMOVSb (cmov negative) */ 17916 case 0x49: /* CMOVSb (cmov not negative) */ 17917 case 0x4A: /* CMOVP (cmov parity even) */ 17918 case 0x4B: /* CMOVNP (cmov parity odd) */ 17919 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 17920 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 17921 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 17922 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 17923 if (haveF2orF3(pfx)) goto decode_failure; 17924 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); 17925 break; 17926 17927 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ 17928 17929 case 0xB0: { /* CMPXCHG Gb,Eb */ 17930 Bool ok = True; 17931 if (haveF2orF3(pfx)) goto decode_failure; 17932 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); 17933 if (!ok) goto decode_failure; 17934 break; 17935 } 17936 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */ 17937 Bool ok = True; 17938 if (haveF2orF3(pfx)) goto decode_failure; 17939 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; 17940 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); 17941 if (!ok) goto decode_failure; 17942 break; 17943 } 17944 17945 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ 17946 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; 17947 IRTemp expdHi = newTemp(elemTy); 17948 IRTemp expdLo = newTemp(elemTy); 17949 IRTemp dataHi = newTemp(elemTy); 17950 IRTemp dataLo = newTemp(elemTy); 17951 IRTemp oldHi = newTemp(elemTy); 17952 IRTemp oldLo = newTemp(elemTy); 17953 IRTemp flags_old = newTemp(Ity_I64); 17954 IRTemp flags_new = newTemp(Ity_I64); 17955 IRTemp success = newTemp(Ity_I1); 17956 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; 17957 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; 17958 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; 17959 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0); 17960 IRTemp expdHi64 = newTemp(Ity_I64); 17961 IRTemp expdLo64 = newTemp(Ity_I64); 17962 17963 /* Translate this using a DCAS, even if there is no LOCK 17964 prefix. Life is too short to bother with generating two 17965 different translations for the with/without-LOCK-prefix 17966 cases. */ 17967 *expect_CAS = True; 17968 17969 /* Decode, and generate address. 
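Only the memory form with the /1 opcode extension is
            legal; the register form and other extensions are
            rejected below.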
*/ 17970 if (have66orF2orF3(pfx)) goto decode_failure; 17971 if (sz != 4 && sz != 8) goto decode_failure; 17972 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 17973 goto decode_failure; 17974 modrm = getUChar(delta); 17975 if (epartIsReg(modrm)) goto decode_failure; 17976 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 17977 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17978 delta += alen; 17979 17980 /* cmpxchg16b requires an alignment check. */ 17981 if (sz == 8) 17982 gen_SEGV_if_not_16_aligned( addr ); 17983 17984 /* Get the expected and new values. */ 17985 assign( expdHi64, getIReg64(R_RDX) ); 17986 assign( expdLo64, getIReg64(R_RAX) ); 17987 17988 /* These are the correctly-sized expected and new values. 17989 However, we also get expdHi64/expdLo64 above as 64-bits 17990 regardless, because we will need them later in the 32-bit 17991 case (paradoxically). */ 17992 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) 17993 : mkexpr(expdHi64) ); 17994 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 17995 : mkexpr(expdLo64) ); 17996 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 17997 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) ); 17998 17999 /* Do the DCAS */ 18000 stmt( IRStmt_CAS( 18001 mkIRCAS( oldHi, oldLo, 18002 Iend_LE, mkexpr(addr), 18003 mkexpr(expdHi), mkexpr(expdLo), 18004 mkexpr(dataHi), mkexpr(dataLo) 18005 ))); 18006 18007 /* success when oldHi:oldLo == expdHi:expdLo */ 18008 assign( success, 18009 binop(opCasCmpEQ, 18010 binop(opOR, 18011 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 18012 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 18013 ), 18014 zero 18015 )); 18016 18017 /* If the DCAS is successful, that is to say oldHi:oldLo == 18018 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 18019 which is where they came from originally. Both the actual 18020 contents of these two regs, and any shadow values, are 18021 unchanged. If the DCAS fails then we're putting into 18022 RDX:RAX the value seen in memory. */ 18023 /* Now of course there's a complication in the 32-bit case 18024 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 18025 unchanged; but if we use the same scheme as in the 64-bit 18026 case, we get hit by the standard rule that a write to the 18027 bottom 32 bits of an integer register zeros the upper 32 18028 bits. And so the upper halves of RDX and RAX mysteriously 18029 become zero. So we have to stuff back in the original 18030 64-bit values which we previously stashed in 18031 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 18032 /* It's just _so_ much fun ... */ 18033 putIRegRDX( 8, 18034 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 18035 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 18036 : mkexpr(oldHi), 18037 mkexpr(expdHi64) 18038 )); 18039 putIRegRAX( 8, 18040 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 18041 sz == 4 ? 
      /* Copy the success bit into the Z flag and leave the others
         unchanged */
      assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
      assign(
         flags_new,
         binop(Iop_Or64,
               binop(Iop_And64, mkexpr(flags_old),
                                mkU64(~AMD64G_CC_MASK_Z)),
               binop(Iop_Shl64,
                     binop(Iop_And64,
                           unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
                     mkU8(AMD64G_CC_SHIFT_Z)) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

      /* Sheesh.  Aren't you glad it was me and not you that had to
         write and validate all this grunge? */

      DIP("cmpxchg%s %s\n", sz==4 ? "8b" : "16b", dis_buf);
      break;
   }
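   /* A worked example of the flag update in the case above: Z sits
      at its architectural position (bit 6), so on a successful CAS
      the computation amounts to

         flags_new = (flags_old & ~0x40) | (1 << 6)

      -- everything preserved except Z, which is forced to the CAS
      outcome, matching what the hardware insn does.  The
      AMD64G_CC_OP_COPY thunk tag then says that CC_DEP1 already
      holds the literal rflags value. */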

   /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

   case 0xA2: { /* CPUID */
      /* Uses dirty helper:
            void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
         declared to mod rax, wr rbx, rcx, rdx
      */
      IRDirty* d     = NULL;
      HChar*   fName = NULL;
      void*    fAddr = NULL;
      if (haveF2orF3(pfx)) goto decode_failure;
      if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
                               |VEX_HWCAPS_AMD64_CX16)) {
         fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
         fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
         /* This is a Core-2-like machine */
         //fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
         //fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
         /* This is a Core-i5-like machine */
      }
      else {
         /* Give a CPUID for at least a baseline machine, SSE2
            only, and no CX16 */
         fName = "amd64g_dirtyhelper_CPUID_baseline";
         fAddr = &amd64g_dirtyhelper_CPUID_baseline;
      }

      vassert(fName); vassert(fAddr);
      d = unsafeIRDirty_0_N ( 0/*regparms*/,
                              fName, fAddr, mkIRExprVec_0() );
      /* declare guest state effects */
      d->needsBBP = True;
      d->nFxState = 4;
      d->fxState[0].fx     = Ifx_Modify;
      d->fxState[0].offset = OFFB_RAX;
      d->fxState[0].size   = 8;
      d->fxState[1].fx     = Ifx_Write;
      d->fxState[1].offset = OFFB_RBX;
      d->fxState[1].size   = 8;
      d->fxState[2].fx     = Ifx_Modify;
      d->fxState[2].offset = OFFB_RCX;
      d->fxState[2].size   = 8;
      d->fxState[3].fx     = Ifx_Write;
      d->fxState[3].offset = OFFB_RDX;
      d->fxState[3].size   = 8;
      /* execute the dirty call, side-effecting guest state */
      stmt( IRStmt_Dirty(d) );
      /* CPUID is a serialising insn.  So, just in case someone is
         using it as a memory fence ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("cpuid\n");
      break;
   }

   /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

   case 0xB6: /* MOVZXb Eb,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 2 && sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
      break;
   case 0xB7: /* MOVZXw Ew,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
      break;

   case 0xBE: /* MOVSXb Eb,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 2 && sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
      break;
   case 0xBF: /* MOVSXw Ew,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
      break;

//.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
//.. //--
//.. //-- case 0xC3: /* MOVNTI Gv,Ev */
//.. //--    vg_assert(sz == 4);
//.. //--    modrm = getUChar(eip);
//.. //--    vg_assert(!epartIsReg(modrm));
//.. //--    t1 = newTemp(cb);
//.. //--    uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
//.. //--    pair = disAMode ( cb, sorb, eip, dis_buf );
//.. //--    t2 = LOW24(pair);
//.. //--    eip += HI8(pair);
//.. //--    uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
//.. //--    DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
//.. //--    break;

   /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

   case 0xAF: /* IMUL Ev, Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mul_E_G ( vbi, pfx, sz, delta );
      break;

   /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

   case 0x1F:
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) goto decode_failure;
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      DIP("nop%c %s\n", nameISize(sz), dis_buf);
      break;

   /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
   case 0x80:
   case 0x81:
   case 0x82: /* JBb/JNAEb (jump below) */
   case 0x83: /* JNBb/JAEb (jump not below) */
   case 0x84: /* JZb/JEb (jump zero) */
   case 0x85: /* JNZb/JNEb (jump not zero) */
   case 0x86: /* JBEb/JNAb (jump below or equal) */
   case 0x87: /* JNBEb/JAb (jump not below or equal) */
   case 0x88: /* JSb (jump negative) */
   case 0x89: /* JNSb (jump not negative) */
   case 0x8A: /* JP (jump parity even) */
   case 0x8B: /* JNP/JPO (jump parity odd) */
   case 0x8C: /* JLb/JNGEb (jump less) */
   case 0x8D: /* JGEb/JNLb (jump greater or equal) */
   case 0x8E: /* JLEb/JNGb (jump less or equal) */
   case 0x8F: /* JGb/JNLEb (jump greater) */
    { Long   jmpDelta;
      HChar* comment  = "";
      if (haveF2orF3(pfx)) goto decode_failure;
      jmpDelta = getSDisp32(delta);
      d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
      delta += 4;
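      /* A note on the condcode arithmetic below: AMD64 condition
         codes come in complementary even/odd pairs (Z/NZ, S/NS,
         and so on), so XORing the bottom bit of the code -- the
         (1 ^ (opc - 0x80)) below -- yields the negated condition,
         as needed for the side-exit in the assumed-taken case. */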
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, d64) ) {
         /* Speculation: assume this backward branch is taken.  So
            we need to emit a side-exit to the insn following this
            one, on the negation of the condition, and continue at
            the branch target address (d64).  If we wind up back at
            the first instruction of the trace, just stop; it's
            better to let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit(
                  mk_amd64g_calculate_condition(
                     (AMD64Condcode)(1 ^ (opc - 0x80))),
                  Ijk_Boring,
                  IRConst_U64(guest_RIP_bbstart+delta) ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = d64;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
         /* Speculation: assume this forward branch is not taken.
            So we need to emit a side-exit to d64 (the dest) and
            continue disassembling at the insn immediately
            following this one. */
         stmt( IRStmt_Exit(
                  mk_amd64g_calculate_condition((AMD64Condcode)
                                                (opc - 0x80)),
                  Ijk_Boring,
                  IRConst_U64(d64) ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = guest_RIP_bbstart+delta;
         comment = "(assumed not taken)";
      }
      else {
         /* Conservative default translation - end the block at
            this point. */
         jcc_01( (AMD64Condcode)(opc - 0x80),
                 guest_RIP_bbstart+delta,
                 d64 );
         dres.whatNext = Dis_StopHere;
      }
      DIP("j%s-32 0x%llx %s\n",
          name_AMD64Condcode(opc - 0x80), d64, comment);
      break;
    }

   /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
   case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
              /* 0F 0D /1 -- prefetchw mem8 */
      if (have66orF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) goto decode_failure;
      if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
         goto decode_failure;

      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;

      switch (gregLO3ofRM(modrm)) {
         case 0: DIP("prefetch %s\n", dis_buf); break;
         case 1: DIP("prefetchw %s\n", dis_buf); break;
         default: vassert(0); /*NOTREACHED*/
      }
      break;

   /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
   case 0x31: { /* RDTSC */
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "amd64g_dirtyhelper_RDTSC",
                         &amd64g_dirtyhelper_RDTSC,
                         args
                      );
      if (have66orF2orF3(pfx)) goto decode_failure;
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
      putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
      DIP("rdtsc\n");
      break;
   }
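   /* Note that the two 32-bit writes above zero the upper halves
      of RDX and RAX, per the usual AMD64 rule for 32-bit register
      writes; real RDTSC in 64-bit mode behaves the same way, so
      splitting the 64-bit counter as EDX:EAX is faithful. */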

//.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
//..
//.. case 0xA1: /* POP %FS */
//..    dis_pop_segreg( R_FS, sz ); break;
//.. case 0xA9: /* POP %GS */
//..    dis_pop_segreg( R_GS, sz ); break;
//..
//.. case 0xA0: /* PUSH %FS */
//..    dis_push_segreg( R_FS, sz ); break;
//.. case 0xA8: /* PUSH %GS */
//..    dis_push_segreg( R_GS, sz ); break;

   /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
   case 0x90:
   case 0x91:
   case 0x92: /* set-Bb/set-NAEb (set if below) */
   case 0x93: /* set-NBb/set-AEb (set if not below) */
   case 0x94: /* set-Zb/set-Eb (set if zero) */
   case 0x95: /* set-NZb/set-NEb (set if not zero) */
   case 0x96: /* set-BEb/set-NAb (set if below or equal) */
   case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
   case 0x98: /* set-Sb (set if negative) */
   case 0x99: /* set-NSb (set if not negative) */
   case 0x9A: /* set-P (set if parity even) */
   case 0x9B: /* set-NP (set if parity odd) */
   case 0x9C: /* set-Lb/set-NGEb (set if less) */
   case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
   case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
   case 0x9F: /* set-Gb/set-NLEb (set if greater) */
      if (haveF2orF3(pfx)) goto decode_failure;
      t1 = newTemp(Ity_I8);
      assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta++;
         putIRegE(1, pfx, modrm, mkexpr(t1));
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
                           nameIRegE(1,pfx,modrm));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), mkexpr(t1) );
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
      }
      break;

   /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

   case 0xA4: /* SHLDv imm8,Gv,Ev */
      modrm = getUChar(delta);
      d64   = delta + lengthAMode(pfx, delta);
      vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 mkU8(getUChar(d64)), True, /* literal */
                 dis_buf, True /* left */ );
      break;
   case 0xA5: /* SHLDv %cl,Gv,Ev */
      modrm = getUChar(delta);
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 getIRegCL(), False, /* not literal */
                 "%cl", True /* left */ );
      break;

   case 0xAC: /* SHRDv imm8,Gv,Ev */
      modrm = getUChar(delta);
      d64   = delta + lengthAMode(pfx, delta);
      vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 mkU8(getUChar(d64)), True, /* literal */
                 dis_buf, False /* right */ );
      break;
   case 0xAD: /* SHRDv %cl,Gv,Ev */
      modrm = getUChar(delta);
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 getIRegCL(), False, /* not literal */
                 "%cl", False /* right */);
      break;
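   /* For reference, the double-shift semantics dis_SHLRD_Gv_Ev
      implements: for example "shld $4, %rbx, %rax" shifts RAX
      left by 4, with the 4 vacated low bits filled from the top 4
      bits of RBX (RBX itself is unchanged).  SHRD is the mirror
      image: the destination shifts right and the vacated high
      bits are filled from the low bits of the G operand. */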

   /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
   case 0x05: /* SYSCALL */
      guest_RIP_next_mustcheck = True;
      guest_RIP_next_assumed = guest_RIP_bbstart + delta;
      putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
      /* It's important that all guest state is up-to-date
         at this point.  So we declare an end-of-block here, which
         forces any cached guest state to be flushed. */
      jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
      dres.whatNext = Dis_StopHere;
      DIP("syscall\n");
      break;

   /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

   case 0xC0: { /* XADD Gb,Eb */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xC1: { /* XADD Gv,Ev */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
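   /* For reference, the semantics dis_xadd_G_E has to implement:
      "xadd %reg, dst" performs
           tmp := dst ; dst := dst + reg ; reg := tmp
      atomically with respect to other processors when a LOCK
      prefix is present. */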

   /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

   case 0x71:
   case 0x72:
   case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

   case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
   case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
   case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
   case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xFC:
   case 0xFD:
   case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xEC:
   case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xDC:
   case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xF8:
   case 0xF9:
   case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xE8:
   case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xD8:
   case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

   case 0x74:
   case 0x75:
   case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0x64:
   case 0x65:
   case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
   case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
   case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

   case 0x68:
   case 0x69:
   case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0x60:
   case 0x61:
   case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

   case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xF2:
   case 0xF3:

   case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xD2:
   case 0xD3:

   case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
   case 0xE2:
   {
      Long delta0    = delta-1;
      Bool decode_OK = False;

      /* If sz==2 this is SSE, and we assume the SSE decoder has
         already spotted those cases by now. */
      if (sz != 4 && sz != 8)
         goto decode_failure;
      if (have66orF2orF3(pfx))
         goto decode_failure;

      delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }

   case 0x0E: /* FEMMS */
   case 0x77: /* EMMS */
      if (sz != 4)
         goto decode_failure;
      do_EMMS_preamble();
      DIP("{f}emms\n");
      break;

   /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
   case 0x01: /* 0F 01 /0 -- SGDT */
              /* 0F 01 /1 -- SIDT */
   {
      /* This is really revolting, but ... since each processor
         (core) only has one IDT and one GDT, just let the guest
         see it (pass-through semantics).  I can't see any way to
         construct a faked-up value, so don't bother to try. */
      modrm = getUChar(delta);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      if (epartIsReg(modrm)) goto decode_failure;
      if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
         goto decode_failure;
      switch (gregLO3ofRM(modrm)) {
         case 0: DIP("sgdt %s\n", dis_buf); break;
         case 1: DIP("sidt %s\n", dis_buf); break;
         default: vassert(0); /*NOTREACHED*/
      }

      IRDirty* d = unsafeIRDirty_0_N (
                      0/*regparms*/,
                      "amd64g_dirtyhelper_SxDT",
                      &amd64g_dirtyhelper_SxDT,
                      mkIRExprVec_2( mkexpr(addr),
                                     mkU64(gregLO3ofRM(modrm)) )
                   );
      /* declare we're writing memory */
      d->mFx   = Ifx_Write;
      d->mAddr = mkexpr(addr);
      d->mSize = 6;
      stmt( IRStmt_Dirty(d) );
      break;
   }
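   /* The mFx/mAddr/mSize annotations above matter to tools: they
      declare that the helper writes d->mSize bytes at d->mAddr,
      which (for example) lets Memcheck treat those bytes the same
      way it would treat the target of an ordinary IR store. */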

   /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

   default:
      goto decode_failure;
   } /* switch (opc) for the 2-byte opcodes */
   goto decode_success;
   } /* case 0x0F: of primary opcode */

   /* ------------------------ ??? ------------------------ */

   default:
   decode_failure:
      /* All decode failures end up here. */
      vex_printf("vex amd64->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
                 (Int)getUChar(delta_start+0),
                 (Int)getUChar(delta_start+1),
                 (Int)getUChar(delta_start+2),
                 (Int)getUChar(delta_start+3),
                 (Int)getUChar(delta_start+4),
                 (Int)getUChar(delta_start+5) );

      /* Tell the dispatcher that this insn cannot be decoded, and
         so has not been executed, and (is currently) the next to
         be executed.  RIP should be up-to-date since it is made so
         at the start of each insn, but nevertheless be paranoid
         and update it again right now. */
      stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
      jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
      dres.whatNext = Dis_StopHere;
      dres.len      = 0;
      /* We also need to say that a CAS is not expected now,
         regardless of what it might have been set to at the start
         of the function, since the IR that we've emitted just
         above (to synthesise a SIGILL) does not involve any CAS,
         and presumably no other IR has been emitted for this
         (non-decoded) insn. */
      *expect_CAS = False;
      return dres;

   } /* switch (opc) for the main (primary) opcode switch. */

  decode_success:
   /* All decode successes end up here. */
   DIP("\n");
   dres.len = (Int)toUInt(delta - delta_start);
   return dres;
}

#undef DIP
#undef DIS


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_AMD64 ( IRSB*        irsb_IN,
                           Bool         put_IP,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchAMD64);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_is_bigendian    = host_bigendian_IN;
   guest_RIP_curr_instr = guest_IP;
   guest_RIP_bbstart    = guest_IP - delta;

   /* We'll consult these after doing disInstr_AMD64_WRK. */
   guest_RIP_next_assumed   = 0;
   guest_RIP_next_mustcheck = False;

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
                               resteerCisOk,
                               callback_opaque,
                               delta, archinfo, abiinfo );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* If disInstr_AMD64_WRK tried to figure out the next rip, check
      it got it right.  Failure of this assertion is serious and
      denotes a bug in disInstr. */
   if (guest_RIP_next_mustcheck
       && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
      vex_printf("\n");
      vex_printf("assumed next %%rip = 0x%llx\n",
                 guest_RIP_next_assumed );
      vex_printf(" actual next %%rip = 0x%llx\n",
                 guest_RIP_curr_instr + dres.len );
      vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
   }

   /* See comment at the top of disInstr_AMD64_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence
      of IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so
         as to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
                                  resteerCisOk,
                                  callback_opaque,
                                  delta, archinfo, abiinfo );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*------------------------------------------------------------*/
/*--- Unused stuff                                         ---*/
/*------------------------------------------------------------*/

// A potentially more Memcheck-friendly version of gen_LZCNT, if
// this should ever be needed.
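//
// Worked example of the scheme, on the 8-bit value 00010110b:
// smearing the topmost 1 downwards gives 00011111b; inverting
// gives 11100000b; its population count is 3, which is indeed the
// number of leading zeroes of 00010110b.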
//
//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
//{
//   /* Scheme is simple: propagate the most significant 1-bit into
//      all lower positions in the word.  This gives a word of the
//      form 0---01---1.  Now invert it, giving a word of the form
//      1---10---0, then do a population-count idiom (to count the
//      1s, which is the number of leading zeroes, or the word size
//      if the original word was 0). */
//   Int i;
//   IRTemp t[7];
//   for (i = 0; i < 7; i++) {
//      t[i] = newTemp(ty);
//   }
//   if (ty == Ity_I64) {
//      assign(t[0], binop(Iop_Or64, mkexpr(src),
//                         binop(Iop_Shr64, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
//                         binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
//                         binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
//                         binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
//                         binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
//                         binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
//      assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
//      return gen_POPCOUNT(ty, t[6]);
//   }
//   if (ty == Ity_I32) {
//      assign(t[0], binop(Iop_Or32, mkexpr(src),
//                         binop(Iop_Shr32, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
//                         binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
//                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
//                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
//                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
//      return gen_POPCOUNT(ty, t[5]);
//   }
//   if (ty == Ity_I16) {
//      assign(t[0], binop(Iop_Or16, mkexpr(src),
//                         binop(Iop_Shr16, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
//                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
//                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
//                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
//      return gen_POPCOUNT(ty, t[4]);
//   }
//   vassert(0);
//}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/