1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_amd64_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2011 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 /* Translates AMD64 code to IR. */ 37 38 /* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 64-bit value is being written. 42 43 x87 FP Limitations: 44 45 * all arithmetic done at 64 bits 46 47 * no FP exceptions, except for handling stack over/underflow 48 49 * FP rounding mode observed only for float->int conversions and 50 int->float conversions which could lose accuracy, and for 51 float-to-float rounding. For all other operations, 52 round-to-nearest is used, regardless. 53 54 * FP sin/cos/tan/sincos: C2 flag is always cleared. 
IOW the 55 simulation claims the argument is in-range (-2^63 <= arg <= 2^63) 56 even when it isn't. 57 58 * some of the FCOM cases could do with testing -- not convinced 59 that the args are the right way round. 60 61 * FSAVE does not re-initialise the FPU; it should do 62 63 * FINIT not only initialises the FPU environment, it also zeroes 64 all the FP registers. It should leave the registers unchanged. 65 66 RDTSC returns zero, always. 67 68 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 69 per Intel docs this bit has no meaning anyway. Since PUSHF is the 70 only way to observe eflags[1], a proper fix would be to make that 71 bit be set by PUSHF. 72 73 This module uses global variables and so is not MT-safe (if that 74 should ever become relevant). 75 */ 76 77 /* Notes re address size overrides (0x67). 78 79 According to the AMD documentation (24594 Rev 3.09, Sept 2003, 80 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose 81 and System Instructions"), Section 1.2.3 ("Address-Size Override 82 Prefix"): 83 84 0x67 applies to all explicit memory references, causing the top 85 32 bits of the effective address to become zero. 86 87 0x67 has no effect on stack references (push/pop); these always 88 use a 64-bit address. 89 90 0x67 changes the interpretation of instructions which implicitly 91 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used 92 instead. These are: 93 94 cmp{s,sb,sw,sd,sq} 95 in{s,sb,sw,sd} 96 jcxz, jecxz, jrcxz 97 lod{s,sb,sw,sd,sq} 98 loop{,e,bz,be,z} 99 mov{s,sb,sw,sd,sq} 100 out{s,sb,sw,sd} 101 rep{,e,ne,nz} 102 sca{s,sb,sw,sd,sq} 103 sto{s,sb,sw,sd,sq} 104 xlat{,b} */ 105 106 /* "Special" instructions. 107 108 This instruction decoder can decode three special instructions 109 which mean nothing natively (are no-ops as far as regs/mem are 110 concerned) but have meaning for supporting Valgrind. 
A special 111 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D 112 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq 113 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi). 114 Following that, one of the following 3 are allowed (standard 115 interpretation in parentheses): 116 117 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX ) 118 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR 119 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX 120 121 Any other bytes following the 16-byte preamble are illegal and 122 constitute a failure in instruction decoding. This all assumes 123 that the preamble will never occur except in specific code 124 fragments designed for Valgrind to catch. 125 126 No prefixes may precede a "Special" instruction. 127 */ 128 129 /* casLE (implementation of lock-prefixed insns) and rep-prefixed 130 insns: the side-exit back to the start of the insn is done with 131 Ijk_Boring. This is quite wrong, it should be done with 132 Ijk_NoRedir, since otherwise the side exit, which is intended to 133 restart the instruction for whatever reason, could go somewhere 134 entirely else. Doing it right (with Ijk_NoRedir jumps) would make 135 no-redir jumps performance critical, at least for rep-prefixed 136 instructions, since all iterations thereof would involve such a 137 jump. It's not such a big deal with casLE since the side exit is 138 only taken if the CAS fails, that is, the location is contended, 139 which is relatively unlikely. 140 141 Note also, the test for CAS success vs failure is done using 142 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary 143 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it 144 shouldn't definedness-check these comparisons. See 145 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for 146 background/rationale. 147 */ 148 149 /* LOCK prefixed instructions. 
These are translated using IR-level 150 CAS statements (IRCAS) and are believed to preserve atomicity, even 151 from the point of view of some other process racing against a 152 simulated one (presumably they communicate via a shared memory 153 segment). 154 155 Handlers which are aware of LOCK prefixes are: 156 dis_op2_G_E (add, or, adc, sbb, and, sub, xor) 157 dis_cmpxchg_G_E (cmpxchg) 158 dis_Grp1 (add, or, adc, sbb, and, sub, xor) 159 dis_Grp3 (not, neg) 160 dis_Grp4 (inc, dec) 161 dis_Grp5 (inc, dec) 162 dis_Grp8_Imm (bts, btc, btr) 163 dis_bt_G_E (bts, btc, btr) 164 dis_xadd_G_E (xadd) 165 */ 166 167 168 #include "libvex_basictypes.h" 169 #include "libvex_ir.h" 170 #include "libvex.h" 171 #include "libvex_guest_amd64.h" 172 173 #include "main_util.h" 174 #include "main_globals.h" 175 #include "guest_generic_bb_to_IR.h" 176 #include "guest_generic_x87.h" 177 #include "guest_amd64_defs.h" 178 179 180 /*------------------------------------------------------------*/ 181 /*--- Globals ---*/ 182 /*------------------------------------------------------------*/ 183 184 /* These are set at the start of the translation of an insn, right 185 down in disInstr_AMD64, so that we don't have to pass them around 186 endlessly. They are all constant during the translation of any 187 given insn. */ 188 189 /* These are set at the start of the translation of a BB, so 190 that we don't have to pass them around endlessly. */ 191 192 /* We need to know this to do sub-register accesses correctly. */ 193 static Bool host_is_bigendian; 194 195 /* Pointer to the guest code area (points to start of BB, not to the 196 insn being processed). */ 197 static UChar* guest_code; 198 199 /* The guest address corresponding to guest_code[0]. */ 200 static Addr64 guest_RIP_bbstart; 201 202 /* The guest address for the instruction currently being 203 translated. */ 204 static Addr64 guest_RIP_curr_instr; 205 206 /* The IRSB* into which we're generating code. 
*/ 207 static IRSB* irsb; 208 209 /* For ensuring that %rip-relative addressing is done right. A read 210 of %rip generates the address of the next instruction. It may be 211 that we don't conveniently know that inside disAMode(). For sanity 212 checking, if the next insn %rip is needed, we make a guess at what 213 it is, record that guess here, and set the accompanying Bool to 214 indicate that -- after this insn's decode is finished -- that guess 215 needs to be checked. */ 216 217 /* At the start of each insn decode, is set to (0, False). 218 After the decode, if _mustcheck is now True, _assumed is 219 checked. */ 220 221 static Addr64 guest_RIP_next_assumed; 222 static Bool guest_RIP_next_mustcheck; 223 224 225 /*------------------------------------------------------------*/ 226 /*--- Helpers for constructing IR. ---*/ 227 /*------------------------------------------------------------*/ 228 229 /* Generate a new temporary of the given type. */ 230 static IRTemp newTemp ( IRType ty ) 231 { 232 vassert(isPlausibleIRType(ty)); 233 return newIRTemp( irsb->tyenv, ty ); 234 } 235 236 /* Add a statement to the list held by "irsb". */ 237 static void stmt ( IRStmt* st ) 238 { 239 addStmtToIRSB( irsb, st ); 240 } 241 242 /* Generate a statement "dst := e". 
*/ 243 static void assign ( IRTemp dst, IRExpr* e ) 244 { 245 stmt( IRStmt_WrTmp(dst, e) ); 246 } 247 248 static IRExpr* unop ( IROp op, IRExpr* a ) 249 { 250 return IRExpr_Unop(op, a); 251 } 252 253 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 254 { 255 return IRExpr_Binop(op, a1, a2); 256 } 257 258 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 259 { 260 return IRExpr_Triop(op, a1, a2, a3); 261 } 262 263 static IRExpr* mkexpr ( IRTemp tmp ) 264 { 265 return IRExpr_RdTmp(tmp); 266 } 267 268 static IRExpr* mkU8 ( ULong i ) 269 { 270 vassert(i < 256); 271 return IRExpr_Const(IRConst_U8( (UChar)i )); 272 } 273 274 static IRExpr* mkU16 ( ULong i ) 275 { 276 vassert(i < 0x10000ULL); 277 return IRExpr_Const(IRConst_U16( (UShort)i )); 278 } 279 280 static IRExpr* mkU32 ( ULong i ) 281 { 282 vassert(i < 0x100000000ULL); 283 return IRExpr_Const(IRConst_U32( (UInt)i )); 284 } 285 286 static IRExpr* mkU64 ( ULong i ) 287 { 288 return IRExpr_Const(IRConst_U64(i)); 289 } 290 291 static IRExpr* mkU ( IRType ty, ULong i ) 292 { 293 switch (ty) { 294 case Ity_I8: return mkU8(i); 295 case Ity_I16: return mkU16(i); 296 case Ity_I32: return mkU32(i); 297 case Ity_I64: return mkU64(i); 298 default: vpanic("mkU(amd64)"); 299 } 300 } 301 302 static void storeLE ( IRExpr* addr, IRExpr* data ) 303 { 304 stmt( IRStmt_Store(Iend_LE, addr, data) ); 305 } 306 307 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 308 { 309 return IRExpr_Load(Iend_LE, ty, addr); 310 } 311 312 static IROp mkSizedOp ( IRType ty, IROp op8 ) 313 { 314 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 315 || op8 == Iop_Mul8 316 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 317 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 318 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 319 || op8 == Iop_CasCmpNE8 320 || op8 == Iop_Not8 ); 321 switch (ty) { 322 case Ity_I8: return 0 +op8; 323 case Ity_I16: return 1 +op8; 324 case Ity_I32: return 2 +op8; 325 case Ity_I64: return 3 +op8; 
326 default: vpanic("mkSizedOp(amd64)"); 327 } 328 } 329 330 static 331 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) 332 { 333 if (szSmall == 1 && szBig == 4) { 334 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); 335 } 336 if (szSmall == 1 && szBig == 2) { 337 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); 338 } 339 if (szSmall == 2 && szBig == 4) { 340 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); 341 } 342 if (szSmall == 1 && szBig == 8 && !signd) { 343 return unop(Iop_8Uto64, src); 344 } 345 if (szSmall == 1 && szBig == 8 && signd) { 346 return unop(Iop_8Sto64, src); 347 } 348 if (szSmall == 2 && szBig == 8 && !signd) { 349 return unop(Iop_16Uto64, src); 350 } 351 if (szSmall == 2 && szBig == 8 && signd) { 352 return unop(Iop_16Sto64, src); 353 } 354 vpanic("doScalarWidening(amd64)"); 355 } 356 357 358 359 /*------------------------------------------------------------*/ 360 /*--- Debugging output ---*/ 361 /*------------------------------------------------------------*/ 362 363 /* Bomb out if we can't handle something. */ 364 __attribute__ ((noreturn)) 365 static void unimplemented ( HChar* str ) 366 { 367 vex_printf("amd64toIR: unimplemented feature\n"); 368 vpanic(str); 369 } 370 371 #define DIP(format, args...) \ 372 if (vex_traceflags & VEX_TRACE_FE) \ 373 vex_printf(format, ## args) 374 375 #define DIS(buf, format, args...) \ 376 if (vex_traceflags & VEX_TRACE_FE) \ 377 vex_sprintf(buf, format, ## args) 378 379 380 /*------------------------------------------------------------*/ 381 /*--- Offsets of various parts of the amd64 guest state. 
---*/ 382 /*------------------------------------------------------------*/ 383 384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX) 385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX) 386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX) 387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX) 388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP) 389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP) 390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI) 391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI) 392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8) 393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9) 394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10) 395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11) 396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12) 397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13) 398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14) 399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15) 400 401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP) 402 403 #define OFFB_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO) 404 #define OFFB_GS_0x60 offsetof(VexGuestAMD64State,guest_GS_0x60) 405 406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP) 407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1) 408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2) 409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP) 410 411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0]) 412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0]) 413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) 414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG) 415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) 416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) 417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) 418 #define OFFB_FPROUND 
offsetof(VexGuestAMD64State,guest_FPROUND) 419 //.. 420 //.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS) 421 //.. #define OFFB_DS offsetof(VexGuestX86State,guest_DS) 422 //.. #define OFFB_ES offsetof(VexGuestX86State,guest_ES) 423 //.. #define OFFB_FS offsetof(VexGuestX86State,guest_FS) 424 //.. #define OFFB_GS offsetof(VexGuestX86State,guest_GS) 425 //.. #define OFFB_SS offsetof(VexGuestX86State,guest_SS) 426 //.. #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT) 427 //.. #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT) 428 429 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND) 430 #define OFFB_XMM0 offsetof(VexGuestAMD64State,guest_XMM0) 431 #define OFFB_XMM1 offsetof(VexGuestAMD64State,guest_XMM1) 432 #define OFFB_XMM2 offsetof(VexGuestAMD64State,guest_XMM2) 433 #define OFFB_XMM3 offsetof(VexGuestAMD64State,guest_XMM3) 434 #define OFFB_XMM4 offsetof(VexGuestAMD64State,guest_XMM4) 435 #define OFFB_XMM5 offsetof(VexGuestAMD64State,guest_XMM5) 436 #define OFFB_XMM6 offsetof(VexGuestAMD64State,guest_XMM6) 437 #define OFFB_XMM7 offsetof(VexGuestAMD64State,guest_XMM7) 438 #define OFFB_XMM8 offsetof(VexGuestAMD64State,guest_XMM8) 439 #define OFFB_XMM9 offsetof(VexGuestAMD64State,guest_XMM9) 440 #define OFFB_XMM10 offsetof(VexGuestAMD64State,guest_XMM10) 441 #define OFFB_XMM11 offsetof(VexGuestAMD64State,guest_XMM11) 442 #define OFFB_XMM12 offsetof(VexGuestAMD64State,guest_XMM12) 443 #define OFFB_XMM13 offsetof(VexGuestAMD64State,guest_XMM13) 444 #define OFFB_XMM14 offsetof(VexGuestAMD64State,guest_XMM14) 445 #define OFFB_XMM15 offsetof(VexGuestAMD64State,guest_XMM15) 446 #define OFFB_XMM16 offsetof(VexGuestAMD64State,guest_XMM16) 447 448 #define OFFB_EMWARN offsetof(VexGuestAMD64State,guest_EMWARN) 449 #define OFFB_TISTART offsetof(VexGuestAMD64State,guest_TISTART) 450 #define OFFB_TILEN offsetof(VexGuestAMD64State,guest_TILEN) 451 452 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) 453 454 455 
/*------------------------------------------------------------*/ 456 /*--- Helper bits and pieces for deconstructing the ---*/ 457 /*--- amd64 insn stream. ---*/ 458 /*------------------------------------------------------------*/ 459 460 /* This is the AMD64 register encoding -- integer regs. */ 461 #define R_RAX 0 462 #define R_RCX 1 463 #define R_RDX 2 464 #define R_RBX 3 465 #define R_RSP 4 466 #define R_RBP 5 467 #define R_RSI 6 468 #define R_RDI 7 469 #define R_R8 8 470 #define R_R9 9 471 #define R_R10 10 472 #define R_R11 11 473 #define R_R12 12 474 #define R_R13 13 475 #define R_R14 14 476 #define R_R15 15 477 478 //.. #define R_AL (0+R_EAX) 479 //.. #define R_AH (4+R_EAX) 480 481 /* This is the Intel register encoding -- segment regs. */ 482 #define R_ES 0 483 #define R_CS 1 484 #define R_SS 2 485 #define R_DS 3 486 #define R_FS 4 487 #define R_GS 5 488 489 490 /* Various simple conversions */ 491 492 static ULong extend_s_8to64 ( UChar x ) 493 { 494 return (ULong)((((Long)x) << 56) >> 56); 495 } 496 497 static ULong extend_s_16to64 ( UShort x ) 498 { 499 return (ULong)((((Long)x) << 48) >> 48); 500 } 501 502 static ULong extend_s_32to64 ( UInt x ) 503 { 504 return (ULong)((((Long)x) << 32) >> 32); 505 } 506 507 /* Figure out whether the mod and rm parts of a modRM byte refer to a 508 register or memory. If so, the byte will have the form 11XXXYYY, 509 where YYY is the register number. */ 510 inline 511 static Bool epartIsReg ( UChar mod_reg_rm ) 512 { 513 return toBool(0xC0 == (mod_reg_rm & 0xC0)); 514 } 515 516 /* Extract the 'g' field from a modRM byte. This only produces 3 517 bits, which is not a complete register number. You should avoid 518 this function if at all possible. */ 519 inline 520 static Int gregLO3ofRM ( UChar mod_reg_rm ) 521 { 522 return (Int)( (mod_reg_rm >> 3) & 7 ); 523 } 524 525 /* Ditto the 'e' field of a modRM byte. 
*/ 526 inline 527 static Int eregLO3ofRM ( UChar mod_reg_rm ) 528 { 529 return (Int)(mod_reg_rm & 0x7); 530 } 531 532 /* Get a 8/16/32-bit unsigned value out of the insn stream. */ 533 534 static UChar getUChar ( Long delta ) 535 { 536 UChar v = guest_code[delta+0]; 537 return v; 538 } 539 540 static UInt getUDisp16 ( Long delta ) 541 { 542 UInt v = guest_code[delta+1]; v <<= 8; 543 v |= guest_code[delta+0]; 544 return v & 0xFFFF; 545 } 546 547 //.. static UInt getUDisp ( Int size, Long delta ) 548 //.. { 549 //.. switch (size) { 550 //.. case 4: return getUDisp32(delta); 551 //.. case 2: return getUDisp16(delta); 552 //.. case 1: return getUChar(delta); 553 //.. default: vpanic("getUDisp(x86)"); 554 //.. } 555 //.. return 0; /*notreached*/ 556 //.. } 557 558 559 /* Get a byte value out of the insn stream and sign-extend to 64 560 bits. */ 561 static Long getSDisp8 ( Long delta ) 562 { 563 return extend_s_8to64( guest_code[delta] ); 564 } 565 566 /* Get a 16-bit value out of the insn stream and sign-extend to 64 567 bits. */ 568 static Long getSDisp16 ( Long delta ) 569 { 570 UInt v = guest_code[delta+1]; v <<= 8; 571 v |= guest_code[delta+0]; 572 return extend_s_16to64( (UShort)v ); 573 } 574 575 /* Get a 32-bit value out of the insn stream and sign-extend to 64 576 bits. */ 577 static Long getSDisp32 ( Long delta ) 578 { 579 UInt v = guest_code[delta+3]; v <<= 8; 580 v |= guest_code[delta+2]; v <<= 8; 581 v |= guest_code[delta+1]; v <<= 8; 582 v |= guest_code[delta+0]; 583 return extend_s_32to64( v ); 584 } 585 586 /* Get a 64-bit value out of the insn stream. 
*/ 587 static Long getDisp64 ( Long delta ) 588 { 589 ULong v = 0; 590 v |= guest_code[delta+7]; v <<= 8; 591 v |= guest_code[delta+6]; v <<= 8; 592 v |= guest_code[delta+5]; v <<= 8; 593 v |= guest_code[delta+4]; v <<= 8; 594 v |= guest_code[delta+3]; v <<= 8; 595 v |= guest_code[delta+2]; v <<= 8; 596 v |= guest_code[delta+1]; v <<= 8; 597 v |= guest_code[delta+0]; 598 return v; 599 } 600 601 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error 602 if this is called with size==8. Should not happen. */ 603 static Long getSDisp ( Int size, Long delta ) 604 { 605 switch (size) { 606 case 4: return getSDisp32(delta); 607 case 2: return getSDisp16(delta); 608 case 1: return getSDisp8(delta); 609 default: vpanic("getSDisp(amd64)"); 610 } 611 } 612 613 static ULong mkSizeMask ( Int sz ) 614 { 615 switch (sz) { 616 case 1: return 0x00000000000000FFULL; 617 case 2: return 0x000000000000FFFFULL; 618 case 4: return 0x00000000FFFFFFFFULL; 619 case 8: return 0xFFFFFFFFFFFFFFFFULL; 620 default: vpanic("mkSzMask(amd64)"); 621 } 622 } 623 624 static Int imin ( Int a, Int b ) 625 { 626 return (a < b) ? a : b; 627 } 628 629 static IRType szToITy ( Int n ) 630 { 631 switch (n) { 632 case 1: return Ity_I8; 633 case 2: return Ity_I16; 634 case 4: return Ity_I32; 635 case 8: return Ity_I64; 636 default: vex_printf("\nszToITy(%d)\n", n); 637 vpanic("szToITy(amd64)"); 638 } 639 } 640 641 642 /*------------------------------------------------------------*/ 643 /*--- For dealing with prefixes. ---*/ 644 /*------------------------------------------------------------*/ 645 646 /* The idea is to pass around an int holding a bitmask summarising 647 info from the prefixes seen on the current instruction, including 648 info from the REX byte. This info is used in various places, but 649 most especially when making sense of register fields in 650 instructions. 651 652 The top 16 bits of the prefix are 0x3141, just as a hacky way 653 to ensure it really is a valid prefix. 
654 655 Things you can safely assume about a well-formed prefix: 656 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. 657 * if REX is not present then REXW,REXR,REXX,REXB will read 658 as zero. 659 * F2 and F3 will not both be 1. 660 */ 661 662 typedef UInt Prefix; 663 664 #define PFX_ASO (1<<0) /* address-size override present (0x67) */ 665 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */ 666 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */ 667 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */ 668 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */ 669 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */ 670 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */ 671 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */ 672 #define PFX_F2 (1<<8) /* REP/REPE/REPZ prefix present (0xF2) */ 673 #define PFX_F3 (1<<9) /* REPNE/REPNZ prefix present (0xF3) */ 674 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */ 675 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */ 676 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */ 677 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */ 678 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */ 679 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */ 680 681 #define PFX_EMPTY 0x31410000 682 683 static Bool IS_VALID_PFX ( Prefix pfx ) { 684 return toBool((pfx & 0xFFFF0000) == PFX_EMPTY); 685 } 686 687 static Bool haveREX ( Prefix pfx ) { 688 return toBool(pfx & PFX_REX); 689 } 690 691 static Int getRexW ( Prefix pfx ) { 692 return (pfx & PFX_REXW) ? 1 : 0; 693 } 694 /* Apparently unused. 695 static Int getRexR ( Prefix pfx ) { 696 return (pfx & PFX_REXR) ? 1 : 0; 697 } 698 */ 699 static Int getRexX ( Prefix pfx ) { 700 return (pfx & PFX_REXX) ? 1 : 0; 701 } 702 static Int getRexB ( Prefix pfx ) { 703 return (pfx & PFX_REXB) ? 
1 : 0; 704 } 705 706 /* Check a prefix doesn't have F2 or F3 set in it, since usually that 707 completely changes what instruction it really is. */ 708 static Bool haveF2orF3 ( Prefix pfx ) { 709 return toBool((pfx & (PFX_F2|PFX_F3)) > 0); 710 } 711 static Bool haveF2 ( Prefix pfx ) { 712 return toBool((pfx & PFX_F2) > 0); 713 } 714 static Bool haveF3 ( Prefix pfx ) { 715 return toBool((pfx & PFX_F3) > 0); 716 } 717 718 static Bool have66 ( Prefix pfx ) { 719 return toBool((pfx & PFX_66) > 0); 720 } 721 static Bool haveASO ( Prefix pfx ) { 722 return toBool((pfx & PFX_ASO) > 0); 723 } 724 725 /* Return True iff pfx has 66 set and F2 and F3 clear */ 726 static Bool have66noF2noF3 ( Prefix pfx ) 727 { 728 return 729 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); 730 } 731 732 /* Return True iff pfx has F2 set and 66 and F3 clear */ 733 static Bool haveF2no66noF3 ( Prefix pfx ) 734 { 735 return 736 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2); 737 } 738 739 /* Return True iff pfx has F3 set and 66 and F2 clear */ 740 static Bool haveF3no66noF2 ( Prefix pfx ) 741 { 742 return 743 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); 744 } 745 746 /* Return True iff pfx has F3 set and F2 clear */ 747 static Bool haveF3noF2 ( Prefix pfx ) 748 { 749 return 750 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); 751 } 752 753 /* Return True iff pfx has F2 set and F3 clear */ 754 static Bool haveF2noF3 ( Prefix pfx ) 755 { 756 return 757 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); 758 } 759 760 /* Return True iff pfx has 66, F2 and F3 clear */ 761 static Bool haveNo66noF2noF3 ( Prefix pfx ) 762 { 763 return 764 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); 765 } 766 767 /* Return True iff pfx has any of 66, F2 and F3 set */ 768 static Bool have66orF2orF3 ( Prefix pfx ) 769 { 770 return toBool( ! 
haveNo66noF2noF3(pfx) ); 771 } 772 773 /* Return True iff pfx has 66 or F2 set */ 774 static Bool have66orF2 ( Prefix pfx ) 775 { 776 return toBool((pfx & (PFX_66|PFX_F2)) > 0); 777 } 778 779 /* Clear all the segment-override bits in a prefix. */ 780 static Prefix clearSegBits ( Prefix p ) 781 { 782 return 783 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); 784 } 785 786 787 /*------------------------------------------------------------*/ 788 /*--- For dealing with integer registers ---*/ 789 /*------------------------------------------------------------*/ 790 791 /* This is somewhat complex. The rules are: 792 793 For 64, 32 and 16 bit register references, the e or g fields in the 794 modrm bytes supply the low 3 bits of the register number. The 795 fourth (most-significant) bit of the register number is supplied by 796 the REX byte, if it is present; else that bit is taken to be zero. 797 798 The REX.R bit supplies the high bit corresponding to the g register 799 field, and the REX.B bit supplies the high bit corresponding to the 800 e register field (when the mod part of modrm indicates that modrm's 801 e component refers to a register and not to memory). 802 803 The REX.X bit supplies a high register bit for certain registers 804 in SIB address modes, and is generally rarely used. 805 806 For 8 bit register references, the presence of the REX byte itself 807 has significance. If there is no REX present, then the 3-bit 808 number extracted from the modrm e or g field is treated as an index 809 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the 810 old x86 encoding scheme. 811 812 But if there is a REX present, the register reference is 813 interpreted in the same way as for 64/32/16-bit references: a high 814 bit is extracted from REX, giving a 4-bit number, and the denoted 815 register is the lowest 8 bits of the 16 integer registers denoted 816 by the number. 
In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/


/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ?
*/ 842 843 static Int integerGuestReg64Offset ( UInt reg ) 844 { 845 switch (reg) { 846 case R_RAX: return OFFB_RAX; 847 case R_RCX: return OFFB_RCX; 848 case R_RDX: return OFFB_RDX; 849 case R_RBX: return OFFB_RBX; 850 case R_RSP: return OFFB_RSP; 851 case R_RBP: return OFFB_RBP; 852 case R_RSI: return OFFB_RSI; 853 case R_RDI: return OFFB_RDI; 854 case R_R8: return OFFB_R8; 855 case R_R9: return OFFB_R9; 856 case R_R10: return OFFB_R10; 857 case R_R11: return OFFB_R11; 858 case R_R12: return OFFB_R12; 859 case R_R13: return OFFB_R13; 860 case R_R14: return OFFB_R14; 861 case R_R15: return OFFB_R15; 862 default: vpanic("integerGuestReg64Offset(amd64)"); 863 } 864 } 865 866 867 /* Produce the name of an integer register, for printing purposes. 868 reg is a number in the range 0 .. 15 that has been generated from a 869 3-bit reg-field number and a REX extension bit. irregular denotes 870 the case where sz==1 and no REX byte is present. */ 871 872 static 873 HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) 874 { 875 static HChar* ireg64_names[16] 876 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 877 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 878 static HChar* ireg32_names[16] 879 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 880 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; 881 static HChar* ireg16_names[16] 882 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", 883 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; 884 static HChar* ireg8_names[16] 885 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", 886 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; 887 static HChar* ireg8_irregular[8] 888 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; 889 890 vassert(reg < 16); 891 if (sz == 1) { 892 if (irregular) 893 vassert(reg < 8); 894 } else { 895 vassert(irregular == False); 896 } 897 898 switch (sz) { 899 case 
8: return ireg64_names[reg]; 900 case 4: return ireg32_names[reg]; 901 case 2: return ireg16_names[reg]; 902 case 1: if (irregular) { 903 return ireg8_irregular[reg]; 904 } else { 905 return ireg8_names[reg]; 906 } 907 default: vpanic("nameIReg(amd64)"); 908 } 909 } 910 911 /* Using the same argument conventions as nameIReg, produce the 912 guest state offset of an integer register. */ 913 914 static 915 Int offsetIReg ( Int sz, UInt reg, Bool irregular ) 916 { 917 vassert(reg < 16); 918 if (sz == 1) { 919 if (irregular) 920 vassert(reg < 8); 921 } else { 922 vassert(irregular == False); 923 } 924 925 /* Deal with irregular case -- sz==1 and no REX present */ 926 if (sz == 1 && irregular) { 927 switch (reg) { 928 case R_RSP: return 1+ OFFB_RAX; 929 case R_RBP: return 1+ OFFB_RCX; 930 case R_RSI: return 1+ OFFB_RDX; 931 case R_RDI: return 1+ OFFB_RBX; 932 default: break; /* use the normal case */ 933 } 934 } 935 936 /* Normal case */ 937 return integerGuestReg64Offset(reg); 938 } 939 940 941 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */ 942 943 static IRExpr* getIRegCL ( void ) 944 { 945 vassert(!host_is_bigendian); 946 return IRExpr_Get( OFFB_RCX, Ity_I8 ); 947 } 948 949 950 /* Write to the %AH register. */ 951 952 static void putIRegAH ( IRExpr* e ) 953 { 954 vassert(!host_is_bigendian); 955 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); 956 stmt( IRStmt_Put( OFFB_RAX+1, e ) ); 957 } 958 959 960 /* Read/write various widths of %RAX, as it has various 961 special-purpose uses. 
*/ 962 963 static HChar* nameIRegRAX ( Int sz ) 964 { 965 switch (sz) { 966 case 1: return "%al"; 967 case 2: return "%ax"; 968 case 4: return "%eax"; 969 case 8: return "%rax"; 970 default: vpanic("nameIRegRAX(amd64)"); 971 } 972 } 973 974 static IRExpr* getIRegRAX ( Int sz ) 975 { 976 vassert(!host_is_bigendian); 977 switch (sz) { 978 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 ); 979 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 ); 980 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 )); 981 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 ); 982 default: vpanic("getIRegRAX(amd64)"); 983 } 984 } 985 986 static void putIRegRAX ( Int sz, IRExpr* e ) 987 { 988 IRType ty = typeOfIRExpr(irsb->tyenv, e); 989 vassert(!host_is_bigendian); 990 switch (sz) { 991 case 8: vassert(ty == Ity_I64); 992 stmt( IRStmt_Put( OFFB_RAX, e )); 993 break; 994 case 4: vassert(ty == Ity_I32); 995 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) )); 996 break; 997 case 2: vassert(ty == Ity_I16); 998 stmt( IRStmt_Put( OFFB_RAX, e )); 999 break; 1000 case 1: vassert(ty == Ity_I8); 1001 stmt( IRStmt_Put( OFFB_RAX, e )); 1002 break; 1003 default: vpanic("putIRegRAX(amd64)"); 1004 } 1005 } 1006 1007 1008 /* Read/write various widths of %RDX, as it has various 1009 special-purpose uses. 
*/ 1010 1011 static HChar* nameIRegRDX ( Int sz ) 1012 { 1013 switch (sz) { 1014 case 1: return "%dl"; 1015 case 2: return "%dx"; 1016 case 4: return "%edx"; 1017 case 8: return "%rdx"; 1018 default: vpanic("nameIRegRDX(amd64)"); 1019 } 1020 } 1021 1022 static IRExpr* getIRegRDX ( Int sz ) 1023 { 1024 vassert(!host_is_bigendian); 1025 switch (sz) { 1026 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 ); 1027 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 ); 1028 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 )); 1029 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 ); 1030 default: vpanic("getIRegRDX(amd64)"); 1031 } 1032 } 1033 1034 static void putIRegRDX ( Int sz, IRExpr* e ) 1035 { 1036 vassert(!host_is_bigendian); 1037 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1038 switch (sz) { 1039 case 8: stmt( IRStmt_Put( OFFB_RDX, e )); 1040 break; 1041 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) )); 1042 break; 1043 case 2: stmt( IRStmt_Put( OFFB_RDX, e )); 1044 break; 1045 case 1: stmt( IRStmt_Put( OFFB_RDX, e )); 1046 break; 1047 default: vpanic("putIRegRDX(amd64)"); 1048 } 1049 } 1050 1051 1052 /* Simplistic functions to deal with the integer registers as a 1053 straightforward bank of 16 64-bit regs. */ 1054 1055 static IRExpr* getIReg64 ( UInt regno ) 1056 { 1057 return IRExpr_Get( integerGuestReg64Offset(regno), 1058 Ity_I64 ); 1059 } 1060 1061 static void putIReg64 ( UInt regno, IRExpr* e ) 1062 { 1063 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1064 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) ); 1065 } 1066 1067 static HChar* nameIReg64 ( UInt regno ) 1068 { 1069 return nameIReg( 8, regno, False ); 1070 } 1071 1072 1073 /* Simplistic functions to deal with the lower halves of integer 1074 registers as a straightforward bank of 16 32-bit regs. 
*/ 1075 1076 static IRExpr* getIReg32 ( UInt regno ) 1077 { 1078 vassert(!host_is_bigendian); 1079 return unop(Iop_64to32, 1080 IRExpr_Get( integerGuestReg64Offset(regno), 1081 Ity_I64 )); 1082 } 1083 1084 static void putIReg32 ( UInt regno, IRExpr* e ) 1085 { 1086 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1087 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1088 unop(Iop_32Uto64,e) ) ); 1089 } 1090 1091 static HChar* nameIReg32 ( UInt regno ) 1092 { 1093 return nameIReg( 4, regno, False ); 1094 } 1095 1096 1097 /* Simplistic functions to deal with the lower quarters of integer 1098 registers as a straightforward bank of 16 16-bit regs. */ 1099 1100 static IRExpr* getIReg16 ( UInt regno ) 1101 { 1102 vassert(!host_is_bigendian); 1103 return IRExpr_Get( integerGuestReg64Offset(regno), 1104 Ity_I16 ); 1105 } 1106 1107 static void putIReg16 ( UInt regno, IRExpr* e ) 1108 { 1109 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1110 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1111 unop(Iop_16Uto64,e) ) ); 1112 } 1113 1114 static HChar* nameIReg16 ( UInt regno ) 1115 { 1116 return nameIReg( 2, regno, False ); 1117 } 1118 1119 1120 /* Sometimes what we know is a 3-bit register number, a REX byte, and 1121 which field of the REX byte is to be used to extend to a 4-bit 1122 number. These functions cater for that situation. 
*/
/* Fetch the 64-bit register denoted by a 3-bit field plus the REX.X
   bit, which supplies bit 3 of the register number. */
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

/* As getIReg64rexX, but produce the register's name instead. */
static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

/* Name the register denoted by a 3-bit field plus the REX.B bit.
   When sz==1 and no REX prefix is present, the irregular (legacy
   high-byte %ah/%ch/%dh/%bh) naming applies. */
static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

/* Fetch the register denoted by a 3-bit field plus the REX.B bit, at
   the given width. */
static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      /* 32-bit reads are done as a 64-bit read, narrowed.  Note that
         after the assignment just above, the sz==1 test below is
         always False; the irregular case can only arise for
         byte-sized accesses. */
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                 )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

/* Write the register denoted by a 3-bit field plus the REX.B bit.
   32-bit writes are zero-extended into the full 64-bit register. */
static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
1187 */ 1188 /* Extract the g reg field from a modRM byte, and augment it using the 1189 REX.R bit from the supplied REX byte. The R bit usually is 1190 associated with the g register field. 1191 */ 1192 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1193 { 1194 Int reg = (Int)( (mod_reg_rm >> 3) & 7 ); 1195 reg += (pfx & PFX_REXR) ? 8 : 0; 1196 return reg; 1197 } 1198 1199 /* Extract the e reg field from a modRM byte, and augment it using the 1200 REX.B bit from the supplied REX byte. The B bit usually is 1201 associated with the e register field (when modrm indicates e is a 1202 register, that is). 1203 */ 1204 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1205 { 1206 Int rm; 1207 vassert(epartIsReg(mod_reg_rm)); 1208 rm = (Int)(mod_reg_rm & 0x7); 1209 rm += (pfx & PFX_REXB) ? 8 : 0; 1210 return rm; 1211 } 1212 1213 1214 /* General functions for dealing with integer register access. */ 1215 1216 /* Produce the guest state offset for a reference to the 'g' register 1217 field in a modrm byte, taking into account REX (or its absence), 1218 and the size of the access. 
1219 */ 1220 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1221 { 1222 UInt reg; 1223 vassert(!host_is_bigendian); 1224 vassert(IS_VALID_PFX(pfx)); 1225 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1226 reg = gregOfRexRM( pfx, mod_reg_rm ); 1227 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1228 } 1229 1230 static 1231 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1232 { 1233 if (sz == 4) { 1234 sz = 8; 1235 return unop(Iop_64to32, 1236 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1237 szToITy(sz) )); 1238 } else { 1239 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1240 szToITy(sz) ); 1241 } 1242 } 1243 1244 static 1245 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1246 { 1247 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1248 if (sz == 4) { 1249 e = unop(Iop_32Uto64,e); 1250 } 1251 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) ); 1252 } 1253 1254 static 1255 HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1256 { 1257 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), 1258 toBool(sz==1 && !haveREX(pfx)) ); 1259 } 1260 1261 1262 /* Produce the guest state offset for a reference to the 'e' register 1263 field in a modrm byte, taking into account REX (or its absence), 1264 and the size of the access. eregOfRexRM will assert if mod_reg_rm 1265 denotes a memory access rather than a register access. 
*/
/* Guest-state offset of the 'e' register named by modrm+REX.B, at the
   given access width.  Asserts (via eregOfRexRM) if modrm denotes
   memory rather than a register. */
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

/* Fetch the 'e' register at the given width.  32-bit reads are done
   as a 64-bit read, narrowed. */
static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

/* Write the 'e' register.  32-bit writes zero-extend into the full
   64-bit register. */
static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

//.. static Int segmentGuestRegOffset ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return OFFB_ES;
//..       case R_CS: return OFFB_CS;
//..       case R_SS: return OFFB_SS;
//..       case R_DS: return OFFB_DS;
//..       case R_FS: return OFFB_FS;
//..       case R_GS: return OFFB_GS;
//..       default: vpanic("segmentGuestRegOffset(x86)");
//..    }
//.. }
} 1325 1326 static Int xmmGuestRegOffset ( UInt xmmreg ) 1327 { 1328 switch (xmmreg) { 1329 case 0: return OFFB_XMM0; 1330 case 1: return OFFB_XMM1; 1331 case 2: return OFFB_XMM2; 1332 case 3: return OFFB_XMM3; 1333 case 4: return OFFB_XMM4; 1334 case 5: return OFFB_XMM5; 1335 case 6: return OFFB_XMM6; 1336 case 7: return OFFB_XMM7; 1337 case 8: return OFFB_XMM8; 1338 case 9: return OFFB_XMM9; 1339 case 10: return OFFB_XMM10; 1340 case 11: return OFFB_XMM11; 1341 case 12: return OFFB_XMM12; 1342 case 13: return OFFB_XMM13; 1343 case 14: return OFFB_XMM14; 1344 case 15: return OFFB_XMM15; 1345 default: vpanic("xmmGuestRegOffset(amd64)"); 1346 } 1347 } 1348 1349 /* Lanes of vector registers are always numbered from zero being the 1350 least significant lane (rightmost in the register). */ 1351 1352 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) 1353 { 1354 /* Correct for little-endian host only. */ 1355 vassert(!host_is_bigendian); 1356 vassert(laneno >= 0 && laneno < 8); 1357 return xmmGuestRegOffset( xmmreg ) + 2 * laneno; 1358 } 1359 1360 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) 1361 { 1362 /* Correct for little-endian host only. */ 1363 vassert(!host_is_bigendian); 1364 vassert(laneno >= 0 && laneno < 4); 1365 return xmmGuestRegOffset( xmmreg ) + 4 * laneno; 1366 } 1367 1368 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) 1369 { 1370 /* Correct for little-endian host only. */ 1371 vassert(!host_is_bigendian); 1372 vassert(laneno >= 0 && laneno < 2); 1373 return xmmGuestRegOffset( xmmreg ) + 8 * laneno; 1374 } 1375 1376 //.. static IRExpr* getSReg ( UInt sreg ) 1377 //.. { 1378 //.. return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 ); 1379 //.. } 1380 //.. 1381 //.. static void putSReg ( UInt sreg, IRExpr* e ) 1382 //.. { 1383 //.. vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1384 //.. stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) ); 1385 //.. 
} 1386 1387 static IRExpr* getXMMReg ( UInt xmmreg ) 1388 { 1389 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); 1390 } 1391 1392 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) 1393 { 1394 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); 1395 } 1396 1397 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) 1398 { 1399 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); 1400 } 1401 1402 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) 1403 { 1404 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); 1405 } 1406 1407 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) 1408 { 1409 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); 1410 } 1411 1412 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno ) 1413 { 1414 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 ); 1415 } 1416 1417 static void putXMMReg ( UInt xmmreg, IRExpr* e ) 1418 { 1419 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1420 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); 1421 } 1422 1423 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) 1424 { 1425 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1426 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1427 } 1428 1429 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) 1430 { 1431 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1432 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1433 } 1434 1435 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) 1436 { 1437 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1438 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1439 } 1440 1441 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) 1442 { 1443 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1444 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1445 } 1446 1447 static void 
putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e ) 1448 { 1449 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1450 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) ); 1451 } 1452 1453 static IRExpr* mkV128 ( UShort mask ) 1454 { 1455 return IRExpr_Const(IRConst_V128(mask)); 1456 } 1457 1458 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) 1459 { 1460 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); 1461 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); 1462 return unop(Iop_64to1, 1463 binop(Iop_And64, 1464 unop(Iop_1Uto64,x), 1465 unop(Iop_1Uto64,y))); 1466 } 1467 1468 /* Generate a compare-and-swap operation, operating on memory at 1469 'addr'. The expected value is 'expVal' and the new value is 1470 'newVal'. If the operation fails, then transfer control (with a 1471 no-redir jump (XXX no -- see comment at top of this file)) to 1472 'restart_point', which is presumably the address of the guest 1473 instruction again -- retrying, essentially. */ 1474 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, 1475 Addr64 restart_point ) 1476 { 1477 IRCAS* cas; 1478 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); 1479 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); 1480 IRTemp oldTmp = newTemp(tyE); 1481 IRTemp expTmp = newTemp(tyE); 1482 vassert(tyE == tyN); 1483 vassert(tyE == Ity_I64 || tyE == Ity_I32 1484 || tyE == Ity_I16 || tyE == Ity_I8); 1485 assign(expTmp, expVal); 1486 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 1487 NULL, mkexpr(expTmp), NULL, newVal ); 1488 stmt( IRStmt_CAS(cas) ); 1489 stmt( IRStmt_Exit( 1490 binop( mkSizedOp(tyE,Iop_CasCmpNE8), 1491 mkexpr(oldTmp), mkexpr(expTmp) ), 1492 Ijk_Boring, /*Ijk_NoRedir*/ 1493 IRConst_U64( restart_point ) 1494 )); 1495 } 1496 1497 1498 /*------------------------------------------------------------*/ 1499 /*--- Helpers for %rflags. ---*/ 1500 /*------------------------------------------------------------*/ 1501 1502 /* -------------- Evaluating the flags-thunk. 
-------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2.  (The mask bits number the call's
      arguments: bit 0 is CC_OP, bit 3 is CC_NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  (Here bit 0
      is the condition argument itself, bit 1 is CC_OP and bit 4 is
      CC_NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   /* The helper returns a 64-bit 0/1 value; narrow it to Ity_I1. */
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64.
*/
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

/* True for the two add/subtract base ops the thunk builders accept. */
static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

/* True for the three bitwise-logic base ops. */
static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64.  (Comment previously said
   "to 32", which was a typo -- every case produces Ity_I64.) */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense.
*/
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   /* No valid narrowing: report the offending pair and give up. */
   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op:  ccOp 0..3 selects the B/W/L/Q
   variant of the base thunk operation. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}


/* Set the OP and DEP1 fields only, and write zero to DEP2.
*/

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      /* All three logic ops share the LOGIC thunk family. */
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value.
      Each Put is guarded by a Mux0X: when guard is 0 (shift amount
      was zero) the old thunk field is written back unchanged. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                   mkU64(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                   widenUto64(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                   widenUto64(mkexpr(resUS)))) );
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments.
*/ 1767 1768 static 1769 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1770 { 1771 switch (ty) { 1772 case Ity_I8: 1773 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1774 break; 1775 case Ity_I16: 1776 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1777 break; 1778 case Ity_I32: 1779 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1780 break; 1781 case Ity_I64: 1782 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1783 break; 1784 default: 1785 vpanic("setFlags_MUL(amd64)"); 1786 } 1787 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1788 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1789 } 1790 1791 1792 /* -------------- Condition codes. -------------- */ 1793 1794 /* Condition codes, using the AMD encoding. */ 1795 1796 static HChar* name_AMD64Condcode ( AMD64Condcode cond ) 1797 { 1798 switch (cond) { 1799 case AMD64CondO: return "o"; 1800 case AMD64CondNO: return "no"; 1801 case AMD64CondB: return "b"; 1802 case AMD64CondNB: return "ae"; /*"nb";*/ 1803 case AMD64CondZ: return "e"; /*"z";*/ 1804 case AMD64CondNZ: return "ne"; /*"nz";*/ 1805 case AMD64CondBE: return "be"; 1806 case AMD64CondNBE: return "a"; /*"nbe";*/ 1807 case AMD64CondS: return "s"; 1808 case AMD64CondNS: return "ns"; 1809 case AMD64CondP: return "p"; 1810 case AMD64CondNP: return "np"; 1811 case AMD64CondL: return "l"; 1812 case AMD64CondNL: return "ge"; /*"nl";*/ 1813 case AMD64CondLE: return "le"; 1814 case AMD64CondNLE: return "g"; /*"nle";*/ 1815 case AMD64CondAlways: return "ALWAYS"; 1816 default: vpanic("name_AMD64Condcode"); 1817 } 1818 } 1819 1820 static 1821 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond, 1822 /*OUT*/Bool* needInvert ) 1823 { 1824 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE); 1825 if (cond & 1) { 1826 *needInvert = True; 1827 return cond-1; 1828 } else { 1829 *needInvert = False; 1830 return cond; 1831 } 1832 } 1833 1834 1835 /* -------------- Helpers for ADD/SUB with carry. 
-------------- */ 1836 1837 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1838 appropriately. 1839 1840 Optionally, generate a store for the 'tres' value. This can either 1841 be a normal store, or it can be a cas-with-possible-failure style 1842 store: 1843 1844 if taddr is IRTemp_INVALID, then no store is generated. 1845 1846 if taddr is not IRTemp_INVALID, then a store (using taddr as 1847 the address) is generated: 1848 1849 if texpVal is IRTemp_INVALID then a normal store is 1850 generated, and restart_point must be zero (it is irrelevant). 1851 1852 if texpVal is not IRTemp_INVALID then a cas-style store is 1853 generated. texpVal is the expected value, restart_point 1854 is the restart point if the store fails, and texpVal must 1855 have the same type as tres. 1856 1857 */ 1858 static void helper_ADC ( Int sz, 1859 IRTemp tres, IRTemp ta1, IRTemp ta2, 1860 /* info about optional store: */ 1861 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1862 { 1863 UInt thunkOp; 1864 IRType ty = szToITy(sz); 1865 IRTemp oldc = newTemp(Ity_I64); 1866 IRTemp oldcn = newTemp(ty); 1867 IROp plus = mkSizedOp(ty, Iop_Add8); 1868 IROp xor = mkSizedOp(ty, Iop_Xor8); 1869 1870 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 1871 1872 switch (sz) { 1873 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break; 1874 case 4: thunkOp = AMD64G_CC_OP_ADCL; break; 1875 case 2: thunkOp = AMD64G_CC_OP_ADCW; break; 1876 case 1: thunkOp = AMD64G_CC_OP_ADCB; break; 1877 default: vassert(0); 1878 } 1879 1880 /* oldc = old carry flag, 0 or 1 */ 1881 assign( oldc, binop(Iop_And64, 1882 mk_amd64g_calculate_rflags_c(), 1883 mkU64(1)) ); 1884 1885 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 1886 1887 assign( tres, binop(plus, 1888 binop(plus,mkexpr(ta1),mkexpr(ta2)), 1889 mkexpr(oldcn)) ); 1890 1891 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 1892 start of this function. 
   */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         /* Plain (non-LOCKed) store of the result. */
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Park the operands in the flags thunk so the condition codes can
      be computed lazily later.  NDEP carries the old carry bit. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   /* Narrow the carry to the operand size for the subtraction. */
   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - oldc */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Lazy flags thunk, as in helper_ADC above. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

/* Name of the group-1 (immediate ALU) op selected by the modRM 'reg'
   field. */
static HChar* nameGrp1 ( Int opc_aux )
{
   static HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

/* Name of the group-2 (shift/rotate) op.  Note /6 is an alias of shl. */
static HChar* nameGrp2 ( Int opc_aux )
{
   static HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

/* Group 4: only /0 (inc) and /1 (dec) are valid. */
static HChar* nameGrp4 ( Int opc_aux )
{
   static HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

/* Group 5: /0 .. /6 are valid; /7 is undefined. */
static HChar* nameGrp5 ( Int opc_aux )
{
   static HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

/* Group 8 (bit test ops): only /4 .. /7 are valid. */
static HChar* nameGrp8 ( Int opc_aux )
{
   static HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

//.. static HChar* nameSReg ( UInt sreg )
//.. {
//..    switch (sreg) {
//..       case R_ES: return "%es";
//..       case R_CS: return "%cs";
//..       case R_SS: return "%ss";
//..       case R_DS: return "%ds";
//..       case R_FS: return "%fs";
//..       case R_GS: return "%gs";
//..       default: vpanic("nameSReg(x86)");
//..    }
//.. }

static HChar* nameMMXReg ( Int mmxreg )
{
   static HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

static HChar* nameXMMReg ( Int xmmreg )
{
   static HChar* xmm_names[16]
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

/* Granularity suffix (b/w/d/q) for MMX operations. */
static HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

/* AT&T-style integer operand size suffix character. */
static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/ 2072 2073 static void jmp_lit( IRJumpKind kind, Addr64 d64 ) 2074 { 2075 irsb->next = mkU64(d64); 2076 irsb->jumpkind = kind; 2077 } 2078 2079 static void jmp_treg( IRJumpKind kind, IRTemp t ) 2080 { 2081 irsb->next = mkexpr(t); 2082 irsb->jumpkind = kind; 2083 } 2084 2085 static 2086 void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) 2087 { 2088 Bool invert; 2089 AMD64Condcode condPos; 2090 condPos = positiveIse_AMD64Condcode ( cond, &invert ); 2091 if (invert) { 2092 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2093 Ijk_Boring, 2094 IRConst_U64(d64_false) ) ); 2095 irsb->next = mkU64(d64_true); 2096 irsb->jumpkind = Ijk_Boring; 2097 } else { 2098 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2099 Ijk_Boring, 2100 IRConst_U64(d64_true) ) ); 2101 irsb->next = mkU64(d64_false); 2102 irsb->jumpkind = Ijk_Boring; 2103 } 2104 } 2105 2106 /* Let new_rsp be the %rsp value after a call/return. Let nia be the 2107 guest address of the next instruction to be executed. 2108 2109 This function generates an AbiHint to say that -128(%rsp) 2110 .. -1(%rsp) should now be regarded as uninitialised. 2111 */ 2112 static 2113 void make_redzone_AbiHint ( VexAbiInfo* vbi, 2114 IRTemp new_rsp, IRTemp nia, HChar* who ) 2115 { 2116 Int szB = vbi->guest_stack_redzone_size; 2117 vassert(szB >= 0); 2118 2119 /* A bit of a kludge. Currently the only AbI we've guested AMD64 2120 for is ELF. So just check it's the expected 128 value 2121 (paranoia). 
*/ 2122 vassert(szB == 128); 2123 2124 if (0) vex_printf("AbiHint: %s\n", who); 2125 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64); 2126 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); 2127 if (szB > 0) 2128 stmt( IRStmt_AbiHint( 2129 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 2130 szB, 2131 mkexpr(nia) 2132 )); 2133 } 2134 2135 2136 /*------------------------------------------------------------*/ 2137 /*--- Disassembling addressing modes ---*/ 2138 /*------------------------------------------------------------*/ 2139 2140 static 2141 HChar* segRegTxt ( Prefix pfx ) 2142 { 2143 if (pfx & PFX_CS) return "%cs:"; 2144 if (pfx & PFX_DS) return "%ds:"; 2145 if (pfx & PFX_ES) return "%es:"; 2146 if (pfx & PFX_FS) return "%fs:"; 2147 if (pfx & PFX_GS) return "%gs:"; 2148 if (pfx & PFX_SS) return "%ss:"; 2149 return ""; /* no override */ 2150 } 2151 2152 2153 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 2154 linear address by adding any required segment override as indicated 2155 by sorb, and also dealing with any address size override 2156 present. */ 2157 static 2158 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi, 2159 Prefix pfx, IRExpr* virtual ) 2160 { 2161 /* --- segment overrides --- */ 2162 if (pfx & PFX_FS) { 2163 if (vbi->guest_amd64_assume_fs_is_zero) { 2164 /* Note that this is a linux-kernel specific hack that relies 2165 on the assumption that %fs is always zero. */ 2166 /* return virtual + guest_FS_ZERO. */ 2167 virtual = binop(Iop_Add64, virtual, 2168 IRExpr_Get(OFFB_FS_ZERO, Ity_I64)); 2169 } else { 2170 unimplemented("amd64 %fs segment override"); 2171 } 2172 } 2173 2174 if (pfx & PFX_GS) { 2175 if (vbi->guest_amd64_assume_gs_is_0x60) { 2176 /* Note that this is a darwin-kernel specific hack that relies 2177 on the assumption that %gs is always 0x60. */ 2178 /* return virtual + guest_GS_0x60. 
*/ 2179 virtual = binop(Iop_Add64, virtual, 2180 IRExpr_Get(OFFB_GS_0x60, Ity_I64)); 2181 } else { 2182 unimplemented("amd64 %gs segment override"); 2183 } 2184 } 2185 2186 /* cs, ds, es and ss are simply ignored in 64-bit mode. */ 2187 2188 /* --- address size override --- */ 2189 if (haveASO(pfx)) 2190 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual)); 2191 2192 return virtual; 2193 } 2194 2195 //.. { 2196 //.. Int sreg; 2197 //.. IRType hWordTy; 2198 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 2199 //.. 2200 //.. if (sorb == 0) 2201 //.. /* the common case - no override */ 2202 //.. return virtual; 2203 //.. 2204 //.. switch (sorb) { 2205 //.. case 0x3E: sreg = R_DS; break; 2206 //.. case 0x26: sreg = R_ES; break; 2207 //.. case 0x64: sreg = R_FS; break; 2208 //.. case 0x65: sreg = R_GS; break; 2209 //.. default: vpanic("handleAddrOverrides(x86,guest)"); 2210 //.. } 2211 //.. 2212 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 2213 //.. 2214 //.. seg_selector = newTemp(Ity_I32); 2215 //.. ldt_ptr = newTemp(hWordTy); 2216 //.. gdt_ptr = newTemp(hWordTy); 2217 //.. r64 = newTemp(Ity_I64); 2218 //.. 2219 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 2220 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 2221 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 2222 //.. 2223 //.. /* 2224 //.. Call this to do the translation and limit checks: 2225 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2226 //.. UInt seg_selector, UInt virtual_addr ) 2227 //.. */ 2228 //.. assign( 2229 //.. r64, 2230 //.. mkIRExprCCall( 2231 //.. Ity_I64, 2232 //.. 0/*regparms*/, 2233 //.. "x86g_use_seg_selector", 2234 //.. &x86g_use_seg_selector, 2235 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 2236 //.. mkexpr(seg_selector), virtual) 2237 //.. ) 2238 //.. ); 2239 //.. 2240 //.. /* If the high 32 of the result are non-zero, there was a 2241 //.. failure in address translation. In which case, make a 2242 //.. quick exit. 2243 //.. 
//..    */
//..    stmt(
//..       IRStmt_Exit(
//..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
//..          Ijk_MapFail,
//..          IRConst_U32( guest_eip_curr_instr )
//..       )
//..    );
//..
//..    /* otherwise, here's the translated result. */
//..    return unop(Iop_64to32, mkexpr(r64));
//.. }


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
*/

/* Park 'addr64' in a fresh Ity_I64 temp and return the temp. */
static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

static
IRTemp disAMode ( /*OUT*/Int* len,
                  VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed = guest_RIP_bbstart
                                    + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
                 = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
                 = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                                         mkU8(scale)),
                        mkU64(d))));
         }

         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d8 + %base
            | %index != %ESP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp8(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d32 + %base
            | %index != %RSP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp32(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  */

static UInt lengthAMode ( Prefix pfx, Long delta )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         return 5;

      case 0x04: {
         /* SIB, with no displacement. */
         UChar sib    = getUChar(delta);
         UChar base_r = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         if (base_is_BPor13) {
            /* mod==0 with base RBP/R13 means a d32 follows the SIB. */
            return 6;
         } else {
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed the a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E)  -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
ULong dis_op2_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.
      */
      /* Zero the register first so the value-check never sees the
         bogus dependency (same trick for SBB r,r and XOR r,r). */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
         putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}



/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed the a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpv
                       OP %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_op2_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.
      */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (pfx & PFX_LOCK) {
               /* LOCK prefix: write back via compare-and-swap so a
                  racing store restarts the instruction. */
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm), dis_buf);
      return len+delta0;
   }
}


/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed the a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E,  tmpv
                       PUT tmpv, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpb
                       PUT tmpb, %G
*/
static
ULong dis_mov_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegE(size,pfx,rm),
                           nameIRegG(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           dis_buf,
                           nameIRegG(size,pfx,rm));
      return delta0+len;
   }
}


/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed the a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G,  tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_mov_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           dis_buf);
      return len+delta0;
   }
}


/* op $immediate, AL/AX/EAX/RAX. */
static
ULong dis_op_imm_A ( Int    size,
                     Bool   carrying,
                     IROp   op8,
                     Bool   keep,
                     Long   delta,
                     HChar* t_amd64opc )
{
   Int    size4 = imin(size,4);   /* immediates are at most 4 bytes */
   IRType ty    = szToITy(size);
   IRTemp dst0  = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   Long   lit   = getSDisp(size4,delta);
   assign(dst0, getIRegRAX(size));
   /* lit is sign-extended; the mask trims it to the operand size. */
   assign(src,  mkU(ty,lit & mkSizeMask(size)));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(amd64,guest)");

   if (keep)
      putIRegRAX(size, mkexpr(dst1));

   DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
                           lit, nameIRegRAX(size));
   return delta+size4;
}


/* Sign- and Zero-extending moves. */
static
ULong dis_movx_E_G ( VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, Int szs, Int szd, Bool sign_extend )
{
   UChar rm = getUChar(delta);
   if (epartIsReg(rm)) {
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       getIRegE(szs,pfx,rm)));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               nameIRegE(szs,pfx,rm),
                               nameIRegG(szd,pfx,rm));
      return 1+delta;
   }

   /* E refers to memory */
   {
      Int    len;
      HChar  dis_buf[50];
      IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       loadLE(szToITy(szs),mkexpr(addr))));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               dis_buf,
                               nameIRegG(szd,pfx,rm));
      return len+delta;
   }
}


/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   the 64 / 32 / 16 / 8 bit quantity in the given IRTemp.  */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case */
   if (sz == 8) {
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      /* Dividend is the 128-bit value RDX:RAX. */
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      /* Quotient to RAX, remainder to RDX. */
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      /* Smaller sizes are all done with a 64/32 divide after
         suitable widening. */
      IROp   op    = signed_divide ?
Iop_DivModS64to32 3093 : Iop_DivModU64to32; 3094 IRTemp src64 = newTemp(Ity_I64); 3095 IRTemp dst64 = newTemp(Ity_I64); 3096 switch (sz) { 3097 case 4: 3098 assign( src64, 3099 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3100 assign( dst64, 3101 binop(op, mkexpr(src64), mkexpr(t)) ); 3102 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3103 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3104 break; 3105 case 2: { 3106 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3107 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3108 assign( src64, unop(widen3264, 3109 binop(Iop_16HLto32, 3110 getIRegRDX(2), 3111 getIRegRAX(2))) ); 3112 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3113 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3114 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3115 break; 3116 } 3117 case 1: { 3118 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3119 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3120 IROp widen816 = signed_divide ? 
Iop_8Sto16 : Iop_8Uto16; 3121 assign( src64, unop(widen3264, 3122 unop(widen1632, getIRegRAX(2))) ); 3123 assign( dst64, 3124 binop(op, mkexpr(src64), 3125 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3126 putIRegRAX( 1, unop(Iop_16to8, 3127 unop(Iop_32to16, 3128 unop(Iop_64to32,mkexpr(dst64)))) ); 3129 putIRegAH( unop(Iop_16to8, 3130 unop(Iop_32to16, 3131 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3132 break; 3133 } 3134 default: 3135 vpanic("codegen_div(amd64)"); 3136 } 3137 } 3138 } 3139 3140 static 3141 ULong dis_Grp1 ( VexAbiInfo* vbi, 3142 Prefix pfx, 3143 Long delta, UChar modrm, 3144 Int am_sz, Int d_sz, Int sz, Long d64 ) 3145 { 3146 Int len; 3147 HChar dis_buf[50]; 3148 IRType ty = szToITy(sz); 3149 IRTemp dst1 = newTemp(ty); 3150 IRTemp src = newTemp(ty); 3151 IRTemp dst0 = newTemp(ty); 3152 IRTemp addr = IRTemp_INVALID; 3153 IROp op8 = Iop_INVALID; 3154 ULong mask = mkSizeMask(sz); 3155 3156 switch (gregLO3ofRM(modrm)) { 3157 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3158 case 2: break; // ADC 3159 case 3: break; // SBB 3160 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3161 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3162 /*NOTREACHED*/ 3163 default: vpanic("dis_Grp1(amd64): unhandled case"); 3164 } 3165 3166 if (epartIsReg(modrm)) { 3167 vassert(am_sz == 1); 3168 3169 assign(dst0, getIRegE(sz,pfx,modrm)); 3170 assign(src, mkU(ty,d64 & mask)); 3171 3172 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3173 helper_ADC( sz, dst1, dst0, src, 3174 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3175 } else 3176 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3177 helper_SBB( sz, dst1, dst0, src, 3178 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3179 } else { 3180 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3181 if (isAddSub(op8)) 3182 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3183 else 3184 setFlags_DEP1(op8, dst1, ty); 3185 } 3186 3187 if (gregLO3ofRM(modrm) < 7) 3188 putIRegE(sz, pfx, modrm, 
mkexpr(dst1)); 3189 3190 delta += (am_sz + d_sz); 3191 DIP("%s%c $%lld, %s\n", 3192 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3193 nameIRegE(sz,pfx,modrm)); 3194 } else { 3195 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3196 3197 assign(dst0, loadLE(ty,mkexpr(addr))); 3198 assign(src, mkU(ty,d64 & mask)); 3199 3200 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3201 if (pfx & PFX_LOCK) { 3202 /* cas-style store */ 3203 helper_ADC( sz, dst1, dst0, src, 3204 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3205 } else { 3206 /* normal store */ 3207 helper_ADC( sz, dst1, dst0, src, 3208 /*store*/addr, IRTemp_INVALID, 0 ); 3209 } 3210 } else 3211 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3212 if (pfx & PFX_LOCK) { 3213 /* cas-style store */ 3214 helper_SBB( sz, dst1, dst0, src, 3215 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3216 } else { 3217 /* normal store */ 3218 helper_SBB( sz, dst1, dst0, src, 3219 /*store*/addr, IRTemp_INVALID, 0 ); 3220 } 3221 } else { 3222 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3223 if (gregLO3ofRM(modrm) < 7) { 3224 if (pfx & PFX_LOCK) { 3225 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3226 mkexpr(dst1)/*newVal*/, 3227 guest_RIP_curr_instr ); 3228 } else { 3229 storeLE(mkexpr(addr), mkexpr(dst1)); 3230 } 3231 } 3232 if (isAddSub(op8)) 3233 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3234 else 3235 setFlags_DEP1(op8, dst1, ty); 3236 } 3237 3238 delta += (len+d_sz); 3239 DIP("%s%c $%lld, %s\n", 3240 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 3241 d64, dis_buf); 3242 } 3243 return delta; 3244 } 3245 3246 3247 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3248 expression. */ 3249 3250 static 3251 ULong dis_Grp2 ( VexAbiInfo* vbi, 3252 Prefix pfx, 3253 Long delta, UChar modrm, 3254 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3255 HChar* shift_expr_txt, Bool* decode_OK ) 3256 { 3257 /* delta on entry points at the modrm byte. 
*/ 3258 HChar dis_buf[50]; 3259 Int len; 3260 Bool isShift, isRotate, isRotateC; 3261 IRType ty = szToITy(sz); 3262 IRTemp dst0 = newTemp(ty); 3263 IRTemp dst1 = newTemp(ty); 3264 IRTemp addr = IRTemp_INVALID; 3265 3266 *decode_OK = True; 3267 3268 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3269 3270 /* Put value to shift/rotate in dst0. */ 3271 if (epartIsReg(modrm)) { 3272 assign(dst0, getIRegE(sz, pfx, modrm)); 3273 delta += (am_sz + d_sz); 3274 } else { 3275 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3276 assign(dst0, loadLE(ty,mkexpr(addr))); 3277 delta += len + d_sz; 3278 } 3279 3280 isShift = False; 3281 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3282 3283 isRotate = False; 3284 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3285 3286 isRotateC = False; 3287 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3288 3289 if (!isShift && !isRotate && !isRotateC) { 3290 /*NOTREACHED*/ 3291 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3292 } 3293 3294 if (isRotateC) { 3295 /* Call a helper; this insn is so ridiculous it does not deserve 3296 better. One problem is, the helper has to calculate both the 3297 new value and the new flags. This is more than 64 bits, and 3298 there is no way to return more than 64 bits from the helper. 3299 Hence the crude and obvious solution is to call it twice, 3300 using the sign of the sz field to indicate whether it is the 3301 value or rflags result we want. 
3302 */ 3303 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3304 IRExpr** argsVALUE; 3305 IRExpr** argsRFLAGS; 3306 3307 IRTemp new_value = newTemp(Ity_I64); 3308 IRTemp new_rflags = newTemp(Ity_I64); 3309 IRTemp old_rflags = newTemp(Ity_I64); 3310 3311 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3312 3313 argsVALUE 3314 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3315 widenUto64(shift_expr), /* rotate amount */ 3316 mkexpr(old_rflags), 3317 mkU64(sz) ); 3318 assign( new_value, 3319 mkIRExprCCall( 3320 Ity_I64, 3321 0/*regparm*/, 3322 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3323 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3324 argsVALUE 3325 ) 3326 ); 3327 3328 argsRFLAGS 3329 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3330 widenUto64(shift_expr), /* rotate amount */ 3331 mkexpr(old_rflags), 3332 mkU64(-sz) ); 3333 assign( new_rflags, 3334 mkIRExprCCall( 3335 Ity_I64, 3336 0/*regparm*/, 3337 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3338 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3339 argsRFLAGS 3340 ) 3341 ); 3342 3343 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3344 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3345 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3346 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3347 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3348 } 3349 3350 else 3351 if (isShift) { 3352 3353 IRTemp pre64 = newTemp(Ity_I64); 3354 IRTemp res64 = newTemp(Ity_I64); 3355 IRTemp res64ss = newTemp(Ity_I64); 3356 IRTemp shift_amt = newTemp(Ity_I8); 3357 UChar mask = toUChar(sz==8 ? 
63 : 31); 3358 IROp op64; 3359 3360 switch (gregLO3ofRM(modrm)) { 3361 case 4: op64 = Iop_Shl64; break; 3362 case 5: op64 = Iop_Shr64; break; 3363 case 6: op64 = Iop_Shl64; break; 3364 case 7: op64 = Iop_Sar64; break; 3365 /*NOTREACHED*/ 3366 default: vpanic("dis_Grp2:shift"); break; 3367 } 3368 3369 /* Widen the value to be shifted to 64 bits, do the shift, and 3370 narrow back down. This seems surprisingly long-winded, but 3371 unfortunately the AMD semantics requires that 8/16/32-bit 3372 shifts give defined results for shift values all the way up 3373 to 32, and this seems the simplest way to do it. It has the 3374 advantage that the only IR level shifts generated are of 64 3375 bit values, and the shift amount is guaranteed to be in the 3376 range 0 .. 63, thereby observing the IR semantics requiring 3377 all shift values to be in the range 0 .. 2^word_size-1. 3378 3379 Therefore the shift amount is masked with 63 for 64-bit shifts 3380 and 31 for all others. 3381 */ 3382 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3383 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3384 3385 /* suitably widen the value to be shifted to 64 bits. */ 3386 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3387 : widenUto64(mkexpr(dst0)) ); 3388 3389 /* res64 = pre64 `shift` shift_amt */ 3390 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3391 3392 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3393 assign( res64ss, 3394 binop(op64, 3395 mkexpr(pre64), 3396 binop(Iop_And8, 3397 binop(Iop_Sub8, 3398 mkexpr(shift_amt), mkU8(1)), 3399 mkU8(mask))) ); 3400 3401 /* Build the flags thunk. */ 3402 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3403 3404 /* Narrow the result back down. */ 3405 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3406 3407 } /* if (isShift) */ 3408 3409 else 3410 if (isRotate) { 3411 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3412 : (ty==Ity_I32 ? 
2 : 3)); 3413 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3414 IRTemp rot_amt = newTemp(Ity_I8); 3415 IRTemp rot_amt64 = newTemp(Ity_I8); 3416 IRTemp oldFlags = newTemp(Ity_I64); 3417 UChar mask = toUChar(sz==8 ? 63 : 31); 3418 3419 /* rot_amt = shift_expr & mask */ 3420 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3421 expressions never shift beyond the word size and thus remain 3422 well defined. */ 3423 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3424 3425 if (ty == Ity_I64) 3426 assign(rot_amt, mkexpr(rot_amt64)); 3427 else 3428 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3429 3430 if (left) { 3431 3432 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3433 assign(dst1, 3434 binop( mkSizedOp(ty,Iop_Or8), 3435 binop( mkSizedOp(ty,Iop_Shl8), 3436 mkexpr(dst0), 3437 mkexpr(rot_amt) 3438 ), 3439 binop( mkSizedOp(ty,Iop_Shr8), 3440 mkexpr(dst0), 3441 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3442 ) 3443 ) 3444 ); 3445 ccOp += AMD64G_CC_OP_ROLB; 3446 3447 } else { /* right */ 3448 3449 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3450 assign(dst1, 3451 binop( mkSizedOp(ty,Iop_Or8), 3452 binop( mkSizedOp(ty,Iop_Shr8), 3453 mkexpr(dst0), 3454 mkexpr(rot_amt) 3455 ), 3456 binop( mkSizedOp(ty,Iop_Shl8), 3457 mkexpr(dst0), 3458 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3459 ) 3460 ) 3461 ); 3462 ccOp += AMD64G_CC_OP_RORB; 3463 3464 } 3465 3466 /* dst1 now holds the rotated value. Build flag thunk. We 3467 need the resulting value for this, and the previous flags. 3468 Except don't set it if the rotate count is zero. */ 3469 3470 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3471 3472 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. 
*/ 3473 stmt( IRStmt_Put( OFFB_CC_OP, 3474 IRExpr_Mux0X( mkexpr(rot_amt64), 3475 IRExpr_Get(OFFB_CC_OP,Ity_I64), 3476 mkU64(ccOp))) ); 3477 stmt( IRStmt_Put( OFFB_CC_DEP1, 3478 IRExpr_Mux0X( mkexpr(rot_amt64), 3479 IRExpr_Get(OFFB_CC_DEP1,Ity_I64), 3480 widenUto64(mkexpr(dst1)))) ); 3481 stmt( IRStmt_Put( OFFB_CC_DEP2, 3482 IRExpr_Mux0X( mkexpr(rot_amt64), 3483 IRExpr_Get(OFFB_CC_DEP2,Ity_I64), 3484 mkU64(0))) ); 3485 stmt( IRStmt_Put( OFFB_CC_NDEP, 3486 IRExpr_Mux0X( mkexpr(rot_amt64), 3487 IRExpr_Get(OFFB_CC_NDEP,Ity_I64), 3488 mkexpr(oldFlags))) ); 3489 } /* if (isRotate) */ 3490 3491 /* Save result, and finish up. */ 3492 if (epartIsReg(modrm)) { 3493 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3494 if (vex_traceflags & VEX_TRACE_FE) { 3495 vex_printf("%s%c ", 3496 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3497 if (shift_expr_txt) 3498 vex_printf("%s", shift_expr_txt); 3499 else 3500 ppIRExpr(shift_expr); 3501 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3502 } 3503 } else { 3504 storeLE(mkexpr(addr), mkexpr(dst1)); 3505 if (vex_traceflags & VEX_TRACE_FE) { 3506 vex_printf("%s%c ", 3507 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3508 if (shift_expr_txt) 3509 vex_printf("%s", shift_expr_txt); 3510 else 3511 ppIRExpr(shift_expr); 3512 vex_printf(", %s\n", dis_buf); 3513 } 3514 } 3515 return delta; 3516 } 3517 3518 3519 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3520 static 3521 ULong dis_Grp8_Imm ( VexAbiInfo* vbi, 3522 Prefix pfx, 3523 Long delta, UChar modrm, 3524 Int am_sz, Int sz, ULong src_val, 3525 Bool* decode_OK ) 3526 { 3527 /* src_val denotes a d8. 3528 And delta on entry points at the modrm byte. */ 3529 3530 IRType ty = szToITy(sz); 3531 IRTemp t2 = newTemp(Ity_I64); 3532 IRTemp t2m = newTemp(Ity_I64); 3533 IRTemp t_addr = IRTemp_INVALID; 3534 HChar dis_buf[50]; 3535 ULong mask; 3536 3537 /* we're optimists :-) */ 3538 *decode_OK = True; 3539 3540 /* Limit src_val -- the bit offset -- to something within a word. 
3541 The Intel docs say that literal offsets larger than a word are 3542 masked in this way. */ 3543 switch (sz) { 3544 case 2: src_val &= 15; break; 3545 case 4: src_val &= 31; break; 3546 case 8: src_val &= 63; break; 3547 default: *decode_OK = False; return delta; 3548 } 3549 3550 /* Invent a mask suitable for the operation. */ 3551 switch (gregLO3ofRM(modrm)) { 3552 case 4: /* BT */ mask = 0; break; 3553 case 5: /* BTS */ mask = 1ULL << src_val; break; 3554 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3555 case 7: /* BTC */ mask = 1ULL << src_val; break; 3556 /* If this needs to be extended, probably simplest to make a 3557 new function to handle the other cases (0 .. 3). The 3558 Intel docs do however not indicate any use for 0 .. 3, so 3559 we don't expect this to happen. */ 3560 default: *decode_OK = False; return delta; 3561 } 3562 3563 /* Fetch the value to be tested and modified into t2, which is 3564 64-bits wide regardless of sz. */ 3565 if (epartIsReg(modrm)) { 3566 vassert(am_sz == 1); 3567 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3568 delta += (am_sz + 1); 3569 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3570 nameISize(sz), 3571 src_val, nameIRegE(sz,pfx,modrm)); 3572 } else { 3573 Int len; 3574 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3575 delta += (len+1); 3576 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3577 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3578 nameISize(sz), 3579 src_val, dis_buf); 3580 } 3581 3582 /* Compute the new value into t2m, if non-BT. 
*/ 3583 switch (gregLO3ofRM(modrm)) { 3584 case 4: /* BT */ 3585 break; 3586 case 5: /* BTS */ 3587 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3588 break; 3589 case 6: /* BTR */ 3590 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3591 break; 3592 case 7: /* BTC */ 3593 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3594 break; 3595 default: 3596 /*NOTREACHED*/ /*the previous switch guards this*/ 3597 vassert(0); 3598 } 3599 3600 /* Write the result back, if non-BT. */ 3601 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3602 if (epartIsReg(modrm)) { 3603 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3604 } else { 3605 if (pfx & PFX_LOCK) { 3606 casLE( mkexpr(t_addr), 3607 narrowTo(ty, mkexpr(t2))/*expd*/, 3608 narrowTo(ty, mkexpr(t2m))/*new*/, 3609 guest_RIP_curr_instr ); 3610 } else { 3611 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3612 } 3613 } 3614 } 3615 3616 /* Copy relevant bit from t2 into the carry flag. */ 3617 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3618 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3619 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3620 stmt( IRStmt_Put( 3621 OFFB_CC_DEP1, 3622 binop(Iop_And64, 3623 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3624 mkU64(1)) 3625 )); 3626 /* Set NDEP even though it isn't used. This makes redundant-PUT 3627 elimination of previous stores to this field work better. */ 3628 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3629 3630 return delta; 3631 } 3632 3633 3634 /* Signed/unsigned widening multiply. Generate IR to multiply the 3635 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3636 RDX:RAX/EDX:EAX/DX:AX/AX. 
3637 */ 3638 static void codegen_mulL_A_D ( Int sz, Bool syned, 3639 IRTemp tmp, HChar* tmp_txt ) 3640 { 3641 IRType ty = szToITy(sz); 3642 IRTemp t1 = newTemp(ty); 3643 3644 assign( t1, getIRegRAX(sz) ); 3645 3646 switch (ty) { 3647 case Ity_I64: { 3648 IRTemp res128 = newTemp(Ity_I128); 3649 IRTemp resHi = newTemp(Ity_I64); 3650 IRTemp resLo = newTemp(Ity_I64); 3651 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3652 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3653 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3654 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3655 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3656 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3657 putIReg64(R_RDX, mkexpr(resHi)); 3658 putIReg64(R_RAX, mkexpr(resLo)); 3659 break; 3660 } 3661 case Ity_I32: { 3662 IRTemp res64 = newTemp(Ity_I64); 3663 IRTemp resHi = newTemp(Ity_I32); 3664 IRTemp resLo = newTemp(Ity_I32); 3665 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3666 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3667 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3668 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3669 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3670 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3671 putIRegRDX(4, mkexpr(resHi)); 3672 putIRegRAX(4, mkexpr(resLo)); 3673 break; 3674 } 3675 case Ity_I16: { 3676 IRTemp res32 = newTemp(Ity_I32); 3677 IRTemp resHi = newTemp(Ity_I16); 3678 IRTemp resLo = newTemp(Ity_I16); 3679 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3680 UInt tBaseOp = syned ? 
AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3681 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3682 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3683 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3684 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3685 putIRegRDX(2, mkexpr(resHi)); 3686 putIRegRAX(2, mkexpr(resLo)); 3687 break; 3688 } 3689 case Ity_I8: { 3690 IRTemp res16 = newTemp(Ity_I16); 3691 IRTemp resHi = newTemp(Ity_I8); 3692 IRTemp resLo = newTemp(Ity_I8); 3693 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3694 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3695 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3696 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3697 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3698 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3699 putIRegRAX(2, mkexpr(res16)); 3700 break; 3701 } 3702 default: 3703 ppIRType(ty); 3704 vpanic("codegen_mulL_A_D(amd64)"); 3705 } 3706 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3707 } 3708 3709 3710 /* Group 3 extended opcodes. 
*/ 3711 static 3712 ULong dis_Grp3 ( VexAbiInfo* vbi, 3713 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3714 { 3715 Long d64; 3716 UChar modrm; 3717 HChar dis_buf[50]; 3718 Int len; 3719 IRTemp addr; 3720 IRType ty = szToITy(sz); 3721 IRTemp t1 = newTemp(ty); 3722 IRTemp dst1, src, dst0; 3723 *decode_OK = True; 3724 modrm = getUChar(delta); 3725 if (epartIsReg(modrm)) { 3726 switch (gregLO3ofRM(modrm)) { 3727 case 0: { /* TEST */ 3728 delta++; 3729 d64 = getSDisp(imin(4,sz), delta); 3730 delta += imin(4,sz); 3731 dst1 = newTemp(ty); 3732 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3733 getIRegE(sz,pfx,modrm), 3734 mkU(ty, d64 & mkSizeMask(sz)))); 3735 setFlags_DEP1( Iop_And8, dst1, ty ); 3736 DIP("test%c $%lld, %s\n", 3737 nameISize(sz), d64, 3738 nameIRegE(sz, pfx, modrm)); 3739 break; 3740 } 3741 case 1: 3742 *decode_OK = False; 3743 return delta; 3744 case 2: /* NOT */ 3745 delta++; 3746 putIRegE(sz, pfx, modrm, 3747 unop(mkSizedOp(ty,Iop_Not8), 3748 getIRegE(sz, pfx, modrm))); 3749 DIP("not%c %s\n", nameISize(sz), 3750 nameIRegE(sz, pfx, modrm)); 3751 break; 3752 case 3: /* NEG */ 3753 delta++; 3754 dst0 = newTemp(ty); 3755 src = newTemp(ty); 3756 dst1 = newTemp(ty); 3757 assign(dst0, mkU(ty,0)); 3758 assign(src, getIRegE(sz, pfx, modrm)); 3759 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3760 mkexpr(src))); 3761 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3762 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3763 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 3764 break; 3765 case 4: /* MUL (unsigned widening) */ 3766 delta++; 3767 src = newTemp(ty); 3768 assign(src, getIRegE(sz,pfx,modrm)); 3769 codegen_mulL_A_D ( sz, False, src, 3770 nameIRegE(sz,pfx,modrm) ); 3771 break; 3772 case 5: /* IMUL (signed widening) */ 3773 delta++; 3774 src = newTemp(ty); 3775 assign(src, getIRegE(sz,pfx,modrm)); 3776 codegen_mulL_A_D ( sz, True, src, 3777 nameIRegE(sz,pfx,modrm) ); 3778 break; 3779 case 6: /* DIV */ 3780 delta++; 3781 assign( t1, 
getIRegE(sz, pfx, modrm) ); 3782 codegen_div ( sz, t1, False ); 3783 DIP("div%c %s\n", nameISize(sz), 3784 nameIRegE(sz, pfx, modrm)); 3785 break; 3786 case 7: /* IDIV */ 3787 delta++; 3788 assign( t1, getIRegE(sz, pfx, modrm) ); 3789 codegen_div ( sz, t1, True ); 3790 DIP("idiv%c %s\n", nameISize(sz), 3791 nameIRegE(sz, pfx, modrm)); 3792 break; 3793 default: 3794 /*NOTREACHED*/ 3795 vpanic("Grp3(amd64,R)"); 3796 } 3797 } else { 3798 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 3799 /* we have to inform disAMode of any immediate 3800 bytes used */ 3801 gregLO3ofRM(modrm)==0/*TEST*/ 3802 ? imin(4,sz) 3803 : 0 3804 ); 3805 t1 = newTemp(ty); 3806 delta += len; 3807 assign(t1, loadLE(ty,mkexpr(addr))); 3808 switch (gregLO3ofRM(modrm)) { 3809 case 0: { /* TEST */ 3810 d64 = getSDisp(imin(4,sz), delta); 3811 delta += imin(4,sz); 3812 dst1 = newTemp(ty); 3813 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 3814 mkexpr(t1), 3815 mkU(ty, d64 & mkSizeMask(sz)))); 3816 setFlags_DEP1( Iop_And8, dst1, ty ); 3817 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 3818 break; 3819 } 3820 case 1: 3821 *decode_OK = False; 3822 return delta; 3823 case 2: /* NOT */ 3824 dst1 = newTemp(ty); 3825 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 3826 if (pfx & PFX_LOCK) { 3827 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3828 guest_RIP_curr_instr ); 3829 } else { 3830 storeLE( mkexpr(addr), mkexpr(dst1) ); 3831 } 3832 DIP("not%c %s\n", nameISize(sz), dis_buf); 3833 break; 3834 case 3: /* NEG */ 3835 dst0 = newTemp(ty); 3836 src = newTemp(ty); 3837 dst1 = newTemp(ty); 3838 assign(dst0, mkU(ty,0)); 3839 assign(src, mkexpr(t1)); 3840 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 3841 mkexpr(src))); 3842 if (pfx & PFX_LOCK) { 3843 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 3844 guest_RIP_curr_instr ); 3845 } else { 3846 storeLE( mkexpr(addr), mkexpr(dst1) ); 3847 } 3848 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 3849 DIP("neg%c 
%s\n", nameISize(sz), dis_buf); 3850 break; 3851 case 4: /* MUL (unsigned widening) */ 3852 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 3853 break; 3854 case 5: /* IMUL */ 3855 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 3856 break; 3857 case 6: /* DIV */ 3858 codegen_div ( sz, t1, False ); 3859 DIP("div%c %s\n", nameISize(sz), dis_buf); 3860 break; 3861 case 7: /* IDIV */ 3862 codegen_div ( sz, t1, True ); 3863 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 3864 break; 3865 default: 3866 /*NOTREACHED*/ 3867 vpanic("Grp3(amd64,M)"); 3868 } 3869 } 3870 return delta; 3871 } 3872 3873 3874 /* Group 4 extended opcodes. */ 3875 static 3876 ULong dis_Grp4 ( VexAbiInfo* vbi, 3877 Prefix pfx, Long delta, Bool* decode_OK ) 3878 { 3879 Int alen; 3880 UChar modrm; 3881 HChar dis_buf[50]; 3882 IRType ty = Ity_I8; 3883 IRTemp t1 = newTemp(ty); 3884 IRTemp t2 = newTemp(ty); 3885 3886 *decode_OK = True; 3887 3888 modrm = getUChar(delta); 3889 if (epartIsReg(modrm)) { 3890 assign(t1, getIRegE(1, pfx, modrm)); 3891 switch (gregLO3ofRM(modrm)) { 3892 case 0: /* INC */ 3893 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3894 putIRegE(1, pfx, modrm, mkexpr(t2)); 3895 setFlags_INC_DEC( True, t2, ty ); 3896 break; 3897 case 1: /* DEC */ 3898 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3899 putIRegE(1, pfx, modrm, mkexpr(t2)); 3900 setFlags_INC_DEC( False, t2, ty ); 3901 break; 3902 default: 3903 *decode_OK = False; 3904 return delta; 3905 } 3906 delta++; 3907 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 3908 nameIRegE(1, pfx, modrm)); 3909 } else { 3910 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 3911 assign( t1, loadLE(ty, mkexpr(addr)) ); 3912 switch (gregLO3ofRM(modrm)) { 3913 case 0: /* INC */ 3914 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 3915 if (pfx & PFX_LOCK) { 3916 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3917 guest_RIP_curr_instr ); 3918 } else { 3919 storeLE( mkexpr(addr), mkexpr(t2) ); 3920 } 3921 setFlags_INC_DEC( True, 
t2, ty ); 3922 break; 3923 case 1: /* DEC */ 3924 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 3925 if (pfx & PFX_LOCK) { 3926 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 3927 guest_RIP_curr_instr ); 3928 } else { 3929 storeLE( mkexpr(addr), mkexpr(t2) ); 3930 } 3931 setFlags_INC_DEC( False, t2, ty ); 3932 break; 3933 default: 3934 *decode_OK = False; 3935 return delta; 3936 } 3937 delta += alen; 3938 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 3939 } 3940 return delta; 3941 } 3942 3943 3944 /* Group 5 extended opcodes. */ 3945 static 3946 ULong dis_Grp5 ( VexAbiInfo* vbi, 3947 Prefix pfx, Int sz, Long delta, 3948 DisResult* dres, Bool* decode_OK ) 3949 { 3950 Int len; 3951 UChar modrm; 3952 HChar dis_buf[50]; 3953 IRTemp addr = IRTemp_INVALID; 3954 IRType ty = szToITy(sz); 3955 IRTemp t1 = newTemp(ty); 3956 IRTemp t2 = IRTemp_INVALID; 3957 IRTemp t3 = IRTemp_INVALID; 3958 Bool showSz = True; 3959 3960 *decode_OK = True; 3961 3962 modrm = getUChar(delta); 3963 if (epartIsReg(modrm)) { 3964 assign(t1, getIRegE(sz,pfx,modrm)); 3965 switch (gregLO3ofRM(modrm)) { 3966 case 0: /* INC */ 3967 t2 = newTemp(ty); 3968 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3969 mkexpr(t1), mkU(ty,1))); 3970 setFlags_INC_DEC( True, t2, ty ); 3971 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3972 break; 3973 case 1: /* DEC */ 3974 t2 = newTemp(ty); 3975 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3976 mkexpr(t1), mkU(ty,1))); 3977 setFlags_INC_DEC( False, t2, ty ); 3978 putIRegE(sz,pfx,modrm, mkexpr(t2)); 3979 break; 3980 case 2: /* call Ev */ 3981 /* Ignore any sz value and operate as if sz==8. 
*/ 3982 if (!(sz == 4 || sz == 8)) goto unhandled; 3983 sz = 8; 3984 t3 = newTemp(Ity_I64); 3985 assign(t3, getIRegE(sz,pfx,modrm)); 3986 t2 = newTemp(Ity_I64); 3987 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 3988 putIReg64(R_RSP, mkexpr(t2)); 3989 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 3990 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 3991 jmp_treg(Ijk_Call,t3); 3992 dres->whatNext = Dis_StopHere; 3993 showSz = False; 3994 break; 3995 case 4: /* jmp Ev */ 3996 /* Ignore any sz value and operate as if sz==8. */ 3997 if (!(sz == 4 || sz == 8)) goto unhandled; 3998 sz = 8; 3999 t3 = newTemp(Ity_I64); 4000 assign(t3, getIRegE(sz,pfx,modrm)); 4001 jmp_treg(Ijk_Boring,t3); 4002 dres->whatNext = Dis_StopHere; 4003 showSz = False; 4004 break; 4005 default: 4006 *decode_OK = False; 4007 return delta; 4008 } 4009 delta++; 4010 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4011 showSz ? nameISize(sz) : ' ', 4012 nameIRegE(sz, pfx, modrm)); 4013 } else { 4014 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4015 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4016 && gregLO3ofRM(modrm) != 6) { 4017 assign(t1, loadLE(ty,mkexpr(addr))); 4018 } 4019 switch (gregLO3ofRM(modrm)) { 4020 case 0: /* INC */ 4021 t2 = newTemp(ty); 4022 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4023 mkexpr(t1), mkU(ty,1))); 4024 if (pfx & PFX_LOCK) { 4025 casLE( mkexpr(addr), 4026 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4027 } else { 4028 storeLE(mkexpr(addr),mkexpr(t2)); 4029 } 4030 setFlags_INC_DEC( True, t2, ty ); 4031 break; 4032 case 1: /* DEC */ 4033 t2 = newTemp(ty); 4034 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4035 mkexpr(t1), mkU(ty,1))); 4036 if (pfx & PFX_LOCK) { 4037 casLE( mkexpr(addr), 4038 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4039 } else { 4040 storeLE(mkexpr(addr),mkexpr(t2)); 4041 } 4042 setFlags_INC_DEC( False, t2, ty ); 4043 break; 4044 case 2: /* call Ev */ 4045 /* Ignore any sz value and operate as if 
sz==8. */ 4046 if (!(sz == 4 || sz == 8)) goto unhandled; 4047 sz = 8; 4048 t3 = newTemp(Ity_I64); 4049 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4050 t2 = newTemp(Ity_I64); 4051 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4052 putIReg64(R_RSP, mkexpr(t2)); 4053 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4054 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4055 jmp_treg(Ijk_Call,t3); 4056 dres->whatNext = Dis_StopHere; 4057 showSz = False; 4058 break; 4059 case 4: /* JMP Ev */ 4060 /* Ignore any sz value and operate as if sz==8. */ 4061 if (!(sz == 4 || sz == 8)) goto unhandled; 4062 sz = 8; 4063 t3 = newTemp(Ity_I64); 4064 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4065 jmp_treg(Ijk_Boring,t3); 4066 dres->whatNext = Dis_StopHere; 4067 showSz = False; 4068 break; 4069 case 6: /* PUSH Ev */ 4070 /* There is no encoding for 32-bit operand size; hence ... */ 4071 if (sz == 4) sz = 8; 4072 if (!(sz == 8 || sz == 2)) goto unhandled; 4073 if (sz == 8) { 4074 t3 = newTemp(Ity_I64); 4075 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4076 t2 = newTemp(Ity_I64); 4077 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4078 putIReg64(R_RSP, mkexpr(t2) ); 4079 storeLE( mkexpr(t2), mkexpr(t3) ); 4080 break; 4081 } else { 4082 goto unhandled; /* awaiting test case */ 4083 } 4084 default: 4085 unhandled: 4086 *decode_OK = False; 4087 return delta; 4088 } 4089 delta += len; 4090 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4091 showSz ? 
nameISize(sz) : ' ',
                      dis_buf);
   }
   return delta;
}


/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops.  Computes, into 't_inc', the
   signed amount by which RSI/RDI must be stepped after each element:
   guest_DFLAG holds +1 or -1 (per the D flag), so shifting it left
   by log2(sz) yields +sz or -sz. */
static 
void dis_string_op_increment ( Int sz, IRTemp t_inc )
{
   UChar logSz;
   if (sz == 8 || sz == 4 || sz == 2) {
      logSz = 1;
      if (sz == 4) logSz = 2;
      if (sz == 8) logSz = 3;
      assign( t_inc, 
              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
                               mkU8(logSz) ) );
   } else {
      /* sz == 1: the DFLAG value (+1/-1) is already the step. */
      assign( t_inc, 
              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   }
}

/* Disassemble one non-REP-prefixed string insn 'name' of element
   size 'sz', delegating IR generation to 'dis_OP' (one of
   dis_MOVS/dis_LODS/dis_STOS/dis_CMPS/dis_SCAS below). */
static
void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                    Int sz, HChar* name, Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc, pfx );
   DIP("%s%c\n", name, nameISize(sz));
}

/* MOVS: copy one element from [RSI] to [RDI], then step RSI and RDI
   by t_inc.  With an address-size override (0x67) the pointers are
   read as ESI/EDI, zero-extended, and only their low 32 bits kept
   after the update. */
static 
void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      /* Truncate the updated pointers to 32 bits. */
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* LODS: load one element from [RSI] into rAX, then step RSI. */
static 
void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incs;

   if (haveASO(pfx))
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   else
      assign( ts, getIReg64(R_RSI) );

   putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );

   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx))
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   putIReg64( R_RSI, incs );
}

/* STOS: store rAX to [RDI], then step RDI. */
static 
void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* rAX */
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}

/* CMPS: set flags from [RSI] - [RDI] (no result is written back),
   then step RSI and RDI. */
static 
void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);      /* (RDI) */
   IRTemp tsv = newTemp(ty);      /* (RSI) */
   IRTemp td  = newTemp(Ity_I64); /*  RDI  */
   IRTemp ts  = newTemp(Ity_I64); /*  RSI  */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   assign( tdv, loadLE(ty,mkexpr(td)) );

   assign( tsv, loadLE(ty,mkexpr(ts)) );

   /* Flags as for SUB: [RSI]-value minus [RDI]-value. */
   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* SCAS: set flags from rAX - [RDI] (no result is written back),
   then step RDI. */
static 
void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);       /*  rAX  */
   IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
   IRTemp tdv = newTemp(ty);       /* (RDI) */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}


/* Wrap the appropriate string op inside a REP/REPE/REPNE.
We assume
   the insn is the last one in the basic block, and so emit a jump to
   the next insn, rather than just falling through. */
static 
void dis_REP_op ( AMD64Condcode cond,
                  void (*dis_OP)(Int, IRTemp, Prefix),
                  Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
                  Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   IRTemp tc;
   IRExpr* cmp;

   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));

   /* With an address-size override the count register is ECX,
      otherwise RCX. */
   if (haveASO(pfx)) {
      tc = newTemp(Ity_I32);  /* ECX */
      assign( tc, getIReg32(R_RCX) );
      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   } else {
      tc = newTemp(Ity_I64);  /* RCX */
      assign( tc, getIReg64(R_RCX) );
      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   }

   /* Counter already zero?  Then do nothing: side-exit to the next
      insn before touching any state. */
   stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) );

   /* Decrement the counter, then do one iteration of the op. */
   if (haveASO(pfx))
      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
   else
      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );

   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc, pfx);

   if (cond == AMD64CondAlways) {
      /* Plain REP: unconditionally loop back to this insn. */
      jmp_lit(Ijk_Boring,rip);
   } else {
      /* REPE/REPNE: loop back only while 'cond' holds; otherwise
         fall through to the next insn. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U64(rip) ) );
      jmp_lit(Ijk_Boring,rip_next);
   }
   DIP("%s%c\n", name, nameISize(sz));
}


/*------------------------------------------------------------*/
/*--- Arithmetic, etc.                                     ---*/
/*------------------------------------------------------------*/

/* IMUL E, G.  Supplied rip points to the modR/M byte.
*/
static
ULong dis_mul_E_G ( VexAbiInfo* vbi,
                    Prefix pfx,
                    Int size,
                    Long delta0 )
{
   /* Two-operand IMUL: G := G * E, where E is a register or memory
      operand of width 'size'.  Only the low half of the product is
      kept; flags are set via the signed-multiply thunk for this
      width.  Returns the delta of the byte past the insn. */
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm    = getUChar(delta0);
   IRType ty    = szToITy(size);
   IRTemp tE    = newTemp(ty);
   IRTemp tG    = newTemp(ty);
   IRTemp tRes  = newTemp(ty);
   Bool   isReg = epartIsReg(rm);

   assign( tG, getIRegG(size, pfx, rm) );
   if (isReg) {
      assign( tE, getIRegE(size, pfx, rm) );
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
      assign( tE, loadLE(ty,mkexpr(addr)) );
   }

   /* Flag thunk first, then the low-half result, preserving the
      original statement order. */
   setFlags_MUL ( ty, tE, tG, AMD64G_CC_OP_SMULB );

   assign( tRes, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(tE), mkexpr(tG) ) );

   putIRegG(size, pfx, rm, mkexpr(tRes) );

   DIP("imul%c %s, %s\n", nameISize(size),
                          isReg ? nameIRegE(size,pfx,rm) : dis_buf,
                          nameIRegG(size,pfx,rm));
   return (isReg ? 1 : (Long)alen) + delta0;
}


/* IMUL I * E -> G.  Supplied rip points to the modR/M byte.
*/
static
ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
                       Prefix pfx,
                       Int size,
                       Long delta,
                       Int litsize )
{
   /* Three-operand IMUL: G := E * imm, where the immediate is
      'litsize' bytes in the insn stream (capped at 4, sign-extended,
      then truncated to the operand size).  Returns the delta of the
      byte past the insn. */
   Long   imm;
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm   = getUChar(delta);
   IRType ty   = szToITy(size);
   IRTemp tE   = newTemp(ty);
   IRTemp tImm = newTemp(ty);
   IRTemp tRes = newTemp(ty);

   /* 1-byte operand size never reaches here. */
   vassert(size == 2 || size == 4 || size == 8);

   /* Fetch the E operand (register or memory). */
   if (epartIsReg(rm)) {
      assign(tE, getIRegE(size, pfx, rm));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 
                                     imin(4,litsize) );
      assign(tE, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }

   /* Immediate: at most 4 bytes, sign-extended ... */
   imm = getSDisp(imin(4,litsize),delta);
   delta += imin(4,litsize);

   /* ... then truncated to the operand size. */
   imm &= mkSizeMask(size);
   assign(tImm, mkU(ty,imm));

   assign( tRes, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(tE), mkexpr(tImm) ));

   setFlags_MUL ( ty, tE, tImm, AMD64G_CC_OP_SMULB );

   putIRegG(size, pfx, rm, mkexpr(tRes));

   DIP("imul%c $%lld, %s, %s\n", 
       nameISize(size), imm, 
       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
       nameIRegG(size,pfx,rm) );
   return delta;
}


/* Generate an IR sequence to do a popcount operation on the supplied
   IRTemp, and return a new IRTemp holding the result.  'ty' may be
   Ity_I16, Ity_I32 or Ity_I64 only.
*/ 4412 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4413 { 4414 Int i; 4415 if (ty == Ity_I16) { 4416 IRTemp old = IRTemp_INVALID; 4417 IRTemp nyu = IRTemp_INVALID; 4418 IRTemp mask[4], shift[4]; 4419 for (i = 0; i < 4; i++) { 4420 mask[i] = newTemp(ty); 4421 shift[i] = 1 << i; 4422 } 4423 assign(mask[0], mkU16(0x5555)); 4424 assign(mask[1], mkU16(0x3333)); 4425 assign(mask[2], mkU16(0x0F0F)); 4426 assign(mask[3], mkU16(0x00FF)); 4427 old = src; 4428 for (i = 0; i < 4; i++) { 4429 nyu = newTemp(ty); 4430 assign(nyu, 4431 binop(Iop_Add16, 4432 binop(Iop_And16, 4433 mkexpr(old), 4434 mkexpr(mask[i])), 4435 binop(Iop_And16, 4436 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4437 mkexpr(mask[i])))); 4438 old = nyu; 4439 } 4440 return nyu; 4441 } 4442 if (ty == Ity_I32) { 4443 IRTemp old = IRTemp_INVALID; 4444 IRTemp nyu = IRTemp_INVALID; 4445 IRTemp mask[5], shift[5]; 4446 for (i = 0; i < 5; i++) { 4447 mask[i] = newTemp(ty); 4448 shift[i] = 1 << i; 4449 } 4450 assign(mask[0], mkU32(0x55555555)); 4451 assign(mask[1], mkU32(0x33333333)); 4452 assign(mask[2], mkU32(0x0F0F0F0F)); 4453 assign(mask[3], mkU32(0x00FF00FF)); 4454 assign(mask[4], mkU32(0x0000FFFF)); 4455 old = src; 4456 for (i = 0; i < 5; i++) { 4457 nyu = newTemp(ty); 4458 assign(nyu, 4459 binop(Iop_Add32, 4460 binop(Iop_And32, 4461 mkexpr(old), 4462 mkexpr(mask[i])), 4463 binop(Iop_And32, 4464 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4465 mkexpr(mask[i])))); 4466 old = nyu; 4467 } 4468 return nyu; 4469 } 4470 if (ty == Ity_I64) { 4471 IRTemp old = IRTemp_INVALID; 4472 IRTemp nyu = IRTemp_INVALID; 4473 IRTemp mask[6], shift[6]; 4474 for (i = 0; i < 6; i++) { 4475 mask[i] = newTemp(ty); 4476 shift[i] = 1 << i; 4477 } 4478 assign(mask[0], mkU64(0x5555555555555555ULL)); 4479 assign(mask[1], mkU64(0x3333333333333333ULL)); 4480 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4481 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4482 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4483 assign(mask[5], 
mkU64(0x00000000FFFFFFFFULL)); 4484 old = src; 4485 for (i = 0; i < 6; i++) { 4486 nyu = newTemp(ty); 4487 assign(nyu, 4488 binop(Iop_Add64, 4489 binop(Iop_And64, 4490 mkexpr(old), 4491 mkexpr(mask[i])), 4492 binop(Iop_And64, 4493 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4494 mkexpr(mask[i])))); 4495 old = nyu; 4496 } 4497 return nyu; 4498 } 4499 /*NOTREACHED*/ 4500 vassert(0); 4501 } 4502 4503 4504 /* Generate an IR sequence to do a count-leading-zeroes operation on 4505 the supplied IRTemp, and return a new IRTemp holding the result. 4506 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4507 the argument is zero, return the number of bits in the word (the 4508 natural semantics). */ 4509 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4510 { 4511 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4512 4513 IRTemp src64 = newTemp(Ity_I64); 4514 assign(src64, widenUto64( mkexpr(src) )); 4515 4516 IRTemp src64x = newTemp(Ity_I64); 4517 assign(src64x, 4518 binop(Iop_Shl64, mkexpr(src64), 4519 mkU8(64 - 8 * sizeofIRType(ty)))); 4520 4521 // Clz64 has undefined semantics when its input is zero, so 4522 // special-case around that. 4523 IRTemp res64 = newTemp(Ity_I64); 4524 assign(res64, 4525 IRExpr_Mux0X( 4526 unop(Iop_1Uto8, 4527 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))), 4528 unop(Iop_Clz64, mkexpr(src64x)), 4529 mkU64(8 * sizeofIRType(ty)) 4530 )); 4531 4532 IRTemp res = newTemp(ty); 4533 assign(res, narrowTo(ty, mkexpr(res64))); 4534 return res; 4535 } 4536 4537 4538 /*------------------------------------------------------------*/ 4539 /*--- ---*/ 4540 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4541 /*--- ---*/ 4542 /*------------------------------------------------------------*/ 4543 4544 /* --- Helper functions for dealing with the register stack. --- */ 4545 4546 /* --- Set the emulation-warning pseudo-register. 
--- */

static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   /* Record a pending emulation warning in the guest state; callers
      then side-exit with Ijk_EmWarn so the dispatcher sees it. */
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMWARN, e ) );
}

/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */

static IRExpr* mkQNaN64 ( void )
{
   /* Canonical double-precision quiet NaN: sign 0, exponent all-ones
      (11 bits), top mantissa bit 1, remaining 51 mantissa bits 0
      == 0x7FF8 0000 0000 0000 */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}

/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */

static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}

static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}

/* --------- Get/put the C3210 bits. --------- */

static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}

static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}

/* --------- Get/put the FPU rounding mode. --------- */
/* Stored in guest state as Ity_I64, but presented to/accepted from
   callers as Ity_I32. */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}

static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}


/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety.
*/
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}

/* Used where the guest rounding mode is deliberately not observed:
   always claims round-to-nearest (see XXXROUNDINGFIXME call sites). */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}


/* --------- Get/set FP register tag bytes. --------- */

/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */

static void put_ST_TAG ( Int i, IRExpr* value )
{
   /* The tag array is indexed relative to FTOP at run time, hence
      the PutI (rotating-register-file access). */
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
}

/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty". */

static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}


/* --------- Get/set FP registers. --------- */

/* Given i, and some expression e, emit 'ST(i) = e' and set the
   register's tag to indicate the register is full.  The previous
   state of the register is not checked. */

static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}

/* Given i, and some expression e, emit
      ST(i) = is_full(i) ? NaN : e
   and set the tag accordingly.
*/

/* Write 'value' to ST(i) unless ST(i) is already marked full, in
   which case a QNaN is written instead (emulating x87 stack
   overflow).  The tag ends up "full" either way. */
static void put_ST ( Int i, IRExpr* value )
{
   put_ST_UNCHECKED( i,
                     IRExpr_Mux0X( get_ST_TAG(i),
                                   /* 0 means empty */
                                   value,
                                   /* non-0 means full */
                                   mkQNaN64()
                   ) 
   );
}


/* Given i, generate an expression yielding 'ST(i)'. */

static IRExpr* get_ST_UNCHECKED ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}


/* Given i, generate an expression yielding 
     is_full(i) ? ST(i) : NaN
   i.e. reading an empty register (stack underflow) yields a QNaN. */

static IRExpr* get_ST ( Int i )
{
   return
      IRExpr_Mux0X( get_ST_TAG(i),
                    /* 0 means empty */
                    mkQNaN64(),
                    /* non-0 means full */
                    get_ST_UNCHECKED(i));
}


/* Adjust FTOP downwards by one register. */

static void fp_push ( void )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}

/* Adjust FTOP upwards by one register, and mark the vacated register
   as empty. */

static void fp_pop ( void )
{
   put_ST_TAG(0, mkU8(0));
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}

/* Clear the C2 bit of the FPU status register, for
   sin/cos/tan/sincos. */

static void clear_C2 ( void )
{
   put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
}

/* Invent a plausible-looking FPU status word value:
      ((ftop & 7) << 11) | (c3210 & 0x4700)
*/
static IRExpr* get_FPU_sw ( void )
{
   return
      unop(Iop_32to16,
           binop(Iop_Or32,
                 binop(Iop_Shl32, 
                       binop(Iop_And32, get_ftop(), mkU32(7)), 
                       mkU8(11)),
                 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 
                       mkU32(0x4700))
      ));
}


/* ------------------------------------------------------- */
/* Given all that stack-mangling junk, we can now go ahead
   and describe FP instructions.
4745 */ 4746 4747 /* ST(0) = ST(0) `op` mem64/32(addr) 4748 Need to check ST(0)'s tag on read, but not on write. 4749 */ 4750 static 4751 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4752 IROp op, Bool dbl ) 4753 { 4754 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4755 if (dbl) { 4756 put_ST_UNCHECKED(0, 4757 triop( op, 4758 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4759 get_ST(0), 4760 loadLE(Ity_F64,mkexpr(addr)) 4761 )); 4762 } else { 4763 put_ST_UNCHECKED(0, 4764 triop( op, 4765 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4766 get_ST(0), 4767 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 4768 )); 4769 } 4770 } 4771 4772 4773 /* ST(0) = mem64/32(addr) `op` ST(0) 4774 Need to check ST(0)'s tag on read, but not on write. 4775 */ 4776 static 4777 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 4778 IROp op, Bool dbl ) 4779 { 4780 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 4781 if (dbl) { 4782 put_ST_UNCHECKED(0, 4783 triop( op, 4784 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4785 loadLE(Ity_F64,mkexpr(addr)), 4786 get_ST(0) 4787 )); 4788 } else { 4789 put_ST_UNCHECKED(0, 4790 triop( op, 4791 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4792 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 4793 get_ST(0) 4794 )); 4795 } 4796 } 4797 4798 4799 /* ST(dst) = ST(dst) `op` ST(src). 4800 Check dst and src tags when reading but not on write. 4801 */ 4802 static 4803 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 4804 Bool pop_after ) 4805 { 4806 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 4807 put_ST_UNCHECKED( 4808 st_dst, 4809 triop( op, 4810 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4811 get_ST(st_dst), 4812 get_ST(st_src) ) 4813 ); 4814 if (pop_after) 4815 fp_pop(); 4816 } 4817 4818 /* ST(dst) = ST(src) `op` ST(dst). 4819 Check dst and src tags when reading but not on write. 
*/
static
void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
                         Bool pop_after )
{
   /* As fp_do_op_ST_ST, but with operands reversed. */
   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   put_ST_UNCHECKED( 
      st_dst, 
      triop( op, 
             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
             get_ST(st_src), 
             get_ST(st_dst) ) 
   );
   if (pop_after)
      fp_pop();
}

/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
{
   DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
   /* This is a bit of a hack (and isn't really right).  It sets
      Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
      documentation implies A and S are unchanged. 
   */
   /* It's also fishy in that it is used both for COMIP and
      UCOMIP, and they aren't the same (although similar). */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( 
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, get_ST(0), get_ST(i))),
                   /* 0x45 keeps only the Z/P/C-relevant bits of the
                      CmpF64 result. */
                   mkU64(0x45)
        )));
   if (pop_after)
      fp_pop();
}


/* returns 
   32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
   i.e. a signed saturating narrow to 16 bits, x87-style. */
static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
{
   IRTemp t32 = newTemp(Ity_I32);
   assign( t32, e32 );
   return
      IRExpr_Mux0X( 
         /* in-range iff (e32 + 32768) <u 65536 */
         unop(Iop_1Uto8, 
              binop(Iop_CmpLT64U, 
                    unop(Iop_32Uto64, 
                         binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 
                    mkU64(65536))),
         mkU16( 0x8000 ),
         unop(Iop_32to16, mkexpr(t32)));
}


static
ULong dis_FPU ( /*OUT*/Bool* decode_ok, 
                VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int    len;
   UInt   r_src, r_dst;
   HChar  dis_buf[50];
   IRTemp t1, t2;

   /* On entry, delta points at the second byte of the insn (the modrm
      byte).*/
4891 UChar first_opcode = getUChar(delta-1); 4892 UChar modrm = getUChar(delta+0); 4893 4894 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 4895 4896 if (first_opcode == 0xD8) { 4897 if (modrm < 0xC0) { 4898 4899 /* bits 5,4,3 are an opcode extension, and the modRM also 4900 specifies an address. */ 4901 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 4902 delta += len; 4903 4904 switch (gregLO3ofRM(modrm)) { 4905 4906 case 0: /* FADD single-real */ 4907 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 4908 break; 4909 4910 case 1: /* FMUL single-real */ 4911 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 4912 break; 4913 4914 //.. case 2: /* FCOM single-real */ 4915 //.. DIP("fcoms %s\n", dis_buf); 4916 //.. /* This forces C1 to zero, which isn't right. */ 4917 //.. put_C3210( 4918 //.. binop( Iop_And32, 4919 //.. binop(Iop_Shl32, 4920 //.. binop(Iop_CmpF64, 4921 //.. get_ST(0), 4922 //.. unop(Iop_F32toF64, 4923 //.. loadLE(Ity_F32,mkexpr(addr)))), 4924 //.. mkU8(8)), 4925 //.. mkU32(0x4500) 4926 //.. )); 4927 //.. break; 4928 //.. 4929 //.. case 3: /* FCOMP single-real */ 4930 //.. DIP("fcomps %s\n", dis_buf); 4931 //.. /* This forces C1 to zero, which isn't right. */ 4932 //.. put_C3210( 4933 //.. binop( Iop_And32, 4934 //.. binop(Iop_Shl32, 4935 //.. binop(Iop_CmpF64, 4936 //.. get_ST(0), 4937 //.. unop(Iop_F32toF64, 4938 //.. loadLE(Ity_F32,mkexpr(addr)))), 4939 //.. mkU8(8)), 4940 //.. mkU32(0x4500) 4941 //.. )); 4942 //.. fp_pop(); 4943 //.. 
break; 4944 4945 case 4: /* FSUB single-real */ 4946 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 4947 break; 4948 4949 case 5: /* FSUBR single-real */ 4950 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 4951 break; 4952 4953 case 6: /* FDIV single-real */ 4954 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 4955 break; 4956 4957 case 7: /* FDIVR single-real */ 4958 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 4959 break; 4960 4961 default: 4962 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 4963 vex_printf("first_opcode == 0xD8\n"); 4964 goto decode_fail; 4965 } 4966 } else { 4967 delta++; 4968 switch (modrm) { 4969 4970 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 4971 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 4972 break; 4973 4974 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 4975 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 4976 break; 4977 4978 /* Dunno if this is right */ 4979 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 4980 r_dst = (UInt)modrm - 0xD0; 4981 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 4982 /* This forces C1 to zero, which isn't right. */ 4983 put_C3210( 4984 unop(Iop_32Uto64, 4985 binop( Iop_And32, 4986 binop(Iop_Shl32, 4987 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 4988 mkU8(8)), 4989 mkU32(0x4500) 4990 ))); 4991 break; 4992 4993 /* Dunno if this is right */ 4994 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 4995 r_dst = (UInt)modrm - 0xD8; 4996 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 4997 /* This forces C1 to zero, which isn't right. */ 4998 put_C3210( 4999 unop(Iop_32Uto64, 5000 binop( Iop_And32, 5001 binop(Iop_Shl32, 5002 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5003 mkU8(8)), 5004 mkU32(0x4500) 5005 ))); 5006 fp_pop(); 5007 break; 5008 5009 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5010 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5011 break; 5012 5013 case 0xE8 ... 
0xEF: /* FSUBR %st(?),%st(0) */ 5014 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5015 break; 5016 5017 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5018 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5019 break; 5020 5021 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5022 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5023 break; 5024 5025 default: 5026 goto decode_fail; 5027 } 5028 } 5029 } 5030 5031 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5032 else 5033 if (first_opcode == 0xD9) { 5034 if (modrm < 0xC0) { 5035 5036 /* bits 5,4,3 are an opcode extension, and the modRM also 5037 specifies an address. */ 5038 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5039 delta += len; 5040 5041 switch (gregLO3ofRM(modrm)) { 5042 5043 case 0: /* FLD single-real */ 5044 DIP("flds %s\n", dis_buf); 5045 fp_push(); 5046 put_ST(0, unop(Iop_F32toF64, 5047 loadLE(Ity_F32, mkexpr(addr)))); 5048 break; 5049 5050 case 2: /* FST single-real */ 5051 DIP("fsts %s\n", dis_buf); 5052 storeLE(mkexpr(addr), 5053 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5054 break; 5055 5056 case 3: /* FSTP single-real */ 5057 DIP("fstps %s\n", dis_buf); 5058 storeLE(mkexpr(addr), 5059 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5060 fp_pop(); 5061 break; 5062 5063 case 4: { /* FLDENV m28 */ 5064 /* Uses dirty helper: 5065 VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5066 IRTemp ew = newTemp(Ity_I32); 5067 IRTemp w64 = newTemp(Ity_I64); 5068 IRDirty* d = unsafeIRDirty_0_N ( 5069 0/*regparms*/, 5070 "amd64g_dirtyhelper_FLDENV", 5071 &amd64g_dirtyhelper_FLDENV, 5072 mkIRExprVec_1( mkexpr(addr) ) 5073 ); 5074 d->needsBBP = True; 5075 d->tmp = w64; 5076 /* declare we're reading memory */ 5077 d->mFx = Ifx_Read; 5078 d->mAddr = mkexpr(addr); 5079 d->mSize = 28; 5080 5081 /* declare we're writing guest state */ 5082 d->nFxState = 4; 5083 5084 d->fxState[0].fx = Ifx_Write; 5085 
d->fxState[0].offset = OFFB_FTOP; 5086 d->fxState[0].size = sizeof(UInt); 5087 5088 d->fxState[1].fx = Ifx_Write; 5089 d->fxState[1].offset = OFFB_FPTAGS; 5090 d->fxState[1].size = 8 * sizeof(UChar); 5091 5092 d->fxState[2].fx = Ifx_Write; 5093 d->fxState[2].offset = OFFB_FPROUND; 5094 d->fxState[2].size = sizeof(ULong); 5095 5096 d->fxState[3].fx = Ifx_Write; 5097 d->fxState[3].offset = OFFB_FC3210; 5098 d->fxState[3].size = sizeof(ULong); 5099 5100 stmt( IRStmt_Dirty(d) ); 5101 5102 /* ew contains any emulation warning we may need to 5103 issue. If needed, side-exit to the next insn, 5104 reporting the warning, so that Valgrind's dispatcher 5105 sees the warning. */ 5106 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5107 put_emwarn( mkexpr(ew) ); 5108 stmt( 5109 IRStmt_Exit( 5110 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5111 Ijk_EmWarn, 5112 IRConst_U64( guest_RIP_bbstart+delta ) 5113 ) 5114 ); 5115 5116 DIP("fldenv %s\n", dis_buf); 5117 break; 5118 } 5119 5120 case 5: {/* FLDCW */ 5121 /* The only thing we observe in the control word is the 5122 rounding mode. Therefore, pass the 16-bit value 5123 (x87 native-format control word) to a clean helper, 5124 getting back a 64-bit value, the lower half of which 5125 is the FPROUND value to store, and the upper half of 5126 which is the emulation-warning token which may be 5127 generated. 
5128 */ 5129 /* ULong amd64h_check_fldcw ( ULong ); */ 5130 IRTemp t64 = newTemp(Ity_I64); 5131 IRTemp ew = newTemp(Ity_I32); 5132 DIP("fldcw %s\n", dis_buf); 5133 assign( t64, mkIRExprCCall( 5134 Ity_I64, 0/*regparms*/, 5135 "amd64g_check_fldcw", 5136 &amd64g_check_fldcw, 5137 mkIRExprVec_1( 5138 unop( Iop_16Uto64, 5139 loadLE(Ity_I16, mkexpr(addr))) 5140 ) 5141 ) 5142 ); 5143 5144 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5145 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5146 put_emwarn( mkexpr(ew) ); 5147 /* Finally, if an emulation warning was reported, 5148 side-exit to the next insn, reporting the warning, 5149 so that Valgrind's dispatcher sees the warning. */ 5150 stmt( 5151 IRStmt_Exit( 5152 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5153 Ijk_EmWarn, 5154 IRConst_U64( guest_RIP_bbstart+delta ) 5155 ) 5156 ); 5157 break; 5158 } 5159 5160 case 6: { /* FNSTENV m28 */ 5161 /* Uses dirty helper: 5162 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5163 IRDirty* d = unsafeIRDirty_0_N ( 5164 0/*regparms*/, 5165 "amd64g_dirtyhelper_FSTENV", 5166 &amd64g_dirtyhelper_FSTENV, 5167 mkIRExprVec_1( mkexpr(addr) ) 5168 ); 5169 d->needsBBP = True; 5170 /* declare we're writing memory */ 5171 d->mFx = Ifx_Write; 5172 d->mAddr = mkexpr(addr); 5173 d->mSize = 28; 5174 5175 /* declare we're reading guest state */ 5176 d->nFxState = 4; 5177 5178 d->fxState[0].fx = Ifx_Read; 5179 d->fxState[0].offset = OFFB_FTOP; 5180 d->fxState[0].size = sizeof(UInt); 5181 5182 d->fxState[1].fx = Ifx_Read; 5183 d->fxState[1].offset = OFFB_FPTAGS; 5184 d->fxState[1].size = 8 * sizeof(UChar); 5185 5186 d->fxState[2].fx = Ifx_Read; 5187 d->fxState[2].offset = OFFB_FPROUND; 5188 d->fxState[2].size = sizeof(ULong); 5189 5190 d->fxState[3].fx = Ifx_Read; 5191 d->fxState[3].offset = OFFB_FC3210; 5192 d->fxState[3].size = sizeof(ULong); 5193 5194 stmt( IRStmt_Dirty(d) ); 5195 5196 DIP("fnstenv %s\n", dis_buf); 5197 break; 5198 } 5199 5200 case 7: /* FNSTCW */ 5201 /* Fake up a native x87 
FPU control word. The only 5202 thing it depends on is FPROUND[1:0], so call a clean 5203 helper to cook it up. */ 5204 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5205 DIP("fnstcw %s\n", dis_buf); 5206 storeLE( 5207 mkexpr(addr), 5208 unop( Iop_64to16, 5209 mkIRExprCCall( 5210 Ity_I64, 0/*regp*/, 5211 "amd64g_create_fpucw", &amd64g_create_fpucw, 5212 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5213 ) 5214 ) 5215 ); 5216 break; 5217 5218 default: 5219 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5220 vex_printf("first_opcode == 0xD9\n"); 5221 goto decode_fail; 5222 } 5223 5224 } else { 5225 delta++; 5226 switch (modrm) { 5227 5228 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5229 r_src = (UInt)modrm - 0xC0; 5230 DIP("fld %%st(%u)\n", r_src); 5231 t1 = newTemp(Ity_F64); 5232 assign(t1, get_ST(r_src)); 5233 fp_push(); 5234 put_ST(0, mkexpr(t1)); 5235 break; 5236 5237 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5238 r_src = (UInt)modrm - 0xC8; 5239 DIP("fxch %%st(%u)\n", r_src); 5240 t1 = newTemp(Ity_F64); 5241 t2 = newTemp(Ity_F64); 5242 assign(t1, get_ST(0)); 5243 assign(t2, get_ST(r_src)); 5244 put_ST_UNCHECKED(0, mkexpr(t2)); 5245 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5246 break; 5247 5248 case 0xE0: /* FCHS */ 5249 DIP("fchs\n"); 5250 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5251 break; 5252 5253 case 0xE1: /* FABS */ 5254 DIP("fabs\n"); 5255 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5256 break; 5257 5258 case 0xE5: { /* FXAM */ 5259 /* This is an interesting one. It examines %st(0), 5260 regardless of whether the tag says it's empty or not. 5261 Here, just pass both the tag (in our format) and the 5262 value (as a double, actually a ULong) to a helper 5263 function. 
*/ 5264 IRExpr** args 5265 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5266 unop(Iop_ReinterpF64asI64, 5267 get_ST_UNCHECKED(0)) ); 5268 put_C3210(mkIRExprCCall( 5269 Ity_I64, 5270 0/*regparm*/, 5271 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5272 args 5273 )); 5274 DIP("fxam\n"); 5275 break; 5276 } 5277 5278 case 0xE8: /* FLD1 */ 5279 DIP("fld1\n"); 5280 fp_push(); 5281 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5282 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5283 break; 5284 5285 case 0xE9: /* FLDL2T */ 5286 DIP("fldl2t\n"); 5287 fp_push(); 5288 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5289 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5290 break; 5291 5292 case 0xEA: /* FLDL2E */ 5293 DIP("fldl2e\n"); 5294 fp_push(); 5295 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5296 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5297 break; 5298 5299 case 0xEB: /* FLDPI */ 5300 DIP("fldpi\n"); 5301 fp_push(); 5302 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5303 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5304 break; 5305 5306 case 0xEC: /* FLDLG2 */ 5307 DIP("fldlg2\n"); 5308 fp_push(); 5309 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5310 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5311 break; 5312 5313 case 0xED: /* FLDLN2 */ 5314 DIP("fldln2\n"); 5315 fp_push(); 5316 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5317 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5318 break; 5319 5320 case 0xEE: /* FLDZ */ 5321 DIP("fldz\n"); 5322 fp_push(); 5323 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5324 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5325 break; 5326 5327 case 0xF0: /* F2XM1 */ 5328 DIP("f2xm1\n"); 5329 put_ST_UNCHECKED(0, 5330 binop(Iop_2xm1F64, 5331 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5332 
get_ST(0))); 5333 break; 5334 5335 case 0xF1: /* FYL2X */ 5336 DIP("fyl2x\n"); 5337 put_ST_UNCHECKED(1, 5338 triop(Iop_Yl2xF64, 5339 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5340 get_ST(1), 5341 get_ST(0))); 5342 fp_pop(); 5343 break; 5344 5345 case 0xF2: /* FPTAN */ 5346 DIP("ftan\n"); 5347 put_ST_UNCHECKED(0, 5348 binop(Iop_TanF64, 5349 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5350 get_ST(0))); 5351 fp_push(); 5352 put_ST(0, IRExpr_Const(IRConst_F64(1.0))); 5353 clear_C2(); /* HACK */ 5354 break; 5355 5356 case 0xF3: /* FPATAN */ 5357 DIP("fpatan\n"); 5358 put_ST_UNCHECKED(1, 5359 triop(Iop_AtanF64, 5360 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5361 get_ST(1), 5362 get_ST(0))); 5363 fp_pop(); 5364 break; 5365 5366 case 0xF4: { /* FXTRACT */ 5367 IRTemp argF = newTemp(Ity_F64); 5368 IRTemp sigF = newTemp(Ity_F64); 5369 IRTemp expF = newTemp(Ity_F64); 5370 IRTemp argI = newTemp(Ity_I64); 5371 IRTemp sigI = newTemp(Ity_I64); 5372 IRTemp expI = newTemp(Ity_I64); 5373 DIP("fxtract\n"); 5374 assign( argF, get_ST(0) ); 5375 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5376 assign( sigI, 5377 mkIRExprCCall( 5378 Ity_I64, 0/*regparms*/, 5379 "x86amd64g_calculate_FXTRACT", 5380 &x86amd64g_calculate_FXTRACT, 5381 mkIRExprVec_2( mkexpr(argI), 5382 mkIRExpr_HWord(0)/*sig*/ )) 5383 ); 5384 assign( expI, 5385 mkIRExprCCall( 5386 Ity_I64, 0/*regparms*/, 5387 "x86amd64g_calculate_FXTRACT", 5388 &x86amd64g_calculate_FXTRACT, 5389 mkIRExprVec_2( mkexpr(argI), 5390 mkIRExpr_HWord(1)/*exp*/ )) 5391 ); 5392 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5393 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5394 /* exponent */ 5395 put_ST_UNCHECKED(0, mkexpr(expF) ); 5396 fp_push(); 5397 /* significand */ 5398 put_ST(0, mkexpr(sigF) ); 5399 break; 5400 } 5401 5402 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5403 IRTemp a1 = newTemp(Ity_F64); 5404 IRTemp a2 = newTemp(Ity_F64); 5405 DIP("fprem1\n"); 5406 /* Do FPREM1 twice, once to get 
the remainder, and once 5407 to get the C3210 flag values. */ 5408 assign( a1, get_ST(0) ); 5409 assign( a2, get_ST(1) ); 5410 put_ST_UNCHECKED(0, 5411 triop(Iop_PRem1F64, 5412 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5413 mkexpr(a1), 5414 mkexpr(a2))); 5415 put_C3210( 5416 unop(Iop_32Uto64, 5417 triop(Iop_PRem1C3210F64, 5418 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5419 mkexpr(a1), 5420 mkexpr(a2)) )); 5421 break; 5422 } 5423 5424 case 0xF7: /* FINCSTP */ 5425 DIP("fincstp\n"); 5426 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5427 break; 5428 5429 case 0xF8: { /* FPREM -- not IEEE compliant */ 5430 IRTemp a1 = newTemp(Ity_F64); 5431 IRTemp a2 = newTemp(Ity_F64); 5432 DIP("fprem\n"); 5433 /* Do FPREM twice, once to get the remainder, and once 5434 to get the C3210 flag values. */ 5435 assign( a1, get_ST(0) ); 5436 assign( a2, get_ST(1) ); 5437 put_ST_UNCHECKED(0, 5438 triop(Iop_PRemF64, 5439 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5440 mkexpr(a1), 5441 mkexpr(a2))); 5442 put_C3210( 5443 unop(Iop_32Uto64, 5444 triop(Iop_PRemC3210F64, 5445 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5446 mkexpr(a1), 5447 mkexpr(a2)) )); 5448 break; 5449 } 5450 5451 case 0xF9: /* FYL2XP1 */ 5452 DIP("fyl2xp1\n"); 5453 put_ST_UNCHECKED(1, 5454 triop(Iop_Yl2xp1F64, 5455 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5456 get_ST(1), 5457 get_ST(0))); 5458 fp_pop(); 5459 break; 5460 5461 case 0xFA: /* FSQRT */ 5462 DIP("fsqrt\n"); 5463 put_ST_UNCHECKED(0, 5464 binop(Iop_SqrtF64, 5465 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5466 get_ST(0))); 5467 break; 5468 5469 case 0xFB: { /* FSINCOS */ 5470 IRTemp a1 = newTemp(Ity_F64); 5471 assign( a1, get_ST(0) ); 5472 DIP("fsincos\n"); 5473 put_ST_UNCHECKED(0, 5474 binop(Iop_SinF64, 5475 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5476 mkexpr(a1))); 5477 fp_push(); 5478 put_ST(0, 5479 binop(Iop_CosF64, 5480 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5481 mkexpr(a1))); 5482 clear_C2(); /* HACK */ 
5483 break; 5484 } 5485 5486 case 0xFC: /* FRNDINT */ 5487 DIP("frndint\n"); 5488 put_ST_UNCHECKED(0, 5489 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5490 break; 5491 5492 case 0xFD: /* FSCALE */ 5493 DIP("fscale\n"); 5494 put_ST_UNCHECKED(0, 5495 triop(Iop_ScaleF64, 5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5497 get_ST(0), 5498 get_ST(1))); 5499 break; 5500 5501 case 0xFE: /* FSIN */ 5502 DIP("fsin\n"); 5503 put_ST_UNCHECKED(0, 5504 binop(Iop_SinF64, 5505 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5506 get_ST(0))); 5507 clear_C2(); /* HACK */ 5508 break; 5509 5510 case 0xFF: /* FCOS */ 5511 DIP("fcos\n"); 5512 put_ST_UNCHECKED(0, 5513 binop(Iop_CosF64, 5514 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5515 get_ST(0))); 5516 clear_C2(); /* HACK */ 5517 break; 5518 5519 default: 5520 goto decode_fail; 5521 } 5522 } 5523 } 5524 5525 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 5526 else 5527 if (first_opcode == 0xDA) { 5528 5529 if (modrm < 0xC0) { 5530 5531 /* bits 5,4,3 are an opcode extension, and the modRM also 5532 specifies an address. 
*/ 5533 IROp fop; 5534 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5535 delta += len; 5536 switch (gregLO3ofRM(modrm)) { 5537 5538 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 5539 DIP("fiaddl %s\n", dis_buf); 5540 fop = Iop_AddF64; 5541 goto do_fop_m32; 5542 5543 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 5544 DIP("fimull %s\n", dis_buf); 5545 fop = Iop_MulF64; 5546 goto do_fop_m32; 5547 5548 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 5549 DIP("fisubl %s\n", dis_buf); 5550 fop = Iop_SubF64; 5551 goto do_fop_m32; 5552 5553 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 5554 DIP("fisubrl %s\n", dis_buf); 5555 fop = Iop_SubF64; 5556 goto do_foprev_m32; 5557 5558 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 5559 DIP("fisubl %s\n", dis_buf); 5560 fop = Iop_DivF64; 5561 goto do_fop_m32; 5562 5563 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 5564 DIP("fidivrl %s\n", dis_buf); 5565 fop = Iop_DivF64; 5566 goto do_foprev_m32; 5567 5568 do_fop_m32: 5569 put_ST_UNCHECKED(0, 5570 triop(fop, 5571 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5572 get_ST(0), 5573 unop(Iop_I32StoF64, 5574 loadLE(Ity_I32, mkexpr(addr))))); 5575 break; 5576 5577 do_foprev_m32: 5578 put_ST_UNCHECKED(0, 5579 triop(fop, 5580 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5581 unop(Iop_I32StoF64, 5582 loadLE(Ity_I32, mkexpr(addr))), 5583 get_ST(0))); 5584 break; 5585 5586 default: 5587 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5588 vex_printf("first_opcode == 0xDA\n"); 5589 goto decode_fail; 5590 } 5591 5592 } else { 5593 5594 delta++; 5595 switch (modrm) { 5596 5597 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 5598 r_src = (UInt)modrm - 0xC0; 5599 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 5600 put_ST_UNCHECKED(0, 5601 IRExpr_Mux0X( 5602 unop(Iop_1Uto8, 5603 mk_amd64g_calculate_condition(AMD64CondB)), 5604 get_ST(0), get_ST(r_src)) ); 5605 break; 5606 5607 case 0xC8 ... 
0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 5608 r_src = (UInt)modrm - 0xC8; 5609 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 5610 put_ST_UNCHECKED(0, 5611 IRExpr_Mux0X( 5612 unop(Iop_1Uto8, 5613 mk_amd64g_calculate_condition(AMD64CondZ)), 5614 get_ST(0), get_ST(r_src)) ); 5615 break; 5616 5617 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 5618 r_src = (UInt)modrm - 0xD0; 5619 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 5620 put_ST_UNCHECKED(0, 5621 IRExpr_Mux0X( 5622 unop(Iop_1Uto8, 5623 mk_amd64g_calculate_condition(AMD64CondBE)), 5624 get_ST(0), get_ST(r_src)) ); 5625 break; 5626 5627 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 5628 r_src = (UInt)modrm - 0xD8; 5629 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 5630 put_ST_UNCHECKED(0, 5631 IRExpr_Mux0X( 5632 unop(Iop_1Uto8, 5633 mk_amd64g_calculate_condition(AMD64CondP)), 5634 get_ST(0), get_ST(r_src)) ); 5635 break; 5636 5637 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 5638 DIP("fucompp %%st(0),%%st(1)\n"); 5639 /* This forces C1 to zero, which isn't right. */ 5640 put_C3210( 5641 unop(Iop_32Uto64, 5642 binop( Iop_And32, 5643 binop(Iop_Shl32, 5644 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 5645 mkU8(8)), 5646 mkU32(0x4500) 5647 ))); 5648 fp_pop(); 5649 fp_pop(); 5650 break; 5651 5652 default: 5653 goto decode_fail; 5654 } 5655 5656 } 5657 } 5658 5659 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 5660 else 5661 if (first_opcode == 0xDB) { 5662 if (modrm < 0xC0) { 5663 5664 /* bits 5,4,3 are an opcode extension, and the modRM also 5665 specifies an address. 
*/ 5666 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5667 delta += len; 5668 5669 switch (gregLO3ofRM(modrm)) { 5670 5671 case 0: /* FILD m32int */ 5672 DIP("fildl %s\n", dis_buf); 5673 fp_push(); 5674 put_ST(0, unop(Iop_I32StoF64, 5675 loadLE(Ity_I32, mkexpr(addr)))); 5676 break; 5677 5678 case 1: /* FISTTPL m32 (SSE3) */ 5679 DIP("fisttpl %s\n", dis_buf); 5680 storeLE( mkexpr(addr), 5681 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 5682 fp_pop(); 5683 break; 5684 5685 case 2: /* FIST m32 */ 5686 DIP("fistl %s\n", dis_buf); 5687 storeLE( mkexpr(addr), 5688 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 5689 break; 5690 5691 case 3: /* FISTP m32 */ 5692 DIP("fistpl %s\n", dis_buf); 5693 storeLE( mkexpr(addr), 5694 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 5695 fp_pop(); 5696 break; 5697 5698 case 5: { /* FLD extended-real */ 5699 /* Uses dirty helper: 5700 ULong amd64g_loadF80le ( ULong ) 5701 addr holds the address. First, do a dirty call to 5702 get hold of the data. */ 5703 IRTemp val = newTemp(Ity_I64); 5704 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 5705 5706 IRDirty* d = unsafeIRDirty_1_N ( 5707 val, 5708 0/*regparms*/, 5709 "amd64g_dirtyhelper_loadF80le", 5710 &amd64g_dirtyhelper_loadF80le, 5711 args 5712 ); 5713 /* declare that we're reading memory */ 5714 d->mFx = Ifx_Read; 5715 d->mAddr = mkexpr(addr); 5716 d->mSize = 10; 5717 5718 /* execute the dirty call, dumping the result in val. 
*/ 5719 stmt( IRStmt_Dirty(d) ); 5720 fp_push(); 5721 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 5722 5723 DIP("fldt %s\n", dis_buf); 5724 break; 5725 } 5726 5727 case 7: { /* FSTP extended-real */ 5728 /* Uses dirty helper: 5729 void amd64g_storeF80le ( ULong addr, ULong data ) 5730 */ 5731 IRExpr** args 5732 = mkIRExprVec_2( mkexpr(addr), 5733 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 5734 5735 IRDirty* d = unsafeIRDirty_0_N ( 5736 0/*regparms*/, 5737 "amd64g_dirtyhelper_storeF80le", 5738 &amd64g_dirtyhelper_storeF80le, 5739 args 5740 ); 5741 /* declare we're writing memory */ 5742 d->mFx = Ifx_Write; 5743 d->mAddr = mkexpr(addr); 5744 d->mSize = 10; 5745 5746 /* execute the dirty call. */ 5747 stmt( IRStmt_Dirty(d) ); 5748 fp_pop(); 5749 5750 DIP("fstpt\n %s", dis_buf); 5751 break; 5752 } 5753 5754 default: 5755 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5756 vex_printf("first_opcode == 0xDB\n"); 5757 goto decode_fail; 5758 } 5759 5760 } else { 5761 5762 delta++; 5763 switch (modrm) { 5764 5765 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 5766 r_src = (UInt)modrm - 0xC0; 5767 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 5768 put_ST_UNCHECKED(0, 5769 IRExpr_Mux0X( 5770 unop(Iop_1Uto8, 5771 mk_amd64g_calculate_condition(AMD64CondNB)), 5772 get_ST(0), get_ST(r_src)) ); 5773 break; 5774 5775 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 5776 r_src = (UInt)modrm - 0xC8; 5777 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 5778 put_ST_UNCHECKED( 5779 0, 5780 IRExpr_Mux0X( 5781 unop(Iop_1Uto8, 5782 mk_amd64g_calculate_condition(AMD64CondNZ)), 5783 get_ST(0), 5784 get_ST(r_src) 5785 ) 5786 ); 5787 break; 5788 5789 case 0xD0 ... 
0xD7: /* FCMOVNBE ST(i), ST(0) */ 5790 r_src = (UInt)modrm - 0xD0; 5791 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 5792 put_ST_UNCHECKED( 5793 0, 5794 IRExpr_Mux0X( 5795 unop(Iop_1Uto8, 5796 mk_amd64g_calculate_condition(AMD64CondNBE)), 5797 get_ST(0), 5798 get_ST(r_src) 5799 ) 5800 ); 5801 break; 5802 5803 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */ 5804 r_src = (UInt)modrm - 0xD8; 5805 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 5806 put_ST_UNCHECKED( 5807 0, 5808 IRExpr_Mux0X( 5809 unop(Iop_1Uto8, 5810 mk_amd64g_calculate_condition(AMD64CondNP)), 5811 get_ST(0), 5812 get_ST(r_src) 5813 ) 5814 ); 5815 break; 5816 5817 case 0xE2: 5818 DIP("fnclex\n"); 5819 break; 5820 5821 case 0xE3: { 5822 /* Uses dirty helper: 5823 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 5824 IRDirty* d = unsafeIRDirty_0_N ( 5825 0/*regparms*/, 5826 "amd64g_dirtyhelper_FINIT", 5827 &amd64g_dirtyhelper_FINIT, 5828 mkIRExprVec_0() 5829 ); 5830 d->needsBBP = True; 5831 5832 /* declare we're writing guest state */ 5833 d->nFxState = 5; 5834 5835 d->fxState[0].fx = Ifx_Write; 5836 d->fxState[0].offset = OFFB_FTOP; 5837 d->fxState[0].size = sizeof(UInt); 5838 5839 d->fxState[1].fx = Ifx_Write; 5840 d->fxState[1].offset = OFFB_FPREGS; 5841 d->fxState[1].size = 8 * sizeof(ULong); 5842 5843 d->fxState[2].fx = Ifx_Write; 5844 d->fxState[2].offset = OFFB_FPTAGS; 5845 d->fxState[2].size = 8 * sizeof(UChar); 5846 5847 d->fxState[3].fx = Ifx_Write; 5848 d->fxState[3].offset = OFFB_FPROUND; 5849 d->fxState[3].size = sizeof(ULong); 5850 5851 d->fxState[4].fx = Ifx_Write; 5852 d->fxState[4].offset = OFFB_FC3210; 5853 d->fxState[4].size = sizeof(ULong); 5854 5855 stmt( IRStmt_Dirty(d) ); 5856 5857 DIP("fninit\n"); 5858 break; 5859 } 5860 5861 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 5862 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 5863 break; 5864 5865 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) 
*/ 5866 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 5867 break; 5868 5869 default: 5870 goto decode_fail; 5871 } 5872 } 5873 } 5874 5875 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 5876 else 5877 if (first_opcode == 0xDC) { 5878 if (modrm < 0xC0) { 5879 5880 /* bits 5,4,3 are an opcode extension, and the modRM also 5881 specifies an address. */ 5882 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5883 delta += len; 5884 5885 switch (gregLO3ofRM(modrm)) { 5886 5887 case 0: /* FADD double-real */ 5888 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 5889 break; 5890 5891 case 1: /* FMUL double-real */ 5892 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 5893 break; 5894 5895 //.. case 2: /* FCOM double-real */ 5896 //.. DIP("fcoml %s\n", dis_buf); 5897 //.. /* This forces C1 to zero, which isn't right. */ 5898 //.. put_C3210( 5899 //.. binop( Iop_And32, 5900 //.. binop(Iop_Shl32, 5901 //.. binop(Iop_CmpF64, 5902 //.. get_ST(0), 5903 //.. loadLE(Ity_F64,mkexpr(addr))), 5904 //.. mkU8(8)), 5905 //.. mkU32(0x4500) 5906 //.. )); 5907 //.. break; 5908 5909 case 3: /* FCOMP double-real */ 5910 DIP("fcompl %s\n", dis_buf); 5911 /* This forces C1 to zero, which isn't right. 
*/ 5912 put_C3210( 5913 unop(Iop_32Uto64, 5914 binop( Iop_And32, 5915 binop(Iop_Shl32, 5916 binop(Iop_CmpF64, 5917 get_ST(0), 5918 loadLE(Ity_F64,mkexpr(addr))), 5919 mkU8(8)), 5920 mkU32(0x4500) 5921 ))); 5922 fp_pop(); 5923 break; 5924 5925 case 4: /* FSUB double-real */ 5926 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 5927 break; 5928 5929 case 5: /* FSUBR double-real */ 5930 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 5931 break; 5932 5933 case 6: /* FDIV double-real */ 5934 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 5935 break; 5936 5937 case 7: /* FDIVR double-real */ 5938 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 5939 break; 5940 5941 default: 5942 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5943 vex_printf("first_opcode == 0xDC\n"); 5944 goto decode_fail; 5945 } 5946 5947 } else { 5948 5949 delta++; 5950 switch (modrm) { 5951 5952 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 5953 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 5954 break; 5955 5956 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 5957 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 5958 break; 5959 5960 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 5961 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 5962 break; 5963 5964 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 5965 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 5966 break; 5967 5968 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 5969 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 5970 break; 5971 5972 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) 
*/ 5973 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 5974 break; 5975 5976 default: 5977 goto decode_fail; 5978 } 5979 5980 } 5981 } 5982 5983 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 5984 else 5985 if (first_opcode == 0xDD) { 5986 5987 if (modrm < 0xC0) { 5988 5989 /* bits 5,4,3 are an opcode extension, and the modRM also 5990 specifies an address. */ 5991 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5992 delta += len; 5993 5994 switch (gregLO3ofRM(modrm)) { 5995 5996 case 0: /* FLD double-real */ 5997 DIP("fldl %s\n", dis_buf); 5998 fp_push(); 5999 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6000 break; 6001 6002 case 1: /* FISTTPQ m64 (SSE3) */ 6003 DIP("fistppll %s\n", dis_buf); 6004 storeLE( mkexpr(addr), 6005 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6006 fp_pop(); 6007 break; 6008 6009 case 2: /* FST double-real */ 6010 DIP("fstl %s\n", dis_buf); 6011 storeLE(mkexpr(addr), get_ST(0)); 6012 break; 6013 6014 case 3: /* FSTP double-real */ 6015 DIP("fstpl %s\n", dis_buf); 6016 storeLE(mkexpr(addr), get_ST(0)); 6017 fp_pop(); 6018 break; 6019 6020 //.. case 4: { /* FRSTOR m108 */ 6021 //.. /* Uses dirty helper: 6022 //.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */ 6023 //.. IRTemp ew = newTemp(Ity_I32); 6024 //.. IRDirty* d = unsafeIRDirty_0_N ( 6025 //.. 0/*regparms*/, 6026 //.. "x86g_dirtyhelper_FRSTOR", 6027 //.. &x86g_dirtyhelper_FRSTOR, 6028 //.. mkIRExprVec_1( mkexpr(addr) ) 6029 //.. ); 6030 //.. d->needsBBP = True; 6031 //.. d->tmp = ew; 6032 //.. /* declare we're reading memory */ 6033 //.. d->mFx = Ifx_Read; 6034 //.. d->mAddr = mkexpr(addr); 6035 //.. d->mSize = 108; 6036 //.. 6037 //.. /* declare we're writing guest state */ 6038 //.. d->nFxState = 5; 6039 //.. 6040 //.. d->fxState[0].fx = Ifx_Write; 6041 //.. d->fxState[0].offset = OFFB_FTOP; 6042 //.. d->fxState[0].size = sizeof(UInt); 6043 //.. 6044 //.. d->fxState[1].fx = Ifx_Write; 6045 //.. 
d->fxState[1].offset = OFFB_FPREGS; 6046 //.. d->fxState[1].size = 8 * sizeof(ULong); 6047 //.. 6048 //.. d->fxState[2].fx = Ifx_Write; 6049 //.. d->fxState[2].offset = OFFB_FPTAGS; 6050 //.. d->fxState[2].size = 8 * sizeof(UChar); 6051 //.. 6052 //.. d->fxState[3].fx = Ifx_Write; 6053 //.. d->fxState[3].offset = OFFB_FPROUND; 6054 //.. d->fxState[3].size = sizeof(UInt); 6055 //.. 6056 //.. d->fxState[4].fx = Ifx_Write; 6057 //.. d->fxState[4].offset = OFFB_FC3210; 6058 //.. d->fxState[4].size = sizeof(UInt); 6059 //.. 6060 //.. stmt( IRStmt_Dirty(d) ); 6061 //.. 6062 //.. /* ew contains any emulation warning we may need to 6063 //.. issue. If needed, side-exit to the next insn, 6064 //.. reporting the warning, so that Valgrind's dispatcher 6065 //.. sees the warning. */ 6066 //.. put_emwarn( mkexpr(ew) ); 6067 //.. stmt( 6068 //.. IRStmt_Exit( 6069 //.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6070 //.. Ijk_EmWarn, 6071 //.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta) 6072 //.. ) 6073 //.. ); 6074 //.. 6075 //.. DIP("frstor %s\n", dis_buf); 6076 //.. break; 6077 //.. } 6078 //.. 6079 //.. case 6: { /* FNSAVE m108 */ 6080 //.. /* Uses dirty helper: 6081 //.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */ 6082 //.. IRDirty* d = unsafeIRDirty_0_N ( 6083 //.. 0/*regparms*/, 6084 //.. "x86g_dirtyhelper_FSAVE", 6085 //.. &x86g_dirtyhelper_FSAVE, 6086 //.. mkIRExprVec_1( mkexpr(addr) ) 6087 //.. ); 6088 //.. d->needsBBP = True; 6089 //.. /* declare we're writing memory */ 6090 //.. d->mFx = Ifx_Write; 6091 //.. d->mAddr = mkexpr(addr); 6092 //.. d->mSize = 108; 6093 //.. 6094 //.. /* declare we're reading guest state */ 6095 //.. d->nFxState = 5; 6096 //.. 6097 //.. d->fxState[0].fx = Ifx_Read; 6098 //.. d->fxState[0].offset = OFFB_FTOP; 6099 //.. d->fxState[0].size = sizeof(UInt); 6100 //.. 6101 //.. d->fxState[1].fx = Ifx_Read; 6102 //.. d->fxState[1].offset = OFFB_FPREGS; 6103 //.. d->fxState[1].size = 8 * sizeof(ULong); 6104 //.. 6105 //.. 
d->fxState[2].fx = Ifx_Read; 6106 //.. d->fxState[2].offset = OFFB_FPTAGS; 6107 //.. d->fxState[2].size = 8 * sizeof(UChar); 6108 //.. 6109 //.. d->fxState[3].fx = Ifx_Read; 6110 //.. d->fxState[3].offset = OFFB_FPROUND; 6111 //.. d->fxState[3].size = sizeof(UInt); 6112 //.. 6113 //.. d->fxState[4].fx = Ifx_Read; 6114 //.. d->fxState[4].offset = OFFB_FC3210; 6115 //.. d->fxState[4].size = sizeof(UInt); 6116 //.. 6117 //.. stmt( IRStmt_Dirty(d) ); 6118 //.. 6119 //.. DIP("fnsave %s\n", dis_buf); 6120 //.. break; 6121 //.. } 6122 6123 case 7: { /* FNSTSW m16 */ 6124 IRExpr* sw = get_FPU_sw(); 6125 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6126 storeLE( mkexpr(addr), sw ); 6127 DIP("fnstsw %s\n", dis_buf); 6128 break; 6129 } 6130 6131 default: 6132 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6133 vex_printf("first_opcode == 0xDD\n"); 6134 goto decode_fail; 6135 } 6136 } else { 6137 delta++; 6138 switch (modrm) { 6139 6140 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6141 r_dst = (UInt)modrm - 0xC0; 6142 DIP("ffree %%st(%u)\n", r_dst); 6143 put_ST_TAG ( r_dst, mkU8(0) ); 6144 break; 6145 6146 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6147 r_dst = (UInt)modrm - 0xD0; 6148 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6149 /* P4 manual says: "If the destination operand is a 6150 non-empty register, the invalid-operation exception 6151 is not generated. Hence put_ST_UNCHECKED. */ 6152 put_ST_UNCHECKED(r_dst, get_ST(0)); 6153 break; 6154 6155 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6156 r_dst = (UInt)modrm - 0xD8; 6157 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6158 /* P4 manual says: "If the destination operand is a 6159 non-empty register, the invalid-operation exception 6160 is not generated. Hence put_ST_UNCHECKED. */ 6161 put_ST_UNCHECKED(r_dst, get_ST(0)); 6162 fp_pop(); 6163 break; 6164 6165 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) 
*/ 6166 r_dst = (UInt)modrm - 0xE0; 6167 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6168 /* This forces C1 to zero, which isn't right. */ 6169 put_C3210( 6170 unop(Iop_32Uto64, 6171 binop( Iop_And32, 6172 binop(Iop_Shl32, 6173 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6174 mkU8(8)), 6175 mkU32(0x4500) 6176 ))); 6177 break; 6178 6179 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 6180 r_dst = (UInt)modrm - 0xE8; 6181 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6182 /* This forces C1 to zero, which isn't right. */ 6183 put_C3210( 6184 unop(Iop_32Uto64, 6185 binop( Iop_And32, 6186 binop(Iop_Shl32, 6187 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6188 mkU8(8)), 6189 mkU32(0x4500) 6190 ))); 6191 fp_pop(); 6192 break; 6193 6194 default: 6195 goto decode_fail; 6196 } 6197 } 6198 } 6199 6200 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6201 else 6202 if (first_opcode == 0xDE) { 6203 6204 if (modrm < 0xC0) { 6205 6206 /* bits 5,4,3 are an opcode extension, and the modRM also 6207 specifies an address. 
*/ 6208 IROp fop; 6209 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6210 delta += len; 6211 6212 switch (gregLO3ofRM(modrm)) { 6213 6214 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6215 DIP("fiaddw %s\n", dis_buf); 6216 fop = Iop_AddF64; 6217 goto do_fop_m16; 6218 6219 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6220 DIP("fimulw %s\n", dis_buf); 6221 fop = Iop_MulF64; 6222 goto do_fop_m16; 6223 6224 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6225 DIP("fisubw %s\n", dis_buf); 6226 fop = Iop_SubF64; 6227 goto do_fop_m16; 6228 6229 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6230 DIP("fisubrw %s\n", dis_buf); 6231 fop = Iop_SubF64; 6232 goto do_foprev_m16; 6233 6234 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6235 DIP("fisubw %s\n", dis_buf); 6236 fop = Iop_DivF64; 6237 goto do_fop_m16; 6238 6239 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6240 DIP("fidivrw %s\n", dis_buf); 6241 fop = Iop_DivF64; 6242 goto do_foprev_m16; 6243 6244 do_fop_m16: 6245 put_ST_UNCHECKED(0, 6246 triop(fop, 6247 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6248 get_ST(0), 6249 unop(Iop_I32StoF64, 6250 unop(Iop_16Sto32, 6251 loadLE(Ity_I16, mkexpr(addr)))))); 6252 break; 6253 6254 do_foprev_m16: 6255 put_ST_UNCHECKED(0, 6256 triop(fop, 6257 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6258 unop(Iop_I32StoF64, 6259 unop(Iop_16Sto32, 6260 loadLE(Ity_I16, mkexpr(addr)))), 6261 get_ST(0))); 6262 break; 6263 6264 default: 6265 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6266 vex_printf("first_opcode == 0xDE\n"); 6267 goto decode_fail; 6268 } 6269 6270 } else { 6271 6272 delta++; 6273 switch (modrm) { 6274 6275 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6276 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6277 break; 6278 6279 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) 
*/ 6280 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6281 break; 6282 6283 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6284 DIP("fcompp %%st(0),%%st(1)\n"); 6285 /* This forces C1 to zero, which isn't right. */ 6286 put_C3210( 6287 unop(Iop_32Uto64, 6288 binop( Iop_And32, 6289 binop(Iop_Shl32, 6290 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6291 mkU8(8)), 6292 mkU32(0x4500) 6293 ))); 6294 fp_pop(); 6295 fp_pop(); 6296 break; 6297 6298 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6299 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6300 break; 6301 6302 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6303 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6304 break; 6305 6306 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6307 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6308 break; 6309 6310 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6311 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6312 break; 6313 6314 default: 6315 goto decode_fail; 6316 } 6317 6318 } 6319 } 6320 6321 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6322 else 6323 if (first_opcode == 0xDF) { 6324 6325 if (modrm < 0xC0) { 6326 6327 /* bits 5,4,3 are an opcode extension, and the modRM also 6328 specifies an address. 
*/ 6329 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6330 delta += len; 6331 6332 switch (gregLO3ofRM(modrm)) { 6333 6334 case 0: /* FILD m16int */ 6335 DIP("fildw %s\n", dis_buf); 6336 fp_push(); 6337 put_ST(0, unop(Iop_I32StoF64, 6338 unop(Iop_16Sto32, 6339 loadLE(Ity_I16, mkexpr(addr))))); 6340 break; 6341 6342 case 1: /* FISTTPS m16 (SSE3) */ 6343 DIP("fisttps %s\n", dis_buf); 6344 storeLE( mkexpr(addr), 6345 x87ishly_qnarrow_32_to_16( 6346 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6347 fp_pop(); 6348 break; 6349 6350 case 2: /* FIST m16 */ 6351 DIP("fists %s\n", dis_buf); 6352 storeLE( mkexpr(addr), 6353 x87ishly_qnarrow_32_to_16( 6354 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6355 break; 6356 6357 case 3: /* FISTP m16 */ 6358 DIP("fistps %s\n", dis_buf); 6359 storeLE( mkexpr(addr), 6360 x87ishly_qnarrow_32_to_16( 6361 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6362 fp_pop(); 6363 break; 6364 6365 case 5: /* FILD m64 */ 6366 DIP("fildll %s\n", dis_buf); 6367 fp_push(); 6368 put_ST(0, binop(Iop_I64StoF64, 6369 get_roundingmode(), 6370 loadLE(Ity_I64, mkexpr(addr)))); 6371 break; 6372 6373 case 7: /* FISTP m64 */ 6374 DIP("fistpll %s\n", dis_buf); 6375 storeLE( mkexpr(addr), 6376 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6377 fp_pop(); 6378 break; 6379 6380 default: 6381 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6382 vex_printf("first_opcode == 0xDF\n"); 6383 goto decode_fail; 6384 } 6385 6386 } else { 6387 6388 delta++; 6389 switch (modrm) { 6390 6391 case 0xC0: /* FFREEP %st(0) */ 6392 DIP("ffreep %%st(%d)\n", 0); 6393 put_ST_TAG ( 0, mkU8(0) ); 6394 fp_pop(); 6395 break; 6396 6397 case 0xE0: /* FNSTSW %ax */ 6398 DIP("fnstsw %%ax\n"); 6399 /* Invent a plausible-looking FPU status word value and 6400 dump it in %AX: 6401 ((ftop & 7) << 11) | (c3210 & 0x4700) 6402 */ 6403 putIRegRAX( 6404 2, 6405 unop(Iop_32to16, 6406 binop(Iop_Or32, 6407 binop(Iop_Shl32, 6408 
                                binop(Iop_And32, get_ftop(), mkU32(7)),
                                mkU8(11)),
                          binop(Iop_And32,
                                unop(Iop_64to32, get_C3210()),
                                mkU32(0x4700))
                    )));
            break;

         case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
            break;

         case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
            /* not really right since COMIP != UCOMIP */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
            break;

         default:
            goto decode_fail;
      }
   }

   }

   else
      goto decode_fail;

   *decode_ok = True;
   return delta;

  decode_fail:
   *decode_ok = False;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

/* Emit IR implementing the first row of the table above: mark all
   eight x87 tags as in-use and reset the FP stack pointer to zero.
   Must be emitted before any MMX insn other than EMMS. */
static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   /* FPTAGS[i] := 1 ("valid") for i = 0 .. 7 */
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
}

/* Emit IR implementing the EMMS row of the table above: mark all
   eight x87 tags as empty and reset the FP stack pointer to zero. */
static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   /* FPTAGS[i] := 0 ("empty") for i = 0 .. 7 */
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
}


/* Read MMX register 0..7 as an I64.  MMX registers are stored in the
   guest state's FPREGS array, 8 bytes per register. */
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


/* Write an I64 expression to MMX register 0..7, stored in the guest
   state's FPREGS array, 8 bytes per register. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}


/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

static
ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                HChar*      name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;
   IROp    op    = Iop_INVALID;
   /* hAddr/hName identify a clean helper, for those opcodes which
      have no single corresponding IROp (set via the XXX macro). */
   void*   hAddr = NULL;
   HChar*  hName = NULL;
   Bool    eLeft = False;

   /* Record address and name of a clean helper implementing opc. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 
/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller.

   Decodes one "G op= E" style MMX instruction: reads the mod/rm byte
   at 'delta', fetches the E operand (MMX register or 64-bit memory
   load), applies the operation selected by 'opc', and writes the
   result back to the G (register-field) MMX register.  Returns the
   updated 'delta'.

   The operation is either a native IROp ('op') or, for pmaddwd/psadbw,
   a clean C helper call (hAddr/hName, captured via the XXX macro).
   'invG' means the G argument is complemented first (used for PANDN).
   'eLeft' means E is the left argument (matters for the non-commutative
   pack/unpack cases).  'show_granularity' appends the b/w/d/q suffix,
   derived from the bottom 2 bits of the opcode, to the disassembly. */
static
ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                HChar*      name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;
   IROp    op    = Iop_INVALID;
   void*   hAddr = NULL;
   HChar*  hName = NULL;
   Bool    eLeft = False;

   /* Record a clean-helper implementation: address and stringified name. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   /* Map the second opcode byte to the IR operation (or helper). */
   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack/unpack take E as the left arg (eLeft). */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      /* PANDN: complement G before ANDing. */
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default: 
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   /* Fetch G, complementing it for PANDN. */
   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch E: either an MMX register or a 64-bit load. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64, mkexpr(addr));
   }

   /* Order the arguments as the operation requires. */
   if (eLeft) {
      argL = argE;
      argR = argG;
   } else {
      argL = argG;
      argR = argE;
   }

   /* Exactly one of 'op' / the helper pair must have been set. */
   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res, 
              mkIRExprCCall(
                 Ity_I64, 
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              ) 
            );
   }

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n", 
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );

   return delta;
}
(eLeft) { 6618 argL = argE; 6619 argR = argG; 6620 } else { 6621 argL = argG; 6622 argR = argE; 6623 } 6624 6625 if (op != Iop_INVALID) { 6626 vassert(hName == NULL); 6627 vassert(hAddr == NULL); 6628 assign(res, binop(op, argL, argR)); 6629 } else { 6630 vassert(hName != NULL); 6631 vassert(hAddr != NULL); 6632 assign( res, 6633 mkIRExprCCall( 6634 Ity_I64, 6635 0/*regparms*/, hName, hAddr, 6636 mkIRExprVec_2( argL, argR ) 6637 ) 6638 ); 6639 } 6640 6641 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 6642 6643 DIP("%s%s %s, %s\n", 6644 name, show_granularity ? nameMMXGran(opc & 3) : "", 6645 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 6646 nameMMXReg(gregLO3ofRM(modrm)) ); 6647 6648 return delta; 6649 } 6650 6651 6652 /* Vector by scalar shift of G by the amount specified at the bottom 6653 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 6654 6655 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi, 6656 Prefix pfx, Long delta, 6657 HChar* opname, IROp op ) 6658 { 6659 HChar dis_buf[50]; 6660 Int alen, size; 6661 IRTemp addr; 6662 Bool shl, shr, sar; 6663 UChar rm = getUChar(delta); 6664 IRTemp g0 = newTemp(Ity_I64); 6665 IRTemp g1 = newTemp(Ity_I64); 6666 IRTemp amt = newTemp(Ity_I64); 6667 IRTemp amt8 = newTemp(Ity_I8); 6668 6669 if (epartIsReg(rm)) { 6670 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 6671 DIP("%s %s,%s\n", opname, 6672 nameMMXReg(eregLO3ofRM(rm)), 6673 nameMMXReg(gregLO3ofRM(rm)) ); 6674 delta++; 6675 } else { 6676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 6677 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 6678 DIP("%s %s,%s\n", opname, 6679 dis_buf, 6680 nameMMXReg(gregLO3ofRM(rm)) ); 6681 delta += alen; 6682 } 6683 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 6684 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 6685 6686 shl = shr = sar = False; 6687 size = 0; 6688 switch (op) { 6689 case Iop_ShlN16x4: shl = True; size = 32; break; 6690 case Iop_ShlN32x2: shl = True; size = 32; break; 6691 case Iop_Shl64: shl = True; size 
= 64; break; 6692 case Iop_ShrN16x4: shr = True; size = 16; break; 6693 case Iop_ShrN32x2: shr = True; size = 32; break; 6694 case Iop_Shr64: shr = True; size = 64; break; 6695 case Iop_SarN16x4: sar = True; size = 16; break; 6696 case Iop_SarN32x2: sar = True; size = 32; break; 6697 default: vassert(0); 6698 } 6699 6700 if (shl || shr) { 6701 assign( 6702 g1, 6703 IRExpr_Mux0X( 6704 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))), 6705 mkU64(0), 6706 binop(op, mkexpr(g0), mkexpr(amt8)) 6707 ) 6708 ); 6709 } else 6710 if (sar) { 6711 assign( 6712 g1, 6713 IRExpr_Mux0X( 6714 unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))), 6715 binop(op, mkexpr(g0), mkU8(size-1)), 6716 binop(op, mkexpr(g0), mkexpr(amt8)) 6717 ) 6718 ); 6719 } else { 6720 vassert(0); 6721 } 6722 6723 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) ); 6724 return delta; 6725 } 6726 6727 6728 /* Vector by scalar shift of E by an immediate byte. This is a 6729 straight copy of dis_SSE_shiftE_imm. */ 6730 6731 static 6732 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op ) 6733 { 6734 Bool