/*--------------------------------------------------------------------*/
/*--- begin                                       guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates x86 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 32-bit value is being written.

   FUCOMI(P): what happens to the A and S flags?  Currently they are
   forced to zero.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do so.

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   The state of %eflags.AC (alignment check, bit 18) is recorded by
   the simulation (viz, if you set it with popf then a pushf produces
   the value you set it to), but it is otherwise ignored.  In
   particular, setting it to 1 does NOT cause alignment checking to
   happen.  Programs that set it to 1 and then rely on the resulting
   SIGBUSs to inform them of misaligned accesses will not work.

   Implementation of sysenter is necessarily partial.  sysenter is a
   kind of system-call entry.  When doing a sysenter, the return
   address is not known -- that is something that is beyond Vex's
   knowledge.  So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
   set this thread's guest_EIP field with the continuation address
   before resuming execution.
   If that doesn't happen, the thread will jump to address zero,
   which is probably fatal.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).

   The delta values are 32-bit ints, not 64-bit ints.  That means
   this module may not work right if run on a 64-bit host.  That
   should be fixed properly, really -- if anyone ever wants to use
   Vex to translate x86 code for execution on a 64-bit host.

   casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong; it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   XXXX: Nov 2009: handling of SWP on ARM suffers from the same
   problem.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* Performance holes:

   - fcom ; fstsw %ax ; sahf
     sahf does not update the O flag (sigh) and so O needs to
     be computed.  This is done expensively; it would be better
     to have a calculate_eflags_o helper.

   - emwarns; some FP codes can generate huge numbers of these
     if the fpucw is changed in an inner loop.  It would be
     better for the guest state to have an emwarn-enable reg
     which can be set zero or nonzero.  If it is zero, emwarns
     are not flagged, and instead control just flows all the
     way through bbs as usual.
*/

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 12-byte preamble C1C703 C1C70D
   C1C71D C1C713 (in the standard interpretation, that means: roll
   $3, %edi; roll $13, %edi; roll $29, %edi; roll $19, %edi).
   Following that, one of the following 3 is allowed (standard
   interpretation in parentheses):

      87DB (xchgl %ebx,%ebx)   %EDX = client_request ( %EAX )
      87C9 (xchgl %ecx,%ecx)   %EAX = guest_NRADDR
      87D2 (xchgl %edx,%edx)   call-noredir *%EAX

   Any other bytes following the 12-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
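
/* As a concrete illustration (assembled purely from the description
   above), a complete client-request marker in the guest's code
   stream would look like:

      C1 C7 03    roll $3,  %edi
      C1 C7 0D    roll $13, %edi
      C1 C7 1D    roll $29, %edi
      C1 C7 13    roll $19, %edi
      87 DB       xchgl %ebx,%ebx   => %EDX = client_request ( %EAX )

   The four rolls rotate %edi by 3+13+29+19 = 64 bits in total, so
   the whole sequence really is a no-op as far as the guest's
   architected state is concerned. */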

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity,
   even from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_x86.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_x86_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_X86, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr32 guest_EIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr32 guest_EIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

#define OFFB_EMWARN    offsetof(VexGuestX86State,guest_EMWARN)

#define OFFB_TISTART   offsetof(VexGuestX86State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestX86State,guest_TILEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- x86 insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* This is the Intel register encoding -- integer regs. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5
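
/* For example, in the modRM byte 0xD8 = 11 011 000, the mod field is
   3 (register-direct), the reg field is 3 = R_EBX, and the rm field
   is 0 = R_EAX, per the integer-register encoding above. */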

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Various simple conversions */

static UInt extend_s_8to32( UInt x )
{
   return (UInt)((((Int)x) << 24) >> 24);
}

static UInt extend_s_16to32 ( UInt x )
{
   return (UInt)((((Int)x) << 16) >> 16);
}

/* Fetch a byte from the guest insn stream. */
static UChar getIByte ( Int delta )
{
   return guest_code[delta];
}

/* Extract the reg field from a modRM byte. */
static Int gregOfRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If they refer to a register, the byte will
   have the form 11XXXYYY, where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* ... and extract the register number ... */
static Int eregOfRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static UChar getUChar ( Int delta )
{
   UChar v = guest_code[delta+0];
   return toUChar(v);
}

static UInt getUDisp16 ( Int delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

static UInt getUDisp32 ( Int delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

static UInt getUDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getUDisp16(delta);
      case 1: return (UInt)getUChar(delta);
      default: vpanic("getUDisp(x86)");
   }
   return 0; /*notreached*/
}


/* Get a byte value out of the insn stream and sign-extend to 32
   bits. */
static UInt getSDisp8 ( Int delta )
{
   return extend_s_8to32( (UInt) (guest_code[delta]) );
}

static UInt getSDisp16 ( Int delta0 )
{
   UChar* eip = (UChar*)(&guest_code[delta0]);
   UInt d = *eip++;
   d |= ((*eip++) << 8);
   return extend_s_16to32(d);
}

static UInt getSDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(x86)");
   }
   return 0; /*notreached*/
}
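
/* Example of the little-endian assembly done above: if the guest
   bytes at 'delta' are FE FF FF FF, getUDisp32(delta) returns
   0xFFFFFFFE, and getSDisp8(delta), seeing just the FE byte,
   sign-extends it to the same value, 0xFFFFFFFE (-2). */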

/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Create a 1/2/4 byte read of an x86 integer register.  For 16/8 bit
   register references, we need to take the host endianness into
   account.  Supplied value is 0 .. 7 and in the Intel instruction
   encoding. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      default: vpanic("szToITy(x86)");
   }
}

/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to the
   low half of a register has the same guest state offset as a
   reference to the full register.
*/
static Int integerGuestRegOffset ( Int sz, UInt archreg )
{
   vassert(archreg < 8);

   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);

   if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
      switch (archreg) {
         case R_EAX: return OFFB_EAX;
         case R_EBX: return OFFB_EBX;
         case R_ECX: return OFFB_ECX;
         case R_EDX: return OFFB_EDX;
         case R_ESI: return OFFB_ESI;
         case R_EDI: return OFFB_EDI;
         case R_ESP: return OFFB_ESP;
         case R_EBP: return OFFB_EBP;
         default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
      }
   }

   vassert(archreg >= 4 && archreg < 8 && sz == 1);
   switch (archreg-4) {
      case R_EAX: return 1+ OFFB_EAX;
      case R_EBX: return 1+ OFFB_EBX;
      case R_ECX: return 1+ OFFB_ECX;
      case R_EDX: return 1+ OFFB_EDX;
      default: vpanic("integerGuestRegOffset(x86,le)(1h)");
   }

   /* NOTREACHED */
   vpanic("integerGuestRegOffset(x86,le)");
}

static Int segmentGuestRegOffset ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return OFFB_ES;
      case R_CS: return OFFB_CS;
      case R_SS: return OFFB_SS;
      case R_DS: return OFFB_DS;
      case R_FS: return OFFB_FS;
      case R_GS: return OFFB_GS;
      default: vpanic("segmentGuestRegOffset(x86)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0: return OFFB_XMM0;
      case 1: return OFFB_XMM1;
      case 2: return OFFB_XMM2;
      case 3: return OFFB_XMM3;
      case 4: return OFFB_XMM4;
      case 5: return OFFB_XMM5;
      case 6: return OFFB_XMM6;
      case 7: return OFFB_XMM7;
      default: vpanic("xmmGuestRegOffset");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static IRExpr* getIReg ( Int sz, UInt archreg )
{
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(archreg < 8);
   return IRExpr_Get( integerGuestRegOffset(sz,archreg),
                      szToITy(sz) );
}
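
/* So, for instance, getIReg(1, R_AH) reads one byte at offset
   1 + OFFB_EAX -- the second-least-significant byte of the guest's
   %eax, which on a little-endian host is where %ah lives. */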

/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   switch (sz) {
      case 1: vassert(ty == Ity_I8);  break;
      case 2: vassert(ty == Ity_I16); break;
      case 4: vassert(ty == Ity_I32); break;
      default: vpanic("putIReg(x86)");
   }
   vassert(archreg < 8);
   stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}

static IRExpr* getSReg ( UInt sreg )
{
   return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
}

static void putSReg ( UInt sreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, UInt i )
{
   if (ty == Ity_I8)  return mkU8(i);
   if (ty == Ity_I16) return mkU16(i);
   if (ty == Ity_I32) return mkU32(i);
   /* If this panics, it usually means you passed a size (1,2,4)
      value as the IRType, rather than a real IRType. */
   vpanic("mkU(x86)");
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   Int adj;
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8);
   adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   return adj + op8;
}

static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
{
   if (szSmall == 1 && szBig == 4) {
      return signd ? Iop_8Sto32 : Iop_8Uto32;
   }
   if (szSmall == 1 && szBig == 2) {
      return signd ? Iop_8Sto16 : Iop_8Uto16;
   }
   if (szSmall == 2 && szBig == 4) {
      return signd ? Iop_16Sto32 : Iop_16Uto32;
   }
   vpanic("mkWidenOp(x86,guest)");
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_32to1,
               binop(Iop_And32,
                     unop(Iop_1Uto32,x),
                     unop(Iop_1Uto32,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr32 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U32( restart_point )
         ));
}
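
/* Typical use of casLE (a sketch; see e.g. dis_op2_G_E below): to
   implement a LOCK-prefixed read-modify-write to memory, load the
   old value into 'dst0', compute the new value into 'dst1', and emit

      casLE( mkexpr(addr), mkexpr(dst0), mkexpr(dst1),
             guest_EIP_curr_instr );

   so that, if some other thread modified the location in between,
   the whole instruction is restarted rather than a stale result
   being stored. */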

/*------------------------------------------------------------*/
/*--- Helpers for %eflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU32(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_condition", &x86g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_32to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           3/*regparm*/,
           "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
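
/* A note on the mcx_mask values used above: bit i set means "exclude
   argument i from Memcheck's definedness checking".  So the mask
   (1<<0) | (1<<3) on the 4-argument calls exempts CC_OP (arg 0) and
   CC_NDEP (arg 3), leaving DEP1 and DEP2 checked, which is what the
   adjacent comments intend. */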

/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenUto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Uto32,e);
      case Ity_I8:  return unop(Iop_8Uto32,e);
      default: vpanic("widenUto32");
   }
}

/* S-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenSto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Sto32,e);
      case Ity_I8:  return unop(Iop_8Sto32,e);
      default: vpanic("widenSto32");
   }
}

/* Narrow 8/16/32 bit int expr to 8/16/32.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(x86)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}
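
/* A worked example (sketch): after "addl %ebx,%eax" the generated IR
   leaves the thunk as

      CC_OP   = X86G_CC_OP_ADDL
      CC_DEP1 = the old value of %eax   (first arg)
      CC_DEP2 = the value of %ebx       (second arg)
      CC_NDEP = 0

   and x86g_calculate_eflags_all can later recompute all the status
   flags from those fields on demand. */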

/* For shift operations, we put in the result and the undershifted
   result.  If the shift amount is zero, however, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op32,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default:        ppIROp(op32);
                      vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I32),
                                   mkU32(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
                                   widenUto32(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
                                   widenUto32(mkexpr(resUS)))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
                                   mkU32(0) )));
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
         break;
      default:
         vpanic("setFlags_MUL(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the Intel encoding. */

static HChar* name_X86Condcode ( X86Condcode cond )
{
   switch (cond) {
      case X86CondO:      return "o";
      case X86CondNO:     return "no";
      case X86CondB:      return "b";
      case X86CondNB:     return "nb";
      case X86CondZ:      return "z";
      case X86CondNZ:     return "nz";
      case X86CondBE:     return "be";
      case X86CondNBE:    return "nbe";
      case X86CondS:      return "s";
      case X86CondNS:     return "ns";
      case X86CondP:      return "p";
      case X86CondNP:     return "np";
      case X86CondL:      return "l";
      case X86CondNL:     return "nl";
      case X86CondLE:     return "le";
      case X86CondNLE:    return "nle";
      case X86CondAlways: return "ALWAYS";
      default: vpanic("name_X86Condcode");
   }
}

static
X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
                                      Bool* needInvert )
{
   vassert(cond >= X86CondO && cond <= X86CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

   if texpVal is IRTemp_INVALID then a normal store is
   generated, and restart_point must be zero (it is irrelevant).

   if texpVal is not IRTemp_INVALID then a cas-style store is
   generated.  texpVal is the expected value, restart_point
   is the restart point if the store fails, and texpVal must
   have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   plus  = mkSizedOp(ty, Iop_Add8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
                   : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
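
/* Note (an inference from the stores just above): DEP2 is recorded
   as ta2 ^ oldcn rather than ta2 itself.  Since NDEP holds the old
   carry bit, the flags helper can XOR the two fields back together
   to recover the original ta2 when it recomputes the flags.
   helper_SBB below uses the same trick. */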

/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   minus = mkSizedOp(ty, Iop_Sub8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
                   : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

static HChar* nameGrp1 ( Int opc_aux )
{
   static HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
   return grp1_names[opc_aux];
}

static HChar* nameGrp2 ( Int opc_aux )
{
   static HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
   return grp2_names[opc_aux];
}

static HChar* nameGrp4 ( Int opc_aux )
{
   static HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
   return grp4_names[opc_aux];
}

static HChar* nameGrp5 ( Int opc_aux )
{
   static HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
   return grp5_names[opc_aux];
}

static HChar* nameGrp8 ( Int opc_aux )
{
   static HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
   return grp8_names[opc_aux];
}

static HChar* nameIReg ( Int size, Int reg )
{
   static HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx",
         "%esp", "%ebp", "%esi", "%edi" };
   static HChar* ireg16_names[8]
     = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
   static HChar* ireg8_names[8]
     = { "%al", "%cl", "%dl", "%bl",
         "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
   if (reg < 0 || reg > 7) goto bad;
   switch (size) {
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: return ireg8_names[reg];
   }
  bad:
   vpanic("nameIReg(X86)");
   return NULL; /*notreached*/
}

static HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(x86)");
   }
}

static HChar* nameMMXReg ( Int mmxreg )
{
   static HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
   return mmx_names[mmxreg];
}

static HChar* nameXMMReg ( Int xmmreg )
{
   static HChar* xmm_names[8]
     = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
         "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
   if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
   return xmm_names[xmmreg];
}
vpanic("nameMMXGran(x86,guest)"); 1325 } 1326 } 1327 1328 static HChar nameISize ( Int size ) 1329 { 1330 switch (size) { 1331 case 4: return 'l'; 1332 case 2: return 'w'; 1333 case 1: return 'b'; 1334 default: vpanic("nameISize(x86)"); 1335 } 1336 } 1337 1338 1339 /*------------------------------------------------------------*/ 1340 /*--- JMP helpers ---*/ 1341 /*------------------------------------------------------------*/ 1342 1343 static void jmp_lit( IRJumpKind kind, Addr32 d32 ) 1344 { 1345 irsb->next = mkU32(d32); 1346 irsb->jumpkind = kind; 1347 } 1348 1349 static void jmp_treg( IRJumpKind kind, IRTemp t ) 1350 { 1351 irsb->next = mkexpr(t); 1352 irsb->jumpkind = kind; 1353 } 1354 1355 static 1356 void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) 1357 { 1358 Bool invert; 1359 X86Condcode condPos; 1360 condPos = positiveIse_X86Condcode ( cond, &invert ); 1361 if (invert) { 1362 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1363 Ijk_Boring, 1364 IRConst_U32(d32_false) ) ); 1365 irsb->next = mkU32(d32_true); 1366 irsb->jumpkind = Ijk_Boring; 1367 } else { 1368 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1369 Ijk_Boring, 1370 IRConst_U32(d32_true) ) ); 1371 irsb->next = mkU32(d32_false); 1372 irsb->jumpkind = Ijk_Boring; 1373 } 1374 } 1375 1376 1377 /*------------------------------------------------------------*/ 1378 /*--- Disassembling addressing modes ---*/ 1379 /*------------------------------------------------------------*/ 1380 1381 static 1382 HChar* sorbTxt ( UChar sorb ) 1383 { 1384 switch (sorb) { 1385 case 0: return ""; /* no override */ 1386 case 0x3E: return "%ds"; 1387 case 0x26: return "%es:"; 1388 case 0x64: return "%fs:"; 1389 case 0x65: return "%gs:"; 1390 default: vpanic("sorbTxt(x86,guest)"); 1391 } 1392 } 1393 1394 1395 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 1396 linear address by adding any required segment override as indicated 1397 by sorb. */ 1398 static 1399 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual ) 1400 { 1401 Int sreg; 1402 IRType hWordTy; 1403 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 1404 1405 if (sorb == 0) 1406 /* the common case - no override */ 1407 return virtual; 1408 1409 switch (sorb) { 1410 case 0x3E: sreg = R_DS; break; 1411 case 0x26: sreg = R_ES; break; 1412 case 0x64: sreg = R_FS; break; 1413 case 0x65: sreg = R_GS; break; 1414 default: vpanic("handleSegOverride(x86,guest)"); 1415 } 1416 1417 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 1418 1419 seg_selector = newTemp(Ity_I32); 1420 ldt_ptr = newTemp(hWordTy); 1421 gdt_ptr = newTemp(hWordTy); 1422 r64 = newTemp(Ity_I64); 1423 1424 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 1425 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 1426 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 1427 1428 /* 1429 Call this to do the translation and limit checks: 1430 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 1431 UInt seg_selector, UInt virtual_addr ) 1432 */ 1433 assign( 1434 r64, 1435 mkIRExprCCall( 1436 Ity_I64, 1437 0/*regparms*/, 1438 "x86g_use_seg_selector", 1439 &x86g_use_seg_selector, 1440 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 1441 mkexpr(seg_selector), virtual) 1442 ) 1443 ); 1444 1445 /* If the high 32 of the result are non-zero, there was a 1446 failure in address translation. In which case, make a 1447 quick exit. 
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr )
      )
   );

   /* otherwise, here's the translated result. */
   return unop(Iop_64to32, mkexpr(r64));
}


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned.  Note that this fn should not be
   called if the R/M part of the address denotes a register instead
   of memory.  Text of the addressing mode is placed in buf, for use
   in tracing output.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned. */

static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
{
   IRTemp tmp = newTemp(Ity_I32);
   assign( tmp, addr32 );
   return tmp;
}

static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.
            Extract %index, %base and scale from the SIB byte.
            The value denoted is then:
               | %index == %ESP && %base == %EBP
                  = d32 following SIB byte
               | %index == %ESP && %base != %EBP
                  = %base
               | %index != %ESP && %base == %EBP
                  = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
                  = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up
            such horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d8 + %base
            | %index != %ESP
               = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
               = d32 + %base
            | %index != %ESP
               = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
                disAMode_copy2tmp(
                handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled. */

static UInt lengthAMode ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* d8(%eax) ... d8(%edi), not including d8(%esp). */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* d32(%eax) ... d32(%edi), not including d32(%esp). */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* a register, %eax .. %edi.  (Not an addr, but still handled.) */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* a 32-bit literal address. */
      case 0x05: return 5;

      /* SIB, no displacement. */
      case 0x04: {
         UChar sib    = getIByte(delta);
         UChar base_r = toUChar(sib & 7);
         if (base_r == R_EBP) return 6; else return 2;
      }
      /* SIB, with 8-bit displacement. */
      case 0x0C: return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14: return 6;

      default:
         vpanic("lengthAMode");
         return 0; /*notreached*/
   }
}
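
/* Worked example (a sketch): for the instruction bytes 8B 46 08
   (movl 8(%esi),%eax), disAMode would be entered at the ModRM byte
   0x46 = mod 01, reg 000, rm 110 -- the d8(%esi) case.  It returns
   a temp holding %esi + 8 and sets *len to 2 (the ModRM byte plus
   the 8-bit displacement); lengthAMode on the same bytes likewise
   returns 2. */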

/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a pointer to the modRM byte, the actual operation, and
   the data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
UInt dis_op2_E_G ( UChar       sorb,
                   Bool        addSubCarry,
                   IROp        op8,
                   Bool        keep,
                   Int         size,
                   Int         delta0,
                   HChar*      t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}
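
/* For example (a sketch): in AT&T syntax, "addl (%ebx),%eax" adds
   memory to a register and is an op E,G form, handled by
   dis_op2_E_G above; "addl %eax,(%ebx)" adds a register to memory
   and is the op G,E form, handled by dis_op2_G_E below.  In both
   encodings the modRM reg field names %eax and the rm part
   describes (%ebx). */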
Returns the address advanced completely over this 1898 instruction. 1899 1900 G(src) is reg. 1901 E(dst) is reg-or-mem 1902 1903 If E is reg, --> GET %E, tmp 1904 OP %G, tmp 1905 PUT tmp, %E 1906 1907 If E is mem, --> (getAddr E) -> tmpa 1908 LD (tmpa), tmpv 1909 OP %G, tmpv 1910 ST tmpv, (tmpa) 1911 */ 1912 static 1913 UInt dis_op2_G_E ( UChar sorb, 1914 Bool locked, 1915 Bool addSubCarry, 1916 IROp op8, 1917 Bool keep, 1918 Int size, 1919 Int delta0, 1920 HChar* t_x86opc ) 1921 { 1922 HChar dis_buf[50]; 1923 Int len; 1924 IRType ty = szToITy(size); 1925 IRTemp dst1 = newTemp(ty); 1926 IRTemp src = newTemp(ty); 1927 IRTemp dst0 = newTemp(ty); 1928 UChar rm = getIByte(delta0); 1929 IRTemp addr = IRTemp_INVALID; 1930 1931 /* addSubCarry == True indicates the intended operation is 1932 add-with-carry or subtract-with-borrow. */ 1933 if (addSubCarry) { 1934 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 1935 vassert(keep); 1936 } 1937 1938 if (epartIsReg(rm)) { 1939 /* Specially handle XOR reg,reg, because that doesn't really 1940 depend on reg, and doing the obvious thing potentially 1941 generates a spurious value check failure due to the bogus 1942 dependency. Ditto SBB reg,reg.*/ 1943 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 1944 && gregOfRM(rm) == eregOfRM(rm)) { 1945 putIReg(size, eregOfRM(rm), mkU(ty,0)); 1946 } 1947 assign(dst0, getIReg(size,eregOfRM(rm))); 1948 assign(src, getIReg(size,gregOfRM(rm))); 1949 1950 if (addSubCarry && op8 == Iop_Add8) { 1951 helper_ADC( size, dst1, dst0, src, 1952 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1953 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1954 } else 1955 if (addSubCarry && op8 == Iop_Sub8) { 1956 helper_SBB( size, dst1, dst0, src, 1957 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1958 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1959 } else { 1960 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 1961 if (isAddSub(op8)) 1962 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1963 else 1964 setFlags_DEP1(op8, dst1, ty); 1965 if (keep) 1966 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1967 } 1968 1969 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1970 nameIReg(size,gregOfRM(rm)), 1971 nameIReg(size,eregOfRM(rm))); 1972 return 1+delta0; 1973 } 1974 1975 /* E refers to memory */ 1976 { 1977 addr = disAMode ( &len, sorb, delta0, dis_buf); 1978 assign(dst0, loadLE(ty,mkexpr(addr))); 1979 assign(src, getIReg(size,gregOfRM(rm))); 1980 1981 if (addSubCarry && op8 == Iop_Add8) { 1982 if (locked) { 1983 /* cas-style store */ 1984 helper_ADC( size, dst1, dst0, src, 1985 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 1986 } else { 1987 /* normal store */ 1988 helper_ADC( size, dst1, dst0, src, 1989 /*store*/addr, IRTemp_INVALID, 0 ); 1990 } 1991 } else 1992 if (addSubCarry && op8 == Iop_Sub8) { 1993 if (locked) { 1994 /* cas-style store */ 1995 helper_SBB( size, dst1, dst0, src, 1996 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 1997 } else { 1998 /* normal store */ 1999 helper_SBB( size, dst1, dst0, src, 2000 /*store*/addr, IRTemp_INVALID, 0 ); 2001 } 2002 } else { 2003 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2004 if (keep) { 2005 if (locked) { 2006 if (0) vex_printf("locked case\n" ); 2007 casLE( mkexpr(addr), 2008 mkexpr(dst0)/*expval*/, 2009 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr ); 2010 } else { 2011 if (0) vex_printf("nonlocked case\n"); 2012 storeLE(mkexpr(addr), mkexpr(dst1)); 2013 } 2014 } 2015 if (isAddSub(op8)) 2016 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2017 else 
2018             setFlags_DEP1(op8, dst1, ty);
2019       }
2020 
2021       DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
2022                           nameIReg(size,gregOfRM(rm)), dis_buf);
2023       return len+delta0;
2024    }
2025 }
2026 
2027 
2028 /* Handle move instructions of the form
2029       mov E, G  meaning
2030       mov reg-or-mem, reg
2031    Is passed a ptr to the modRM byte, and the data size.  Returns
2032    the address advanced completely over this instruction.
2033 
2034    E(src) is reg-or-mem
2035    G(dst) is reg.
2036 
2037    If E is reg, -->    GET %E, tmpv
2038                        PUT tmpv, %G
2039 
2040    If E is mem  -->    (getAddr E) -> tmpa
2041                        LD (tmpa), tmpb
2042                        PUT tmpb, %G
2043 */
2044 static
2045 UInt dis_mov_E_G ( UChar       sorb,
2046                    Int         size,
2047                    Int         delta0 )
2048 {
2049    Int len;
2050    UChar rm = getIByte(delta0);
2051    HChar dis_buf[50];
2052 
2053    if (epartIsReg(rm)) {
2054       putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
2055       DIP("mov%c %s,%s\n", nameISize(size),
2056                            nameIReg(size,eregOfRM(rm)),
2057                            nameIReg(size,gregOfRM(rm)));
2058       return 1+delta0;
2059    }
2060 
2061    /* E refers to memory */
2062    {
2063       IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
2064       putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
2065       DIP("mov%c %s,%s\n", nameISize(size),
2066                            dis_buf,nameIReg(size,gregOfRM(rm)));
2067       return delta0+len;
2068    }
2069 }
2070 
2071 
2072 /* Handle move instructions of the form
2073       mov G, E  meaning
2074       mov reg, reg-or-mem
2075    Is passed a ptr to the modRM byte, and the data size.  Returns
2076    the address advanced completely over this instruction.
2077 
2078    G(src) is reg.
2079    E(dst) is reg-or-mem
2080 
2081    If E is reg, -->    GET %G, tmp
2082                        PUT tmp, %E
2083 
2084    If E is mem, -->    (getAddr E) -> tmpa
2085                        GET %G, tmpv
2086                        ST tmpv, (tmpa)
2087 */
2088 static
2089 UInt dis_mov_G_E ( UChar       sorb,
2090                    Int         size,
2091                    Int         delta0 )
2092 {
2093    Int len;
2094    UChar rm = getIByte(delta0);
2095    HChar dis_buf[50];
2096 
2097    if (epartIsReg(rm)) {
2098       putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
2099       DIP("mov%c %s,%s\n", nameISize(size),
2100                            nameIReg(size,gregOfRM(rm)),
2101                            nameIReg(size,eregOfRM(rm)));
2102       return 1+delta0;
2103    }
2104 
2105    /* E refers to memory */
2106    {
2107       IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
2108       storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
2109       DIP("mov%c %s,%s\n", nameISize(size),
2110                            nameIReg(size,gregOfRM(rm)), dis_buf);
2111       return len+delta0;
2112    }
2113 }
2114 
2115 
2116 /* op $immediate, AL/AX/EAX.
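   E.g. 'addb $0x7, %al' (04 07) or 'addl $0x12345678, %eax'
   (05 78 56 34 12); there is no modRM byte -- the accumulator is
   implied by the opcode.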
*/ 2117 static 2118 UInt dis_op_imm_A ( Int size, 2119 Bool carrying, 2120 IROp op8, 2121 Bool keep, 2122 Int delta, 2123 HChar* t_x86opc ) 2124 { 2125 IRType ty = szToITy(size); 2126 IRTemp dst0 = newTemp(ty); 2127 IRTemp src = newTemp(ty); 2128 IRTemp dst1 = newTemp(ty); 2129 UInt lit = getUDisp(size,delta); 2130 assign(dst0, getIReg(size,R_EAX)); 2131 assign(src, mkU(ty,lit)); 2132 2133 if (isAddSub(op8) && !carrying) { 2134 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2135 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2136 } 2137 else 2138 if (isLogic(op8)) { 2139 vassert(!carrying); 2140 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2141 setFlags_DEP1(op8, dst1, ty); 2142 } 2143 else 2144 if (op8 == Iop_Add8 && carrying) { 2145 helper_ADC( size, dst1, dst0, src, 2146 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2147 } 2148 else 2149 if (op8 == Iop_Sub8 && carrying) { 2150 helper_SBB( size, dst1, dst0, src, 2151 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2152 } 2153 else 2154 vpanic("dis_op_imm_A(x86,guest)"); 2155 2156 if (keep) 2157 putIReg(size, R_EAX, mkexpr(dst1)); 2158 2159 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), 2160 lit, nameIReg(size,R_EAX)); 2161 return delta+size; 2162 } 2163 2164 2165 /* Sign- and Zero-extending moves. */ 2166 static 2167 UInt dis_movx_E_G ( UChar sorb, 2168 Int delta, Int szs, Int szd, Bool sign_extend ) 2169 { 2170 UChar rm = getIByte(delta); 2171 if (epartIsReg(rm)) { 2172 if (szd == szs) { 2173 // mutant case. See #250799 2174 putIReg(szd, gregOfRM(rm), 2175 getIReg(szs,eregOfRM(rm))); 2176 } else { 2177 // normal case 2178 putIReg(szd, gregOfRM(rm), 2179 unop(mkWidenOp(szs,szd,sign_extend), 2180 getIReg(szs,eregOfRM(rm)))); 2181 } 2182 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2183 nameISize(szs), nameISize(szd), 2184 nameIReg(szs,eregOfRM(rm)), 2185 nameIReg(szd,gregOfRM(rm))); 2186 return 1+delta; 2187 } 2188 2189 /* E refers to memory */ 2190 { 2191 Int len; 2192 HChar dis_buf[50]; 2193 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf ); 2194 if (szd == szs) { 2195 // mutant case. See #250799 2196 putIReg(szd, gregOfRM(rm), 2197 loadLE(szToITy(szs),mkexpr(addr))); 2198 } else { 2199 // normal case 2200 putIReg(szd, gregOfRM(rm), 2201 unop(mkWidenOp(szs,szd,sign_extend), 2202 loadLE(szToITy(szs),mkexpr(addr)))); 2203 } 2204 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2205 nameISize(szs), nameISize(szd), 2206 dis_buf, nameIReg(szd,gregOfRM(rm))); 2207 return len+delta; 2208 } 2209 } 2210 2211 2212 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / 2213 16 / 8 bit quantity in the given IRTemp. */ 2214 static 2215 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 2216 { 2217 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32; 2218 IRTemp src64 = newTemp(Ity_I64); 2219 IRTemp dst64 = newTemp(Ity_I64); 2220 switch (sz) { 2221 case 4: 2222 assign( src64, binop(Iop_32HLto64, 2223 getIReg(4,R_EDX), getIReg(4,R_EAX)) ); 2224 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) ); 2225 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) ); 2226 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) ); 2227 break; 2228 case 2: { 2229 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2230 IROp widen1632 = signed_divide ? 
Iop_16Sto32 : Iop_16Uto32; 2231 assign( src64, unop(widen3264, 2232 binop(Iop_16HLto32, 2233 getIReg(2,R_EDX), getIReg(2,R_EAX))) ); 2234 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 2235 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 2236 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 2237 break; 2238 } 2239 case 1: { 2240 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2241 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 2242 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 2243 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) ); 2244 assign( dst64, 2245 binop(op, mkexpr(src64), 2246 unop(widen1632, unop(widen816, mkexpr(t)))) ); 2247 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16, 2248 unop(Iop_64to32,mkexpr(dst64)))) ); 2249 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16, 2250 unop(Iop_64HIto32,mkexpr(dst64)))) ); 2251 break; 2252 } 2253 default: vpanic("codegen_div(x86)"); 2254 } 2255 } 2256 2257 2258 static 2259 UInt dis_Grp1 ( UChar sorb, Bool locked, 2260 Int delta, UChar modrm, 2261 Int am_sz, Int d_sz, Int sz, UInt d32 ) 2262 { 2263 Int len; 2264 HChar dis_buf[50]; 2265 IRType ty = szToITy(sz); 2266 IRTemp dst1 = newTemp(ty); 2267 IRTemp src = newTemp(ty); 2268 IRTemp dst0 = newTemp(ty); 2269 IRTemp addr = IRTemp_INVALID; 2270 IROp op8 = Iop_INVALID; 2271 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF); 2272 2273 switch (gregOfRM(modrm)) { 2274 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 2275 case 2: break; // ADC 2276 case 3: break; // SBB 2277 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 2278 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 2279 /*NOTREACHED*/ 2280 default: vpanic("dis_Grp1: unhandled case"); 2281 } 2282 2283 if (epartIsReg(modrm)) { 2284 vassert(am_sz == 1); 2285 2286 assign(dst0, getIReg(sz,eregOfRM(modrm))); 2287 assign(src, mkU(ty,d32 & mask)); 2288 2289 if (gregOfRM(modrm) == 2 /* ADC */) { 2290 helper_ADC( sz, dst1, dst0, src, 2291 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2292 } else 2293 if (gregOfRM(modrm) == 3 /* SBB */) { 2294 helper_SBB( sz, dst1, dst0, src, 2295 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2296 } else { 2297 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2298 if (isAddSub(op8)) 2299 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2300 else 2301 setFlags_DEP1(op8, dst1, ty); 2302 } 2303 2304 if (gregOfRM(modrm) < 7) 2305 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2306 2307 delta += (am_sz + d_sz); 2308 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, 2309 nameIReg(sz,eregOfRM(modrm))); 2310 } else { 2311 addr = disAMode ( &len, sorb, delta, dis_buf); 2312 2313 assign(dst0, loadLE(ty,mkexpr(addr))); 2314 assign(src, mkU(ty,d32 & mask)); 2315 2316 if (gregOfRM(modrm) == 2 /* ADC */) { 2317 if (locked) { 2318 /* cas-style store */ 2319 helper_ADC( sz, dst1, dst0, src, 2320 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2321 } else { 2322 /* normal store */ 2323 helper_ADC( sz, dst1, dst0, src, 2324 /*store*/addr, IRTemp_INVALID, 0 ); 2325 } 2326 } else 2327 if (gregOfRM(modrm) == 3 /* SBB */) { 2328 if (locked) { 2329 /* cas-style store */ 2330 helper_SBB( sz, dst1, dst0, src, 2331 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2332 } else { 2333 /* normal store */ 2334 helper_SBB( sz, dst1, dst0, src, 2335 /*store*/addr, IRTemp_INVALID, 0 ); 2336 } 2337 } else { 2338 assign(dst1, 
binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2339 if (gregOfRM(modrm) < 7) { 2340 if (locked) { 2341 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 2342 mkexpr(dst1)/*newVal*/, 2343 guest_EIP_curr_instr ); 2344 } else { 2345 storeLE(mkexpr(addr), mkexpr(dst1)); 2346 } 2347 } 2348 if (isAddSub(op8)) 2349 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2350 else 2351 setFlags_DEP1(op8, dst1, ty); 2352 } 2353 2354 delta += (len+d_sz); 2355 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), 2356 d32, dis_buf); 2357 } 2358 return delta; 2359 } 2360 2361 2362 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 2363 expression. */ 2364 2365 static 2366 UInt dis_Grp2 ( UChar sorb, 2367 Int delta, UChar modrm, 2368 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 2369 HChar* shift_expr_txt, Bool* decode_OK ) 2370 { 2371 /* delta on entry points at the modrm byte. */ 2372 HChar dis_buf[50]; 2373 Int len; 2374 Bool isShift, isRotate, isRotateC; 2375 IRType ty = szToITy(sz); 2376 IRTemp dst0 = newTemp(ty); 2377 IRTemp dst1 = newTemp(ty); 2378 IRTemp addr = IRTemp_INVALID; 2379 2380 *decode_OK = True; 2381 2382 vassert(sz == 1 || sz == 2 || sz == 4); 2383 2384 /* Put value to shift/rotate in dst0. */ 2385 if (epartIsReg(modrm)) { 2386 assign(dst0, getIReg(sz, eregOfRM(modrm))); 2387 delta += (am_sz + d_sz); 2388 } else { 2389 addr = disAMode ( &len, sorb, delta, dis_buf); 2390 assign(dst0, loadLE(ty,mkexpr(addr))); 2391 delta += len + d_sz; 2392 } 2393 2394 isShift = False; 2395 switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 2396 2397 isRotate = False; 2398 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; } 2399 2400 isRotateC = False; 2401 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; } 2402 2403 if (!isShift && !isRotate && !isRotateC) { 2404 /*NOTREACHED*/ 2405 vpanic("dis_Grp2(Reg): unhandled case(x86)"); 2406 } 2407 2408 if (isRotateC) { 2409 /* call a helper; these insns are so ridiculous they do not 2410 deserve better */ 2411 Bool left = toBool(gregOfRM(modrm) == 2); 2412 IRTemp r64 = newTemp(Ity_I64); 2413 IRExpr** args 2414 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */ 2415 widenUto32(shift_expr), /* rotate amount */ 2416 widenUto32(mk_x86g_calculate_eflags_all()), 2417 mkU32(sz) ); 2418 assign( r64, mkIRExprCCall( 2419 Ity_I64, 2420 0/*regparm*/, 2421 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR", 2422 left ? &x86g_calculate_RCL : &x86g_calculate_RCR, 2423 args 2424 ) 2425 ); 2426 /* new eflags in hi half r64; new value in lo half r64 */ 2427 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) ); 2428 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2429 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) )); 2430 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2431 /* Set NDEP even though it isn't used. This makes redundant-PUT 2432 elimination of previous stores to this field work better. 
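      (Since CC_OP is set to X86G_CC_OP_COPY here, the flags-
      calculation helpers simply return CC_DEP1, suitably masked;
      so parking the helper-computed eflags word in CC_DEP1
      installs it directly.)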
*/ 2433 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2434 } 2435 2436 if (isShift) { 2437 2438 IRTemp pre32 = newTemp(Ity_I32); 2439 IRTemp res32 = newTemp(Ity_I32); 2440 IRTemp res32ss = newTemp(Ity_I32); 2441 IRTemp shift_amt = newTemp(Ity_I8); 2442 IROp op32; 2443 2444 switch (gregOfRM(modrm)) { 2445 case 4: op32 = Iop_Shl32; break; 2446 case 5: op32 = Iop_Shr32; break; 2447 case 6: op32 = Iop_Shl32; break; 2448 case 7: op32 = Iop_Sar32; break; 2449 /*NOTREACHED*/ 2450 default: vpanic("dis_Grp2:shift"); break; 2451 } 2452 2453 /* Widen the value to be shifted to 32 bits, do the shift, and 2454 narrow back down. This seems surprisingly long-winded, but 2455 unfortunately the Intel semantics requires that 8/16-bit 2456 shifts give defined results for shift values all the way up 2457 to 31, and this seems the simplest way to do it. It has the 2458 advantage that the only IR level shifts generated are of 32 2459 bit values, and the shift amount is guaranteed to be in the 2460 range 0 .. 31, thereby observing the IR semantics requiring 2461 all shift values to be in the range 0 .. 2^word_size-1. */ 2462 2463 /* shift_amt = shift_expr & 31, regardless of operation size */ 2464 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) ); 2465 2466 /* suitably widen the value to be shifted to 32 bits. */ 2467 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0)) 2468 : widenUto32(mkexpr(dst0)) ); 2469 2470 /* res32 = pre32 `shift` shift_amt */ 2471 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) ); 2472 2473 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */ 2474 assign( res32ss, 2475 binop(op32, 2476 mkexpr(pre32), 2477 binop(Iop_And8, 2478 binop(Iop_Sub8, 2479 mkexpr(shift_amt), mkU8(1)), 2480 mkU8(31))) ); 2481 2482 /* Build the flags thunk. */ 2483 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt); 2484 2485 /* Narrow the result back down. */ 2486 assign( dst1, narrowTo(ty, mkexpr(res32)) ); 2487 2488 } /* if (isShift) */ 2489 2490 else 2491 if (isRotate) { 2492 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 2493 Bool left = toBool(gregOfRM(modrm) == 0); 2494 IRTemp rot_amt = newTemp(Ity_I8); 2495 IRTemp rot_amt32 = newTemp(Ity_I8); 2496 IRTemp oldFlags = newTemp(Ity_I32); 2497 2498 /* rot_amt = shift_expr & mask */ 2499 /* By masking the rotate amount thusly, the IR-level Shl/Shr 2500 expressions never shift beyond the word size and thus remain 2501 well defined. */ 2502 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31))); 2503 2504 if (ty == Ity_I32) 2505 assign(rot_amt, mkexpr(rot_amt32)); 2506 else 2507 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1))); 2508 2509 if (left) { 2510 2511 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 2512 assign(dst1, 2513 binop( mkSizedOp(ty,Iop_Or8), 2514 binop( mkSizedOp(ty,Iop_Shl8), 2515 mkexpr(dst0), 2516 mkexpr(rot_amt) 2517 ), 2518 binop( mkSizedOp(ty,Iop_Shr8), 2519 mkexpr(dst0), 2520 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2521 ) 2522 ) 2523 ); 2524 ccOp += X86G_CC_OP_ROLB; 2525 2526 } else { /* right */ 2527 2528 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 2529 assign(dst1, 2530 binop( mkSizedOp(ty,Iop_Or8), 2531 binop( mkSizedOp(ty,Iop_Shr8), 2532 mkexpr(dst0), 2533 mkexpr(rot_amt) 2534 ), 2535 binop( mkSizedOp(ty,Iop_Shl8), 2536 mkexpr(dst0), 2537 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2538 ) 2539 ) 2540 ); 2541 ccOp += X86G_CC_OP_RORB; 2542 2543 } 2544 2545 /* dst1 now holds the rotated value. Build flag thunk. 
We 2546 need the resulting value for this, and the previous flags. 2547 Except don't set it if the rotate count is zero. */ 2548 2549 assign(oldFlags, mk_x86g_calculate_eflags_all()); 2550 2551 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 2552 stmt( IRStmt_Put( OFFB_CC_OP, 2553 IRExpr_Mux0X( mkexpr(rot_amt32), 2554 IRExpr_Get(OFFB_CC_OP,Ity_I32), 2555 mkU32(ccOp))) ); 2556 stmt( IRStmt_Put( OFFB_CC_DEP1, 2557 IRExpr_Mux0X( mkexpr(rot_amt32), 2558 IRExpr_Get(OFFB_CC_DEP1,Ity_I32), 2559 widenUto32(mkexpr(dst1)))) ); 2560 stmt( IRStmt_Put( OFFB_CC_DEP2, 2561 IRExpr_Mux0X( mkexpr(rot_amt32), 2562 IRExpr_Get(OFFB_CC_DEP2,Ity_I32), 2563 mkU32(0))) ); 2564 stmt( IRStmt_Put( OFFB_CC_NDEP, 2565 IRExpr_Mux0X( mkexpr(rot_amt32), 2566 IRExpr_Get(OFFB_CC_NDEP,Ity_I32), 2567 mkexpr(oldFlags))) ); 2568 } /* if (isRotate) */ 2569 2570 /* Save result, and finish up. */ 2571 if (epartIsReg(modrm)) { 2572 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2573 if (vex_traceflags & VEX_TRACE_FE) { 2574 vex_printf("%s%c ", 2575 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2576 if (shift_expr_txt) 2577 vex_printf("%s", shift_expr_txt); 2578 else 2579 ppIRExpr(shift_expr); 2580 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm))); 2581 } 2582 } else { 2583 storeLE(mkexpr(addr), mkexpr(dst1)); 2584 if (vex_traceflags & VEX_TRACE_FE) { 2585 vex_printf("%s%c ", 2586 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2587 if (shift_expr_txt) 2588 vex_printf("%s", shift_expr_txt); 2589 else 2590 ppIRExpr(shift_expr); 2591 vex_printf(", %s\n", dis_buf); 2592 } 2593 } 2594 return delta; 2595 } 2596 2597 2598 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 2599 static 2600 UInt dis_Grp8_Imm ( UChar sorb, 2601 Bool locked, 2602 Int delta, UChar modrm, 2603 Int am_sz, Int sz, UInt src_val, 2604 Bool* decode_OK ) 2605 { 2606 /* src_val denotes a d8. 2607 And delta on entry points at the modrm byte. */ 2608 2609 IRType ty = szToITy(sz); 2610 IRTemp t2 = newTemp(Ity_I32); 2611 IRTemp t2m = newTemp(Ity_I32); 2612 IRTemp t_addr = IRTemp_INVALID; 2613 HChar dis_buf[50]; 2614 UInt mask; 2615 2616 /* we're optimists :-) */ 2617 *decode_OK = True; 2618 2619 /* Limit src_val -- the bit offset -- to something within a word. 2620 The Intel docs say that literal offsets larger than a word are 2621 masked in this way. */ 2622 switch (sz) { 2623 case 2: src_val &= 15; break; 2624 case 4: src_val &= 31; break; 2625 default: *decode_OK = False; return delta; 2626 } 2627 2628 /* Invent a mask suitable for the operation. */ 2629 switch (gregOfRM(modrm)) { 2630 case 4: /* BT */ mask = 0; break; 2631 case 5: /* BTS */ mask = 1 << src_val; break; 2632 case 6: /* BTR */ mask = ~(1 << src_val); break; 2633 case 7: /* BTC */ mask = 1 << src_val; break; 2634 /* If this needs to be extended, probably simplest to make a 2635 new function to handle the other cases (0 .. 3). The 2636 Intel docs do however not indicate any use for 0 .. 3, so 2637 we don't expect this to happen. */ 2638 default: *decode_OK = False; return delta; 2639 } 2640 2641 /* Fetch the value to be tested and modified into t2, which is 2642 32-bits wide regardless of sz. 
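      For example, 'btcw $19, %ax' (a sketch): sz == 2, so the bit
      offset is masked to 19 & 15 == 3; mask == 1 << 3; t2m == t2
      xor mask; and the carry flag is set from bit 3 of the
      original t2.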
*/ 2643 if (epartIsReg(modrm)) { 2644 vassert(am_sz == 1); 2645 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) ); 2646 delta += (am_sz + 1); 2647 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2648 src_val, nameIReg(sz,eregOfRM(modrm))); 2649 } else { 2650 Int len; 2651 t_addr = disAMode ( &len, sorb, delta, dis_buf); 2652 delta += (len+1); 2653 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) ); 2654 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2655 src_val, dis_buf); 2656 } 2657 2658 /* Compute the new value into t2m, if non-BT. */ 2659 switch (gregOfRM(modrm)) { 2660 case 4: /* BT */ 2661 break; 2662 case 5: /* BTS */ 2663 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) ); 2664 break; 2665 case 6: /* BTR */ 2666 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) ); 2667 break; 2668 case 7: /* BTC */ 2669 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) ); 2670 break; 2671 default: 2672 /*NOTREACHED*/ /*the previous switch guards this*/ 2673 vassert(0); 2674 } 2675 2676 /* Write the result back, if non-BT. If the CAS fails then we 2677 side-exit from the trace at this point, and so the flag state is 2678 not affected. This is of course as required. */ 2679 if (gregOfRM(modrm) != 4 /* BT */) { 2680 if (epartIsReg(modrm)) { 2681 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m))); 2682 } else { 2683 if (locked) { 2684 casLE( mkexpr(t_addr), 2685 narrowTo(ty, mkexpr(t2))/*expd*/, 2686 narrowTo(ty, mkexpr(t2m))/*new*/, 2687 guest_EIP_curr_instr ); 2688 } else { 2689 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 2690 } 2691 } 2692 } 2693 2694 /* Copy relevant bit from t2 into the carry flag. */ 2695 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 2696 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2697 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2698 stmt( IRStmt_Put( 2699 OFFB_CC_DEP1, 2700 binop(Iop_And32, 2701 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)), 2702 mkU32(1)) 2703 )); 2704 /* Set NDEP even though it isn't used. This makes redundant-PUT 2705 elimination of previous stores to this field work better. */ 2706 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2707 2708 return delta; 2709 } 2710 2711 2712 /* Signed/unsigned widening multiply. Generate IR to multiply the 2713 value in EAX/AX/AL by the given IRTemp, and park the result in 2714 EDX:EAX/DX:AX/AX. 2715 */ 2716 static void codegen_mulL_A_D ( Int sz, Bool syned, 2717 IRTemp tmp, HChar* tmp_txt ) 2718 { 2719 IRType ty = szToITy(sz); 2720 IRTemp t1 = newTemp(ty); 2721 2722 assign( t1, getIReg(sz, R_EAX) ); 2723 2724 switch (ty) { 2725 case Ity_I32: { 2726 IRTemp res64 = newTemp(Ity_I64); 2727 IRTemp resHi = newTemp(Ity_I32); 2728 IRTemp resLo = newTemp(Ity_I32); 2729 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 2730 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2731 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 2732 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2733 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 2734 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 2735 putIReg(4, R_EDX, mkexpr(resHi)); 2736 putIReg(4, R_EAX, mkexpr(resLo)); 2737 break; 2738 } 2739 case Ity_I16: { 2740 IRTemp res32 = newTemp(Ity_I32); 2741 IRTemp resHi = newTemp(Ity_I16); 2742 IRTemp resLo = newTemp(Ity_I16); 2743 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 2744 UInt tBaseOp = syned ? 
X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2745 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 2746 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2747 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 2748 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 2749 putIReg(2, R_EDX, mkexpr(resHi)); 2750 putIReg(2, R_EAX, mkexpr(resLo)); 2751 break; 2752 } 2753 case Ity_I8: { 2754 IRTemp res16 = newTemp(Ity_I16); 2755 IRTemp resHi = newTemp(Ity_I8); 2756 IRTemp resLo = newTemp(Ity_I8); 2757 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 2758 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2759 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 2760 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2761 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 2762 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 2763 putIReg(2, R_EAX, mkexpr(res16)); 2764 break; 2765 } 2766 default: 2767 vpanic("codegen_mulL_A_D(x86)"); 2768 } 2769 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 2770 } 2771 2772 2773 /* Group 3 extended opcodes. */ 2774 static 2775 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK ) 2776 { 2777 UInt d32; 2778 UChar modrm; 2779 HChar dis_buf[50]; 2780 Int len; 2781 IRTemp addr; 2782 IRType ty = szToITy(sz); 2783 IRTemp t1 = newTemp(ty); 2784 IRTemp dst1, src, dst0; 2785 2786 *decode_OK = True; /* may change this later */ 2787 2788 modrm = getIByte(delta); 2789 2790 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) { 2791 /* LOCK prefix only allowed with not and neg subopcodes */ 2792 *decode_OK = False; 2793 return delta; 2794 } 2795 2796 if (epartIsReg(modrm)) { 2797 switch (gregOfRM(modrm)) { 2798 case 0: { /* TEST */ 2799 delta++; d32 = getUDisp(sz, delta); delta += sz; 2800 dst1 = newTemp(ty); 2801 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2802 getIReg(sz,eregOfRM(modrm)), 2803 mkU(ty,d32))); 2804 setFlags_DEP1( Iop_And8, dst1, ty ); 2805 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, 2806 nameIReg(sz, eregOfRM(modrm))); 2807 break; 2808 } 2809 case 1: /* UNDEFINED */ 2810 /* The Intel docs imply this insn is undefined and binutils 2811 agrees. Unfortunately Core 2 will run it (with who 2812 knows what result?) sandpile.org reckons it's an alias 2813 for case 0. We play safe. 
*/ 2814 *decode_OK = False; 2815 break; 2816 case 2: /* NOT */ 2817 delta++; 2818 putIReg(sz, eregOfRM(modrm), 2819 unop(mkSizedOp(ty,Iop_Not8), 2820 getIReg(sz, eregOfRM(modrm)))); 2821 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2822 break; 2823 case 3: /* NEG */ 2824 delta++; 2825 dst0 = newTemp(ty); 2826 src = newTemp(ty); 2827 dst1 = newTemp(ty); 2828 assign(dst0, mkU(ty,0)); 2829 assign(src, getIReg(sz,eregOfRM(modrm))); 2830 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); 2831 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2832 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2833 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2834 break; 2835 case 4: /* MUL (unsigned widening) */ 2836 delta++; 2837 src = newTemp(ty); 2838 assign(src, getIReg(sz,eregOfRM(modrm))); 2839 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) ); 2840 break; 2841 case 5: /* IMUL (signed widening) */ 2842 delta++; 2843 src = newTemp(ty); 2844 assign(src, getIReg(sz,eregOfRM(modrm))); 2845 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) ); 2846 break; 2847 case 6: /* DIV */ 2848 delta++; 2849 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2850 codegen_div ( sz, t1, False ); 2851 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2852 break; 2853 case 7: /* IDIV */ 2854 delta++; 2855 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2856 codegen_div ( sz, t1, True ); 2857 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2858 break; 2859 default: 2860 /* This can't happen - gregOfRM should return 0 .. 7 only */ 2861 vpanic("Grp3(x86)"); 2862 } 2863 } else { 2864 addr = disAMode ( &len, sorb, delta, dis_buf ); 2865 t1 = newTemp(ty); 2866 delta += len; 2867 assign(t1, loadLE(ty,mkexpr(addr))); 2868 switch (gregOfRM(modrm)) { 2869 case 0: { /* TEST */ 2870 d32 = getUDisp(sz, delta); delta += sz; 2871 dst1 = newTemp(ty); 2872 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2873 mkexpr(t1), mkU(ty,d32))); 2874 setFlags_DEP1( Iop_And8, dst1, ty ); 2875 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 2876 break; 2877 } 2878 case 1: /* UNDEFINED */ 2879 /* See comment above on R case */ 2880 *decode_OK = False; 2881 break; 2882 case 2: /* NOT */ 2883 dst1 = newTemp(ty); 2884 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 2885 if (locked) { 2886 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2887 guest_EIP_curr_instr ); 2888 } else { 2889 storeLE( mkexpr(addr), mkexpr(dst1) ); 2890 } 2891 DIP("not%c %s\n", nameISize(sz), dis_buf); 2892 break; 2893 case 3: /* NEG */ 2894 dst0 = newTemp(ty); 2895 src = newTemp(ty); 2896 dst1 = newTemp(ty); 2897 assign(dst0, mkU(ty,0)); 2898 assign(src, mkexpr(t1)); 2899 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), 2900 mkexpr(dst0), mkexpr(src))); 2901 if (locked) { 2902 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2903 guest_EIP_curr_instr ); 2904 } else { 2905 storeLE( mkexpr(addr), mkexpr(dst1) ); 2906 } 2907 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2908 DIP("neg%c %s\n", nameISize(sz), dis_buf); 2909 break; 2910 case 4: /* MUL */ 2911 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 2912 break; 2913 case 5: /* IMUL */ 2914 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 2915 break; 2916 case 6: /* DIV */ 2917 codegen_div ( sz, t1, False ); 2918 DIP("div%c %s\n", nameISize(sz), dis_buf); 2919 break; 2920 case 7: /* IDIV */ 2921 codegen_div ( sz, t1, True ); 2922 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 2923 break; 2924 default: 2925 /* This 
can't happen - gregOfRM should return 0 .. 7 only */ 2926 vpanic("Grp3(x86)"); 2927 } 2928 } 2929 return delta; 2930 } 2931 2932 2933 /* Group 4 extended opcodes. */ 2934 static 2935 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK ) 2936 { 2937 Int alen; 2938 UChar modrm; 2939 HChar dis_buf[50]; 2940 IRType ty = Ity_I8; 2941 IRTemp t1 = newTemp(ty); 2942 IRTemp t2 = newTemp(ty); 2943 2944 *decode_OK = True; 2945 2946 modrm = getIByte(delta); 2947 2948 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 2949 /* LOCK prefix only allowed with inc and dec subopcodes */ 2950 *decode_OK = False; 2951 return delta; 2952 } 2953 2954 if (epartIsReg(modrm)) { 2955 assign(t1, getIReg(1, eregOfRM(modrm))); 2956 switch (gregOfRM(modrm)) { 2957 case 0: /* INC */ 2958 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2959 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2960 setFlags_INC_DEC( True, t2, ty ); 2961 break; 2962 case 1: /* DEC */ 2963 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2964 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2965 setFlags_INC_DEC( False, t2, ty ); 2966 break; 2967 default: 2968 *decode_OK = False; 2969 return delta; 2970 } 2971 delta++; 2972 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), 2973 nameIReg(1, eregOfRM(modrm))); 2974 } else { 2975 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf ); 2976 assign( t1, loadLE(ty, mkexpr(addr)) ); 2977 switch (gregOfRM(modrm)) { 2978 case 0: /* INC */ 2979 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2980 if (locked) { 2981 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 2982 guest_EIP_curr_instr ); 2983 } else { 2984 storeLE( mkexpr(addr), mkexpr(t2) ); 2985 } 2986 setFlags_INC_DEC( True, t2, ty ); 2987 break; 2988 case 1: /* DEC */ 2989 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2990 if (locked) { 2991 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 2992 guest_EIP_curr_instr ); 2993 } else { 2994 storeLE( mkexpr(addr), mkexpr(t2) ); 2995 } 2996 setFlags_INC_DEC( False, t2, ty ); 2997 break; 2998 default: 2999 *decode_OK = False; 3000 return delta; 3001 } 3002 delta += alen; 3003 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); 3004 } 3005 return delta; 3006 } 3007 3008 3009 /* Group 5 extended opcodes. 
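   Sub-opcodes handled below: /0 INC Ev, /1 DEC Ev, /2 CALL Ev,
   /4 JMP Ev and /6 PUSH Ev.  The far forms /3 and /5, and the
   undefined /7, fail to decode.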
*/ 3010 static 3011 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, 3012 DisResult* dres, Bool* decode_OK ) 3013 { 3014 Int len; 3015 UChar modrm; 3016 HChar dis_buf[50]; 3017 IRTemp addr = IRTemp_INVALID; 3018 IRType ty = szToITy(sz); 3019 IRTemp t1 = newTemp(ty); 3020 IRTemp t2 = IRTemp_INVALID; 3021 3022 *decode_OK = True; 3023 3024 modrm = getIByte(delta); 3025 3026 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 3027 /* LOCK prefix only allowed with inc and dec subopcodes */ 3028 *decode_OK = False; 3029 return delta; 3030 } 3031 3032 if (epartIsReg(modrm)) { 3033 assign(t1, getIReg(sz,eregOfRM(modrm))); 3034 switch (gregOfRM(modrm)) { 3035 case 0: /* INC */ 3036 vassert(sz == 2 || sz == 4); 3037 t2 = newTemp(ty); 3038 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3039 mkexpr(t1), mkU(ty,1))); 3040 setFlags_INC_DEC( True, t2, ty ); 3041 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3042 break; 3043 case 1: /* DEC */ 3044 vassert(sz == 2 || sz == 4); 3045 t2 = newTemp(ty); 3046 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3047 mkexpr(t1), mkU(ty,1))); 3048 setFlags_INC_DEC( False, t2, ty ); 3049 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3050 break; 3051 case 2: /* call Ev */ 3052 vassert(sz == 4); 3053 t2 = newTemp(Ity_I32); 3054 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3055 putIReg(4, R_ESP, mkexpr(t2)); 3056 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); 3057 jmp_treg(Ijk_Call,t1); 3058 dres->whatNext = Dis_StopHere; 3059 break; 3060 case 4: /* jmp Ev */ 3061 vassert(sz == 4); 3062 jmp_treg(Ijk_Boring,t1); 3063 dres->whatNext = Dis_StopHere; 3064 break; 3065 case 6: /* PUSH Ev */ 3066 vassert(sz == 4 || sz == 2); 3067 t2 = newTemp(Ity_I32); 3068 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3069 putIReg(4, R_ESP, mkexpr(t2) ); 3070 storeLE( mkexpr(t2), mkexpr(t1) ); 3071 break; 3072 default: 3073 *decode_OK = False; 3074 return delta; 3075 } 3076 delta++; 3077 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3078 nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 3079 } else { 3080 addr = disAMode ( &len, sorb, delta, dis_buf ); 3081 assign(t1, loadLE(ty,mkexpr(addr))); 3082 switch (gregOfRM(modrm)) { 3083 case 0: /* INC */ 3084 t2 = newTemp(ty); 3085 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3086 mkexpr(t1), mkU(ty,1))); 3087 if (locked) { 3088 casLE( mkexpr(addr), 3089 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3090 } else { 3091 storeLE(mkexpr(addr),mkexpr(t2)); 3092 } 3093 setFlags_INC_DEC( True, t2, ty ); 3094 break; 3095 case 1: /* DEC */ 3096 t2 = newTemp(ty); 3097 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3098 mkexpr(t1), mkU(ty,1))); 3099 if (locked) { 3100 casLE( mkexpr(addr), 3101 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3102 } else { 3103 storeLE(mkexpr(addr),mkexpr(t2)); 3104 } 3105 setFlags_INC_DEC( False, t2, ty ); 3106 break; 3107 case 2: /* call Ev */ 3108 vassert(sz == 4); 3109 t2 = newTemp(Ity_I32); 3110 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3111 putIReg(4, R_ESP, mkexpr(t2)); 3112 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); 3113 jmp_treg(Ijk_Call,t1); 3114 dres->whatNext = Dis_StopHere; 3115 break; 3116 case 4: /* JMP Ev */ 3117 vassert(sz == 4); 3118 jmp_treg(Ijk_Boring,t1); 3119 dres->whatNext = Dis_StopHere; 3120 break; 3121 case 6: /* PUSH Ev */ 3122 vassert(sz == 4 || sz == 2); 3123 t2 = newTemp(Ity_I32); 3124 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3125 putIReg(4, R_ESP, mkexpr(t2) ); 3126 storeLE( mkexpr(t2), mkexpr(t1) ); 3127 break; 3128 default: 3129 
*decode_OK = False; 3130 return delta; 3131 } 3132 delta += len; 3133 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3134 nameISize(sz), dis_buf); 3135 } 3136 return delta; 3137 } 3138 3139 3140 /*------------------------------------------------------------*/ 3141 /*--- Disassembling string ops (including REP prefixes) ---*/ 3142 /*------------------------------------------------------------*/ 3143 3144 /* Code shared by all the string ops */ 3145 static 3146 void dis_string_op_increment(Int sz, Int t_inc) 3147 { 3148 if (sz == 4 || sz == 2) { 3149 assign( t_inc, 3150 binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ), 3151 mkU8(sz/2) ) ); 3152 } else { 3153 assign( t_inc, 3154 IRExpr_Get( OFFB_DFLAG, Ity_I32 ) ); 3155 } 3156 } 3157 3158 static 3159 void dis_string_op( void (*dis_OP)( Int, IRTemp ), 3160 Int sz, HChar* name, UChar sorb ) 3161 { 3162 IRTemp t_inc = newTemp(Ity_I32); 3163 vassert(sorb == 0); /* hmm. so what was the point of passing it in? */ 3164 dis_string_op_increment(sz, t_inc); 3165 dis_OP( sz, t_inc ); 3166 DIP("%s%c\n", name, nameISize(sz)); 3167 } 3168 3169 static 3170 void dis_MOVS ( Int sz, IRTemp t_inc ) 3171 { 3172 IRType ty = szToITy(sz); 3173 IRTemp td = newTemp(Ity_I32); /* EDI */ 3174 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3175 3176 assign( td, getIReg(4, R_EDI) ); 3177 assign( ts, getIReg(4, R_ESI) ); 3178 3179 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 3180 3181 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3182 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3183 } 3184 3185 static 3186 void dis_LODS ( Int sz, IRTemp t_inc ) 3187 { 3188 IRType ty = szToITy(sz); 3189 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3190 3191 assign( ts, getIReg(4, R_ESI) ); 3192 3193 putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) ); 3194 3195 putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3196 } 3197 3198 static 3199 void dis_STOS ( Int sz, IRTemp t_inc ) 3200 { 3201 IRType ty = szToITy(sz); 3202 IRTemp ta = newTemp(ty); /* EAX */ 3203 IRTemp td = newTemp(Ity_I32); /* EDI */ 3204 3205 assign( ta, getIReg(sz, R_EAX) ); 3206 assign( td, getIReg(4, R_EDI) ); 3207 3208 storeLE( mkexpr(td), mkexpr(ta) ); 3209 3210 putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3211 } 3212 3213 static 3214 void dis_CMPS ( Int sz, IRTemp t_inc ) 3215 { 3216 IRType ty = szToITy(sz); 3217 IRTemp tdv = newTemp(ty); /* (EDI) */ 3218 IRTemp tsv = newTemp(ty); /* (ESI) */ 3219 IRTemp td = newTemp(Ity_I32); /* EDI */ 3220 IRTemp ts = newTemp(Ity_I32); /* ESI */ 3221 3222 assign( td, getIReg(4, R_EDI) ); 3223 assign( ts, getIReg(4, R_ESI) ); 3224 3225 assign( tdv, loadLE(ty,mkexpr(td)) ); 3226 assign( tsv, loadLE(ty,mkexpr(ts)) ); 3227 3228 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 3229 3230 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3231 putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) ); 3232 } 3233 3234 static 3235 void dis_SCAS ( Int sz, IRTemp t_inc ) 3236 { 3237 IRType ty = szToITy(sz); 3238 IRTemp ta = newTemp(ty); /* EAX */ 3239 IRTemp td = newTemp(Ity_I32); /* EDI */ 3240 IRTemp tdv = newTemp(ty); /* (EDI) */ 3241 3242 assign( ta, getIReg(sz, R_EAX) ); 3243 assign( td, getIReg(4, R_EDI) ); 3244 3245 assign( tdv, loadLE(ty,mkexpr(td)) ); 3246 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 3247 3248 putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) ); 3249 } 3250 3251 3252 /* Wrap the appropriate string op inside a REP/REPE/REPNE. 
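   In outline, the IR emitted below is (a sketch):
       if (ECX == 0) goto eip_next        -- count exhausted
       ECX := ECX - 1
       <one iteration of the string op>
       REP:         goto eip              -- unconditionally retry
       REPE/REPNE:  if (cond) goto eip; else goto eip_next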
3253 We assume the insn is the last one in the basic block, and so emit a jump 3254 to the next insn, rather than just falling through. */ 3255 static 3256 void dis_REP_op ( X86Condcode cond, 3257 void (*dis_OP)(Int, IRTemp), 3258 Int sz, Addr32 eip, Addr32 eip_next, HChar* name ) 3259 { 3260 IRTemp t_inc = newTemp(Ity_I32); 3261 IRTemp tc = newTemp(Ity_I32); /* ECX */ 3262 3263 assign( tc, getIReg(4,R_ECX) ); 3264 3265 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), 3266 Ijk_Boring, 3267 IRConst_U32(eip_next) ) ); 3268 3269 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 3270 3271 dis_string_op_increment(sz, t_inc); 3272 dis_OP (sz, t_inc); 3273 3274 if (cond == X86CondAlways) { 3275 jmp_lit(Ijk_Boring,eip); 3276 } else { 3277 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), 3278 Ijk_Boring, 3279 IRConst_U32(eip) ) ); 3280 jmp_lit(Ijk_Boring,eip_next); 3281 } 3282 DIP("%s%c\n", name, nameISize(sz)); 3283 } 3284 3285 3286 /*------------------------------------------------------------*/ 3287 /*--- Arithmetic, etc. ---*/ 3288 /*------------------------------------------------------------*/ 3289 3290 /* IMUL E, G. Supplied eip points to the modR/M byte. */ 3291 static 3292 UInt dis_mul_E_G ( UChar sorb, 3293 Int size, 3294 Int delta0 ) 3295 { 3296 Int alen; 3297 HChar dis_buf[50]; 3298 UChar rm = getIByte(delta0); 3299 IRType ty = szToITy(size); 3300 IRTemp te = newTemp(ty); 3301 IRTemp tg = newTemp(ty); 3302 IRTemp resLo = newTemp(ty); 3303 3304 assign( tg, getIReg(size, gregOfRM(rm)) ); 3305 if (epartIsReg(rm)) { 3306 assign( te, getIReg(size, eregOfRM(rm)) ); 3307 } else { 3308 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf ); 3309 assign( te, loadLE(ty,mkexpr(addr)) ); 3310 } 3311 3312 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB ); 3313 3314 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 3315 3316 putIReg(size, gregOfRM(rm), mkexpr(resLo) ); 3317 3318 if (epartIsReg(rm)) { 3319 DIP("imul%c %s, %s\n", nameISize(size), 3320 nameIReg(size,eregOfRM(rm)), 3321 nameIReg(size,gregOfRM(rm))); 3322 return 1+delta0; 3323 } else { 3324 DIP("imul%c %s, %s\n", nameISize(size), 3325 dis_buf, nameIReg(size,gregOfRM(rm))); 3326 return alen+delta0; 3327 } 3328 } 3329 3330 3331 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ 3332 static 3333 UInt dis_imul_I_E_G ( UChar sorb, 3334 Int size, 3335 Int delta, 3336 Int litsize ) 3337 { 3338 Int d32, alen; 3339 HChar dis_buf[50]; 3340 UChar rm = getIByte(delta); 3341 IRType ty = szToITy(size); 3342 IRTemp te = newTemp(ty); 3343 IRTemp tl = newTemp(ty); 3344 IRTemp resLo = newTemp(ty); 3345 3346 vassert(size == 1 || size == 2 || size == 4); 3347 3348 if (epartIsReg(rm)) { 3349 assign(te, getIReg(size, eregOfRM(rm))); 3350 delta++; 3351 } else { 3352 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf ); 3353 assign(te, loadLE(ty, mkexpr(addr))); 3354 delta += alen; 3355 } 3356 d32 = getSDisp(litsize,delta); 3357 delta += litsize; 3358 3359 if (size == 1) d32 &= 0xFF; 3360 if (size == 2) d32 &= 0xFFFF; 3361 3362 assign(tl, mkU(ty,d32)); 3363 3364 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 3365 3366 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB ); 3367 3368 putIReg(size, gregOfRM(rm), mkexpr(resLo)); 3369 3370 DIP("imul %d, %s, %s\n", d32, 3371 ( epartIsReg(rm) ? 
nameIReg(size,eregOfRM(rm)) : dis_buf ), 3372 nameIReg(size,gregOfRM(rm)) ); 3373 return delta; 3374 } 3375 3376 3377 /* Generate an IR sequence to do a count-leading-zeroes operation on 3378 the supplied IRTemp, and return a new IRTemp holding the result. 3379 'ty' may be Ity_I16 or Ity_I32 only. In the case where the 3380 argument is zero, return the number of bits in the word (the 3381 natural semantics). */ 3382 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 3383 { 3384 vassert(ty == Ity_I32 || ty == Ity_I16); 3385 3386 IRTemp src32 = newTemp(Ity_I32); 3387 assign(src32, widenUto32( mkexpr(src) )); 3388 3389 IRTemp src32x = newTemp(Ity_I32); 3390 assign(src32x, 3391 binop(Iop_Shl32, mkexpr(src32), 3392 mkU8(32 - 8 * sizeofIRType(ty)))); 3393 3394 // Clz32 has undefined semantics when its input is zero, so 3395 // special-case around that. 3396 IRTemp res32 = newTemp(Ity_I32); 3397 assign(res32, 3398 IRExpr_Mux0X( 3399 unop(Iop_1Uto8, 3400 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0))), 3401 unop(Iop_Clz32, mkexpr(src32x)), 3402 mkU32(8 * sizeofIRType(ty)) 3403 )); 3404 3405 IRTemp res = newTemp(ty); 3406 assign(res, narrowTo(ty, mkexpr(res32))); 3407 return res; 3408 } 3409 3410 3411 /*------------------------------------------------------------*/ 3412 /*--- ---*/ 3413 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 3414 /*--- ---*/ 3415 /*------------------------------------------------------------*/ 3416 3417 /* --- Helper functions for dealing with the register stack. --- */ 3418 3419 /* --- Set the emulation-warning pseudo-register. --- */ 3420 3421 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 3422 { 3423 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3424 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 3425 } 3426 3427 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 3428 3429 static IRExpr* mkQNaN64 ( void ) 3430 { 3431 /* QNaN is 0 2047 1 0(51times) 3432 == 0b 11111111111b 1 0(51times) 3433 == 0x7FF8 0000 0000 0000 3434 */ 3435 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 3436 } 3437 3438 /* --------- Get/put the top-of-stack pointer. --------- */ 3439 3440 static IRExpr* get_ftop ( void ) 3441 { 3442 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 3443 } 3444 3445 static void put_ftop ( IRExpr* e ) 3446 { 3447 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3448 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 3449 } 3450 3451 /* --------- Get/put the C3210 bits. --------- */ 3452 3453 static IRExpr* get_C3210 ( void ) 3454 { 3455 return IRExpr_Get( OFFB_FC3210, Ity_I32 ); 3456 } 3457 3458 static void put_C3210 ( IRExpr* e ) 3459 { 3460 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 3461 } 3462 3463 /* --------- Get/put the FPU rounding mode. --------- */ 3464 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 3465 { 3466 return IRExpr_Get( OFFB_FPROUND, Ity_I32 ); 3467 } 3468 3469 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 3470 { 3471 stmt( IRStmt_Put( OFFB_FPROUND, e ) ); 3472 } 3473 3474 3475 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 3476 /* Produces a value in 0 .. 3, which is encoded as per the type 3477 IRRoundingMode. Since the guest_FPROUND value is also encoded as 3478 per IRRoundingMode, we merely need to get it and mask it for 3479 safety. 
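   (Both use the encoding 00 = to-nearest, 01 = toward -infinity,
   10 = toward +infinity, 11 = toward zero -- the same 2-bit RC
   layout as the native x87 control word.)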
3480 */ 3481 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 3482 { 3483 return binop( Iop_And32, get_fpround(), mkU32(3) ); 3484 } 3485 3486 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 3487 { 3488 return mkU32(Irrm_NEAREST); 3489 } 3490 3491 3492 /* --------- Get/set FP register tag bytes. --------- */ 3493 3494 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 3495 3496 static void put_ST_TAG ( Int i, IRExpr* value ) 3497 { 3498 IRRegArray* descr; 3499 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 3500 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3501 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 3502 } 3503 3504 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 3505 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 3506 3507 static IRExpr* get_ST_TAG ( Int i ) 3508 { 3509 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3510 return IRExpr_GetI( descr, get_ftop(), i ); 3511 } 3512 3513 3514 /* --------- Get/set FP registers. --------- */ 3515 3516 /* Given i, and some expression e, emit 'ST(i) = e' and set the 3517 register's tag to indicate the register is full. The previous 3518 state of the register is not checked. */ 3519 3520 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 3521 { 3522 IRRegArray* descr; 3523 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 3524 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3525 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 3526 /* Mark the register as in-use. */ 3527 put_ST_TAG(i, mkU8(1)); 3528 } 3529 3530 /* Given i, and some expression e, emit 3531 ST(i) = is_full(i) ? NaN : e 3532 and set the tag accordingly. 3533 */ 3534 3535 static void put_ST ( Int i, IRExpr* value ) 3536 { 3537 put_ST_UNCHECKED( i, 3538 IRExpr_Mux0X( get_ST_TAG(i), 3539 /* 0 means empty */ 3540 value, 3541 /* non-0 means full */ 3542 mkQNaN64() 3543 ) 3544 ); 3545 } 3546 3547 3548 /* Given i, generate an expression yielding 'ST(i)'. */ 3549 3550 static IRExpr* get_ST_UNCHECKED ( Int i ) 3551 { 3552 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3553 return IRExpr_GetI( descr, get_ftop(), i ); 3554 } 3555 3556 3557 /* Given i, generate an expression yielding 3558 is_full(i) ? ST(i) : NaN 3559 */ 3560 3561 static IRExpr* get_ST ( Int i ) 3562 { 3563 return 3564 IRExpr_Mux0X( get_ST_TAG(i), 3565 /* 0 means empty */ 3566 mkQNaN64(), 3567 /* non-0 means full */ 3568 get_ST_UNCHECKED(i)); 3569 } 3570 3571 3572 /* Adjust FTOP downwards by one register. */ 3573 3574 static void fp_push ( void ) 3575 { 3576 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 3577 } 3578 3579 /* Adjust FTOP upwards by one register, and mark the vacated register 3580 as empty. */ 3581 3582 static void fp_pop ( void ) 3583 { 3584 put_ST_TAG(0, mkU8(0)); 3585 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 3586 } 3587 3588 /* Clear the C2 bit of the FPU status register, for 3589 sin/cos/tan/sincos. 
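   (C2 sits at bit 10 of the guest FC3210 field, mirroring its
   position in the x87 status word; hence the mask used below.)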
*/ 3590 3591 static void clear_C2 ( void ) 3592 { 3593 put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); 3594 } 3595 3596 /* Invent a plausible-looking FPU status word value: 3597 ((ftop & 7) << 11) | (c3210 & 0x4700) 3598 */ 3599 static IRExpr* get_FPU_sw ( void ) 3600 { 3601 return 3602 unop(Iop_32to16, 3603 binop(Iop_Or32, 3604 binop(Iop_Shl32, 3605 binop(Iop_And32, get_ftop(), mkU32(7)), 3606 mkU8(11)), 3607 binop(Iop_And32, get_C3210(), mkU32(0x4700)) 3608 )); 3609 } 3610 3611 3612 /* ------------------------------------------------------- */ 3613 /* Given all that stack-mangling junk, we can now go ahead 3614 and describe FP instructions. 3615 */ 3616 3617 /* ST(0) = ST(0) `op` mem64/32(addr) 3618 Need to check ST(0)'s tag on read, but not on write. 3619 */ 3620 static 3621 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3622 IROp op, Bool dbl ) 3623 { 3624 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3625 if (dbl) { 3626 put_ST_UNCHECKED(0, 3627 triop( op, 3628 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3629 get_ST(0), 3630 loadLE(Ity_F64,mkexpr(addr)) 3631 )); 3632 } else { 3633 put_ST_UNCHECKED(0, 3634 triop( op, 3635 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3636 get_ST(0), 3637 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 3638 )); 3639 } 3640 } 3641 3642 3643 /* ST(0) = mem64/32(addr) `op` ST(0) 3644 Need to check ST(0)'s tag on read, but not on write. 3645 */ 3646 static 3647 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3648 IROp op, Bool dbl ) 3649 { 3650 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3651 if (dbl) { 3652 put_ST_UNCHECKED(0, 3653 triop( op, 3654 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3655 loadLE(Ity_F64,mkexpr(addr)), 3656 get_ST(0) 3657 )); 3658 } else { 3659 put_ST_UNCHECKED(0, 3660 triop( op, 3661 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3662 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 3663 get_ST(0) 3664 )); 3665 } 3666 } 3667 3668 3669 /* ST(dst) = ST(dst) `op` ST(src). 3670 Check dst and src tags when reading but not on write. 3671 */ 3672 static 3673 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3674 Bool pop_after ) 3675 { 3676 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3677 (Int)st_src, (Int)st_dst ); 3678 put_ST_UNCHECKED( 3679 st_dst, 3680 triop( op, 3681 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3682 get_ST(st_dst), 3683 get_ST(st_src) ) 3684 ); 3685 if (pop_after) 3686 fp_pop(); 3687 } 3688 3689 /* ST(dst) = ST(src) `op` ST(dst). 3690 Check dst and src tags when reading but not on write. 3691 */ 3692 static 3693 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3694 Bool pop_after ) 3695 { 3696 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3697 (Int)st_src, (Int)st_dst ); 3698 put_ST_UNCHECKED( 3699 st_dst, 3700 triop( op, 3701 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3702 get_ST(st_src), 3703 get_ST(st_dst) ) 3704 ); 3705 if (pop_after) 3706 fp_pop(); 3707 } 3708 3709 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 3710 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 3711 { 3712 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i ); 3713 /* This is a bit of a hack (and isn't really right). It sets 3714 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 3715 documentation implies A and S are unchanged. 
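      (Iop_CmpF64 yields 0x45 for unordered, 0x40 for EQ, 0x01 for
      LT and 0x00 for GT; these values line up with the eflags bit
      positions CF == 0x01, PF == 0x04 and ZF == 0x40, so anding
      with 0x45 produces exactly the Z,P,C triple that UCOMI
      defines.)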
3716 */ 3717 /* It's also fishy in that it is used both for COMIP and 3718 UCOMIP, and they aren't the same (although similar). */ 3719 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 3720 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 3721 stmt( IRStmt_Put( OFFB_CC_DEP1, 3722 binop( Iop_And32, 3723 binop(Iop_CmpF64, get_ST(0), get_ST(i)), 3724 mkU32(0x45) 3725 ))); 3726 /* Set NDEP even though it isn't used. This makes redundant-PUT 3727 elimination of previous stores to this field work better. */ 3728 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 3729 if (pop_after) 3730 fp_pop(); 3731 } 3732 3733 3734 static 3735 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) 3736 { 3737 Int len; 3738 UInt r_src, r_dst; 3739 HChar dis_buf[50]; 3740 IRTemp t1, t2; 3741 3742 /* On entry, delta points at the second byte of the insn (the modrm 3743 byte).*/ 3744 UChar first_opcode = getIByte(delta-1); 3745 UChar modrm = getIByte(delta+0); 3746 3747 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 3748 3749 if (first_opcode == 0xD8) { 3750 if (modrm < 0xC0) { 3751 3752 /* bits 5,4,3 are an opcode extension, and the modRM also 3753 specifies an address. */ 3754 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3755 delta += len; 3756 3757 switch (gregOfRM(modrm)) { 3758 3759 case 0: /* FADD single-real */ 3760 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 3761 break; 3762 3763 case 1: /* FMUL single-real */ 3764 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 3765 break; 3766 3767 case 2: /* FCOM single-real */ 3768 DIP("fcoms %s\n", dis_buf); 3769 /* This forces C1 to zero, which isn't right. */ 3770 put_C3210( 3771 binop( Iop_And32, 3772 binop(Iop_Shl32, 3773 binop(Iop_CmpF64, 3774 get_ST(0), 3775 unop(Iop_F32toF64, 3776 loadLE(Ity_F32,mkexpr(addr)))), 3777 mkU8(8)), 3778 mkU32(0x4500) 3779 )); 3780 break; 3781 3782 case 3: /* FCOMP single-real */ 3783 DIP("fcomps %s\n", dis_buf); 3784 /* This forces C1 to zero, which isn't right. */ 3785 put_C3210( 3786 binop( Iop_And32, 3787 binop(Iop_Shl32, 3788 binop(Iop_CmpF64, 3789 get_ST(0), 3790 unop(Iop_F32toF64, 3791 loadLE(Ity_F32,mkexpr(addr)))), 3792 mkU8(8)), 3793 mkU32(0x4500) 3794 )); 3795 fp_pop(); 3796 break; 3797 3798 case 4: /* FSUB single-real */ 3799 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 3800 break; 3801 3802 case 5: /* FSUBR single-real */ 3803 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 3804 break; 3805 3806 case 6: /* FDIV single-real */ 3807 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 3808 break; 3809 3810 case 7: /* FDIVR single-real */ 3811 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 3812 break; 3813 3814 default: 3815 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 3816 vex_printf("first_opcode == 0xD8\n"); 3817 goto decode_fail; 3818 } 3819 } else { 3820 delta++; 3821 switch (modrm) { 3822 3823 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 3824 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 3825 break; 3826 3827 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 3828 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 3829 break; 3830 3831 /* Dunno if this is right */ 3832 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 3833 r_dst = (UInt)modrm - 0xD0; 3834 DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst); 3835 /* This forces C1 to zero, which isn't right. 
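            (Shifting the Iop_CmpF64 result left by 8 lands its bits
            on C0 (0x100), C2 (0x400) and C3 (0x4000); anding with
            0x4500 keeps those and forces C1 (0x200) to zero.)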
*/ 3836 put_C3210( 3837 binop( Iop_And32, 3838 binop(Iop_Shl32, 3839 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3840 mkU8(8)), 3841 mkU32(0x4500) 3842 )); 3843 break; 3844 3845 /* Dunno if this is right */ 3846 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 3847 r_dst = (UInt)modrm - 0xD8; 3848 DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst); 3849 /* This forces C1 to zero, which isn't right. */ 3850 put_C3210( 3851 binop( Iop_And32, 3852 binop(Iop_Shl32, 3853 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3854 mkU8(8)), 3855 mkU32(0x4500) 3856 )); 3857 fp_pop(); 3858 break; 3859 3860 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 3861 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 3862 break; 3863 3864 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 3865 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 3866 break; 3867 3868 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 3869 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 3870 break; 3871 3872 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 3873 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 3874 break; 3875 3876 default: 3877 goto decode_fail; 3878 } 3879 } 3880 } 3881 3882 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 3883 else 3884 if (first_opcode == 0xD9) { 3885 if (modrm < 0xC0) { 3886 3887 /* bits 5,4,3 are an opcode extension, and the modRM also 3888 specifies an address. */ 3889 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3890 delta += len; 3891 3892 switch (gregOfRM(modrm)) { 3893 3894 case 0: /* FLD single-real */ 3895 DIP("flds %s\n", dis_buf); 3896 fp_push(); 3897 put_ST(0, unop(Iop_F32toF64, 3898 loadLE(Ity_F32, mkexpr(addr)))); 3899 break; 3900 3901 case 2: /* FST single-real */ 3902 DIP("fsts %s\n", dis_buf); 3903 storeLE(mkexpr(addr), 3904 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3905 break; 3906 3907 case 3: /* FSTP single-real */ 3908 DIP("fstps %s\n", dis_buf); 3909 storeLE(mkexpr(addr), 3910 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3911 fp_pop(); 3912 break; 3913 3914 case 4: { /* FLDENV m28 */ 3915 /* Uses dirty helper: 3916 VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord ) */ 3917 IRTemp ew = newTemp(Ity_I32); 3918 IRDirty* d = unsafeIRDirty_0_N ( 3919 0/*regparms*/, 3920 "x86g_dirtyhelper_FLDENV", 3921 &x86g_dirtyhelper_FLDENV, 3922 mkIRExprVec_1( mkexpr(addr) ) 3923 ); 3924 d->needsBBP = True; 3925 d->tmp = ew; 3926 /* declare we're reading memory */ 3927 d->mFx = Ifx_Read; 3928 d->mAddr = mkexpr(addr); 3929 d->mSize = 28; 3930 3931 /* declare we're writing guest state */ 3932 d->nFxState = 4; 3933 3934 d->fxState[0].fx = Ifx_Write; 3935 d->fxState[0].offset = OFFB_FTOP; 3936 d->fxState[0].size = sizeof(UInt); 3937 3938 d->fxState[1].fx = Ifx_Write; 3939 d->fxState[1].offset = OFFB_FPTAGS; 3940 d->fxState[1].size = 8 * sizeof(UChar); 3941 3942 d->fxState[2].fx = Ifx_Write; 3943 d->fxState[2].offset = OFFB_FPROUND; 3944 d->fxState[2].size = sizeof(UInt); 3945 3946 d->fxState[3].fx = Ifx_Write; 3947 d->fxState[3].offset = OFFB_FC3210; 3948 d->fxState[3].size = sizeof(UInt); 3949 3950 stmt( IRStmt_Dirty(d) ); 3951 3952 /* ew contains any emulation warning we may need to 3953 issue. If needed, side-exit to the next insn, 3954 reporting the warning, so that Valgrind's dispatcher 3955 sees the warning.
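(Concretely, a conditional Ijk_EmWarn exit to the following insn; when ew is zero the exit is not taken and execution just carries on.)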
*/ 3956 put_emwarn( mkexpr(ew) ); 3957 stmt( 3958 IRStmt_Exit( 3959 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 3960 Ijk_EmWarn, 3961 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) 3962 ) 3963 ); 3964 3965 DIP("fldenv %s\n", dis_buf); 3966 break; 3967 } 3968 3969 case 5: { /* FLDCW */ 3970 /* The only thing we observe in the control word is the 3971 rounding mode. Therefore, pass the 16-bit value 3972 (x87 native-format control word) to a clean helper, 3973 getting back a 64-bit value, the lower half of which 3974 is the FPROUND value to store, and the upper half of 3975 which is the emulation-warning token which may be 3976 generated. 3977 */ 3978 /* ULong x86g_check_fldcw ( UInt ); */ 3979 IRTemp t64 = newTemp(Ity_I64); 3980 IRTemp ew = newTemp(Ity_I32); 3981 DIP("fldcw %s\n", dis_buf); 3982 assign( t64, mkIRExprCCall( 3983 Ity_I64, 0/*regparms*/, 3984 "x86g_check_fldcw", 3985 &x86g_check_fldcw, 3986 mkIRExprVec_1( 3987 unop( Iop_16Uto32, 3988 loadLE(Ity_I16, mkexpr(addr))) 3989 ) 3990 ) 3991 ); 3992 3993 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 3994 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 3995 put_emwarn( mkexpr(ew) ); 3996 /* Finally, if an emulation warning was reported, 3997 side-exit to the next insn, reporting the warning, 3998 so that Valgrind's dispatcher sees the warning. */ 3999 stmt( 4000 IRStmt_Exit( 4001 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 4002 Ijk_EmWarn, 4003 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) 4004 ) 4005 ); 4006 break; 4007 } 4008 4009 case 6: { /* FNSTENV m28 */ 4010 /* Uses dirty helper: 4011 void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ) */ 4012 IRDirty* d = unsafeIRDirty_0_N ( 4013 0/*regparms*/, 4014 "x86g_dirtyhelper_FSTENV", 4015 &x86g_dirtyhelper_FSTENV, 4016 mkIRExprVec_1( mkexpr(addr) ) 4017 ); 4018 d->needsBBP = True; 4019 /* declare we're writing memory */ 4020 d->mFx = Ifx_Write; 4021 d->mAddr = mkexpr(addr); 4022 d->mSize = 28; 4023 4024 /* declare we're reading guest state */ 4025 d->nFxState = 4; 4026 4027 d->fxState[0].fx = Ifx_Read; 4028 d->fxState[0].offset = OFFB_FTOP; 4029 d->fxState[0].size = sizeof(UInt); 4030 4031 d->fxState[1].fx = Ifx_Read; 4032 d->fxState[1].offset = OFFB_FPTAGS; 4033 d->fxState[1].size = 8 * sizeof(UChar); 4034 4035 d->fxState[2].fx = Ifx_Read; 4036 d->fxState[2].offset = OFFB_FPROUND; 4037 d->fxState[2].size = sizeof(UInt); 4038 4039 d->fxState[3].fx = Ifx_Read; 4040 d->fxState[3].offset = OFFB_FC3210; 4041 d->fxState[3].size = sizeof(UInt); 4042 4043 stmt( IRStmt_Dirty(d) ); 4044 4045 DIP("fnstenv %s\n", dis_buf); 4046 break; 4047 } 4048 4049 case 7: /* FNSTCW */ 4050 /* Fake up a native x87 FPU control word. The only 4051 thing it depends on is FPROUND[1:0], so call a clean 4052 helper to cook it up. */ 4053 /* UInt x86g_create_fpucw ( UInt fpround ) */ 4054 DIP("fnstcw %s\n", dis_buf); 4055 storeLE( 4056 mkexpr(addr), 4057 unop( Iop_32to16, 4058 mkIRExprCCall( 4059 Ity_I32, 0/*regp*/, 4060 "x86g_create_fpucw", &x86g_create_fpucw, 4061 mkIRExprVec_1( get_fpround() ) 4062 ) 4063 ) 4064 ); 4065 break; 4066 4067 default: 4068 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4069 vex_printf("first_opcode == 0xD9\n"); 4070 goto decode_fail; 4071 } 4072 4073 } else { 4074 delta++; 4075 switch (modrm) { 4076 4077 case 0xC0 ... 0xC7: /* FLD %st(?) */ 4078 r_src = (UInt)modrm - 0xC0; 4079 DIP("fld %%st(%d)\n", (Int)r_src); 4080 t1 = newTemp(Ity_F64); 4081 assign(t1, get_ST(r_src)); 4082 fp_push(); 4083 put_ST(0, mkexpr(t1)); 4084 break; 4085 4086 case 0xC8 ... 0xCF: /* FXCH %st(?)
*/ 4087 r_src = (UInt)modrm - 0xC8; 4088 DIP("fxch %%st(%d)\n", (Int)r_src); 4089 t1 = newTemp(Ity_F64); 4090 t2 = newTemp(Ity_F64); 4091 assign(t1, get_ST(0)); 4092 assign(t2, get_ST(r_src)); 4093 put_ST_UNCHECKED(0, mkexpr(t2)); 4094 put_ST_UNCHECKED(r_src, mkexpr(t1)); 4095 break; 4096 4097 case 0xE0: /* FCHS */ 4098 DIP("fchs\n"); 4099 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 4100 break; 4101 4102 case 0xE1: /* FABS */ 4103 DIP("fabs\n"); 4104 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 4105 break; 4106 4107 case 0xE4: /* FTST */ 4108 DIP("ftst\n"); 4109 /* This forces C1 to zero, which isn't right. */ 4110 /* Well, in fact the Intel docs say (bizarrely): "C1 is 4111 set to 0 if stack underflow occurred; otherwise, set 4112 to 0" which is pretty nonsensical. I guess it's a 4113 typo. */ 4114 put_C3210( 4115 binop( Iop_And32, 4116 binop(Iop_Shl32, 4117 binop(Iop_CmpF64, 4118 get_ST(0), 4119 IRExpr_Const(IRConst_F64i(0x0ULL))), 4120 mkU8(8)), 4121 mkU32(0x4500) 4122 )); 4123 break; 4124 4125 case 0xE5: { /* FXAM */ 4126 /* This is an interesting one. It examines %st(0), 4127 regardless of whether the tag says it's empty or not. 4128 Here, just pass both the tag (in our format) and the 4129 value (as a double, actually a ULong) to a helper 4130 function. */ 4131 IRExpr** args 4132 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)), 4133 unop(Iop_ReinterpF64asI64, 4134 get_ST_UNCHECKED(0)) ); 4135 put_C3210(mkIRExprCCall( 4136 Ity_I32, 4137 0/*regparm*/, 4138 "x86g_calculate_FXAM", &x86g_calculate_FXAM, 4139 args 4140 )); 4141 DIP("fxam\n"); 4142 break; 4143 } 4144 4145 case 0xE8: /* FLD1 */ 4146 DIP("fld1\n"); 4147 fp_push(); 4148 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 4149 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 4150 break; 4151 4152 case 0xE9: /* FLDL2T */ 4153 DIP("fldl2t\n"); 4154 fp_push(); 4155 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 4156 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 4157 break; 4158 4159 case 0xEA: /* FLDL2E */ 4160 DIP("fldl2e\n"); 4161 fp_push(); 4162 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 4163 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 4164 break; 4165 4166 case 0xEB: /* FLDPI */ 4167 DIP("fldpi\n"); 4168 fp_push(); 4169 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 4170 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 4171 break; 4172 4173 case 0xEC: /* FLDLG2 */ 4174 DIP("fldlg2\n"); 4175 fp_push(); 4176 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 4177 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 4178 break; 4179 4180 case 0xED: /* FLDLN2 */ 4181 DIP("fldln2\n"); 4182 fp_push(); 4183 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 4184 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 4185 break; 4186 4187 case 0xEE: /* FLDZ */ 4188 DIP("fldz\n"); 4189 fp_push(); 4190 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 4191 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 4192 break; 4193 4194 case 0xF0: /* F2XM1 */ 4195 DIP("f2xm1\n"); 4196 put_ST_UNCHECKED(0, 4197 binop(Iop_2xm1F64, 4198 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4199 get_ST(0))); 4200 break; 4201 4202 case 0xF1: /* FYL2X */ 4203 DIP("fyl2x\n"); 4204 put_ST_UNCHECKED(1, 4205 triop(Iop_Yl2xF64, 4206 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4207 get_ST(1), 4208 get_ST(0))); 4209 fp_pop(); 4210 break; 4211 4212 case 0xF2: /* FPTAN */ 
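/* fptan replaces %st(0) by tan(%st(0)) and then pushes the constant +1.0 -- an 8087-compatibility quirk, which e.g. lets callers form a cotangent with a following fdiv. As per the x87 limitations listed at the top of this file, the in-range check on the argument is not modelled: clear_C2() below unconditionally claims the reduction succeeded. */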
4213 DIP("ftan\n"); 4214 put_ST_UNCHECKED(0, 4215 binop(Iop_TanF64, 4216 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4217 get_ST(0))); 4218 fp_push(); 4219 put_ST(0, IRExpr_Const(IRConst_F64(1.0))); 4220 clear_C2(); /* HACK */ 4221 break; 4222 4223 case 0xF3: /* FPATAN */ 4224 DIP("fpatan\n"); 4225 put_ST_UNCHECKED(1, 4226 triop(Iop_AtanF64, 4227 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4228 get_ST(1), 4229 get_ST(0))); 4230 fp_pop(); 4231 break; 4232 4233 case 0xF4: { /* FXTRACT */ 4234 IRTemp argF = newTemp(Ity_F64); 4235 IRTemp sigF = newTemp(Ity_F64); 4236 IRTemp expF = newTemp(Ity_F64); 4237 IRTemp argI = newTemp(Ity_I64); 4238 IRTemp sigI = newTemp(Ity_I64); 4239 IRTemp expI = newTemp(Ity_I64); 4240 DIP("fxtract\n"); 4241 assign( argF, get_ST(0) ); 4242 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 4243 assign( sigI, 4244 mkIRExprCCall( 4245 Ity_I64, 0/*regparms*/, 4246 "x86amd64g_calculate_FXTRACT", 4247 &x86amd64g_calculate_FXTRACT, 4248 mkIRExprVec_2( mkexpr(argI), 4249 mkIRExpr_HWord(0)/*sig*/ )) 4250 ); 4251 assign( expI, 4252 mkIRExprCCall( 4253 Ity_I64, 0/*regparms*/, 4254 "x86amd64g_calculate_FXTRACT", 4255 &x86amd64g_calculate_FXTRACT, 4256 mkIRExprVec_2( mkexpr(argI), 4257 mkIRExpr_HWord(1)/*exp*/ )) 4258 ); 4259 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 4260 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 4261 /* exponent */ 4262 put_ST_UNCHECKED(0, mkexpr(expF) ); 4263 fp_push(); 4264 /* significand */ 4265 put_ST(0, mkexpr(sigF) ); 4266 break; 4267 } 4268 4269 case 0xF5: { /* FPREM1 -- IEEE compliant */ 4270 IRTemp a1 = newTemp(Ity_F64); 4271 IRTemp a2 = newTemp(Ity_F64); 4272 DIP("fprem1\n"); 4273 /* Do FPREM1 twice, once to get the remainder, and once 4274 to get the C3210 flag values. */ 4275 assign( a1, get_ST(0) ); 4276 assign( a2, get_ST(1) ); 4277 put_ST_UNCHECKED(0, 4278 triop(Iop_PRem1F64, 4279 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4280 mkexpr(a1), 4281 mkexpr(a2))); 4282 put_C3210( 4283 triop(Iop_PRem1C3210F64, 4284 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4285 mkexpr(a1), 4286 mkexpr(a2)) ); 4287 break; 4288 } 4289 4290 case 0xF7: /* FINCSTP */ 4291 DIP("fprem\n"); 4292 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 4293 break; 4294 4295 case 0xF8: { /* FPREM -- not IEEE compliant */ 4296 IRTemp a1 = newTemp(Ity_F64); 4297 IRTemp a2 = newTemp(Ity_F64); 4298 DIP("fprem\n"); 4299 /* Do FPREM twice, once to get the remainder, and once 4300 to get the C3210 flag values. 
*/ 4301 assign( a1, get_ST(0) ); 4302 assign( a2, get_ST(1) ); 4303 put_ST_UNCHECKED(0, 4304 triop(Iop_PRemF64, 4305 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4306 mkexpr(a1), 4307 mkexpr(a2))); 4308 put_C3210( 4309 triop(Iop_PRemC3210F64, 4310 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4311 mkexpr(a1), 4312 mkexpr(a2)) ); 4313 break; 4314 } 4315 4316 case 0xF9: /* FYL2XP1 */ 4317 DIP("fyl2xp1\n"); 4318 put_ST_UNCHECKED(1, 4319 triop(Iop_Yl2xp1F64, 4320 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4321 get_ST(1), 4322 get_ST(0))); 4323 fp_pop(); 4324 break; 4325 4326 case 0xFA: /* FSQRT */ 4327 DIP("fsqrt\n"); 4328 put_ST_UNCHECKED(0, 4329 binop(Iop_SqrtF64, 4330 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4331 get_ST(0))); 4332 break; 4333 4334 case 0xFB: { /* FSINCOS */ 4335 IRTemp a1 = newTemp(Ity_F64); 4336 assign( a1, get_ST(0) ); 4337 DIP("fsincos\n"); 4338 put_ST_UNCHECKED(0, 4339 binop(Iop_SinF64, 4340 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4341 mkexpr(a1))); 4342 fp_push(); 4343 put_ST(0, 4344 binop(Iop_CosF64, 4345 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4346 mkexpr(a1))); 4347 clear_C2(); /* HACK */ 4348 break; 4349 } 4350 4351 case 0xFC: /* FRNDINT */ 4352 DIP("frndint\n"); 4353 put_ST_UNCHECKED(0, 4354 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 4355 break; 4356 4357 case 0xFD: /* FSCALE */ 4358 DIP("fscale\n"); 4359 put_ST_UNCHECKED(0, 4360 triop(Iop_ScaleF64, 4361 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4362 get_ST(0), 4363 get_ST(1))); 4364 break; 4365 4366 case 0xFE: /* FSIN */ 4367 DIP("fsin\n"); 4368 put_ST_UNCHECKED(0, 4369 binop(Iop_SinF64, 4370 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4371 get_ST(0))); 4372 clear_C2(); /* HACK */ 4373 break; 4374 4375 case 0xFF: /* FCOS */ 4376 DIP("fcos\n"); 4377 put_ST_UNCHECKED(0, 4378 binop(Iop_CosF64, 4379 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4380 get_ST(0))); 4381 clear_C2(); /* HACK */ 4382 break; 4383 4384 default: 4385 goto decode_fail; 4386 } 4387 } 4388 } 4389 4390 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 4391 else 4392 if (first_opcode == 0xDA) { 4393 4394 if (modrm < 0xC0) { 4395 4396 /* bits 5,4,3 are an opcode extension, and the modRM also 4397 specifies an address. */ 4398 IROp fop; 4399 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4400 delta += len; 4401 switch (gregOfRM(modrm)) { 4402 4403 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 4404 DIP("fiaddl %s\n", dis_buf); 4405 fop = Iop_AddF64; 4406 goto do_fop_m32; 4407 4408 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 4409 DIP("fimull %s\n", dis_buf); 4410 fop = Iop_MulF64; 4411 goto do_fop_m32; 4412 4413 case 2: /* FICOM m32int */ 4414 DIP("ficoml %s\n", dis_buf); 4415 /* This forces C1 to zero, which isn't right. */ 4416 put_C3210( 4417 binop( Iop_And32, 4418 binop(Iop_Shl32, 4419 binop(Iop_CmpF64, 4420 get_ST(0), 4421 unop(Iop_I32StoF64, 4422 loadLE(Ity_I32,mkexpr(addr)))), 4423 mkU8(8)), 4424 mkU32(0x4500) 4425 )); 4426 break; 4427 4428 case 3: /* FICOMP m32int */ 4429 DIP("ficompl %s\n", dis_buf); 4430 /* This forces C1 to zero, which isn't right. 
*/ 4431 put_C3210( 4432 binop( Iop_And32, 4433 binop(Iop_Shl32, 4434 binop(Iop_CmpF64, 4435 get_ST(0), 4436 unop(Iop_I32StoF64, 4437 loadLE(Ity_I32,mkexpr(addr)))), 4438 mkU8(8)), 4439 mkU32(0x4500) 4440 )); 4441 fp_pop(); 4442 break; 4443 4444 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 4445 DIP("fisubl %s\n", dis_buf); 4446 fop = Iop_SubF64; 4447 goto do_fop_m32; 4448 4449 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 4450 DIP("fisubrl %s\n", dis_buf); 4451 fop = Iop_SubF64; 4452 goto do_foprev_m32; 4453 4454 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 4455 DIP("fidivl %s\n", dis_buf); 4456 fop = Iop_DivF64; 4457 goto do_fop_m32; 4458 4459 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 4460 DIP("fidivrl %s\n", dis_buf); 4461 fop = Iop_DivF64; 4462 goto do_foprev_m32; 4463 4464 do_fop_m32: 4465 put_ST_UNCHECKED(0, 4466 triop(fop, 4467 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4468 get_ST(0), 4469 unop(Iop_I32StoF64, 4470 loadLE(Ity_I32, mkexpr(addr))))); 4471 break; 4472 4473 do_foprev_m32: 4474 put_ST_UNCHECKED(0, 4475 triop(fop, 4476 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4477 unop(Iop_I32StoF64, 4478 loadLE(Ity_I32, mkexpr(addr))), 4479 get_ST(0))); 4480 break; 4481 4482 default: 4483 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4484 vex_printf("first_opcode == 0xDA\n"); 4485 goto decode_fail; 4486 } 4487 4488 } else { 4489 4490 delta++; 4491 switch (modrm) { 4492 4493 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 4494 r_src = (UInt)modrm - 0xC0; 4495 DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src); 4496 put_ST_UNCHECKED(0, 4497 IRExpr_Mux0X( 4498 unop(Iop_1Uto8, 4499 mk_x86g_calculate_condition(X86CondB)), 4500 get_ST(0), get_ST(r_src)) ); 4501 break; 4502 4503 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 4504 r_src = (UInt)modrm - 0xC8; 4505 DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src); 4506 put_ST_UNCHECKED(0, 4507 IRExpr_Mux0X( 4508 unop(Iop_1Uto8, 4509 mk_x86g_calculate_condition(X86CondZ)), 4510 get_ST(0), get_ST(r_src)) ); 4511 break; 4512 4513 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 4514 r_src = (UInt)modrm - 0xD0; 4515 DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src); 4516 put_ST_UNCHECKED(0, 4517 IRExpr_Mux0X( 4518 unop(Iop_1Uto8, 4519 mk_x86g_calculate_condition(X86CondBE)), 4520 get_ST(0), get_ST(r_src)) ); 4521 break; 4522 4523 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 4524 r_src = (UInt)modrm - 0xD8; 4525 DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src); 4526 put_ST_UNCHECKED(0, 4527 IRExpr_Mux0X( 4528 unop(Iop_1Uto8, 4529 mk_x86g_calculate_condition(X86CondP)), 4530 get_ST(0), get_ST(r_src)) ); 4531 break; 4532 4533 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 4534 DIP("fucompp %%st(0),%%st(1)\n"); 4535 /* This forces C1 to zero, which isn't right. */ 4536 put_C3210( 4537 binop( Iop_And32, 4538 binop(Iop_Shl32, 4539 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 4540 mkU8(8)), 4541 mkU32(0x4500) 4542 )); 4543 fp_pop(); 4544 fp_pop(); 4545 break; 4546 4547 default: 4548 goto decode_fail; 4549 } 4550 4551 } 4552 } 4553 4554 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 4555 else 4556 if (first_opcode == 0xDB) { 4557 if (modrm < 0xC0) { 4558 4559 /* bits 5,4,3 are an opcode extension, and the modRM also 4560 specifies an address. 
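(gregOfRM() extracts exactly those three bits; the switch below dispatches on them.)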
*/ 4561 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4562 delta += len; 4563 4564 switch (gregOfRM(modrm)) { 4565 4566 case 0: /* FILD m32int */ 4567 DIP("fildl %s\n", dis_buf); 4568 fp_push(); 4569 put_ST(0, unop(Iop_I32StoF64, 4570 loadLE(Ity_I32, mkexpr(addr)))); 4571 break; 4572 4573 case 1: /* FISTTPL m32 (SSE3) */ 4574 DIP("fisttpl %s\n", dis_buf); 4575 storeLE( mkexpr(addr), 4576 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 4577 fp_pop(); 4578 break; 4579 4580 case 2: /* FIST m32 */ 4581 DIP("fistl %s\n", dis_buf); 4582 storeLE( mkexpr(addr), 4583 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 4584 break; 4585 4586 case 3: /* FISTP m32 */ 4587 DIP("fistpl %s\n", dis_buf); 4588 storeLE( mkexpr(addr), 4589 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 4590 fp_pop(); 4591 break; 4592 4593 case 5: { /* FLD extended-real */ 4594 /* Uses dirty helper: 4595 ULong x86g_dirtyhelper_loadF80le ( UInt ) 4596 addr holds the address. First, do a dirty call to 4597 get hold of the data. */ 4598 IRTemp val = newTemp(Ity_I64); 4599 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 4600 4601 IRDirty* d = unsafeIRDirty_1_N ( 4602 val, 4603 0/*regparms*/, 4604 "x86g_dirtyhelper_loadF80le", 4605 &x86g_dirtyhelper_loadF80le, 4606 args 4607 ); 4608 /* declare that we're reading memory */ 4609 d->mFx = Ifx_Read; 4610 d->mAddr = mkexpr(addr); 4611 d->mSize = 10; 4612 4613 /* execute the dirty call, dumping the result in val. */ 4614 stmt( IRStmt_Dirty(d) ); 4615 fp_push(); 4616 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 4617 4618 DIP("fldt %s\n", dis_buf); 4619 break; 4620 } 4621 4622 case 7: { /* FSTP extended-real */ 4623 /* Uses dirty helper: void x86g_dirtyhelper_storeF80le ( UInt, ULong ) */ 4624 IRExpr** args 4625 = mkIRExprVec_2( mkexpr(addr), 4626 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 4627 4628 IRDirty* d = unsafeIRDirty_0_N ( 4629 0/*regparms*/, 4630 "x86g_dirtyhelper_storeF80le", 4631 &x86g_dirtyhelper_storeF80le, 4632 args 4633 ); 4634 /* declare we're writing memory */ 4635 d->mFx = Ifx_Write; 4636 d->mAddr = mkexpr(addr); 4637 d->mSize = 10; 4638 4639 /* execute the dirty call. */ 4640 stmt( IRStmt_Dirty(d) ); 4641 fp_pop(); 4642 4643 DIP("fstpt %s\n", dis_buf); 4644 break; 4645 } 4646 4647 default: 4648 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4649 vex_printf("first_opcode == 0xDB\n"); 4650 goto decode_fail; 4651 } 4652 4653 } else { 4654 4655 delta++; 4656 switch (modrm) { 4657 4658 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 4659 r_src = (UInt)modrm - 0xC0; 4660 DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src); 4661 put_ST_UNCHECKED(0, 4662 IRExpr_Mux0X( 4663 unop(Iop_1Uto8, 4664 mk_x86g_calculate_condition(X86CondNB)), 4665 get_ST(0), get_ST(r_src)) ); 4666 break; 4667 4668 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 4669 r_src = (UInt)modrm - 0xC8; 4670 DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src); 4671 put_ST_UNCHECKED(0, 4672 IRExpr_Mux0X( 4673 unop(Iop_1Uto8, 4674 mk_x86g_calculate_condition(X86CondNZ)), 4675 get_ST(0), get_ST(r_src)) ); 4676 break; 4677 4678 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 4679 r_src = (UInt)modrm - 0xD0; 4680 DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src); 4681 put_ST_UNCHECKED(0, 4682 IRExpr_Mux0X( 4683 unop(Iop_1Uto8, 4684 mk_x86g_calculate_condition(X86CondNBE)), 4685 get_ST(0), get_ST(r_src)) ); 4686 break; 4687 4688 case 0xD8 ...
0xDF: /* FCMOVNU ST(i), ST(0) */ 4689 r_src = (UInt)modrm - 0xD8; 4690 DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src); 4691 put_ST_UNCHECKED(0, 4692 IRExpr_Mux0X( 4693 unop(Iop_1Uto8, 4694 mk_x86g_calculate_condition(X86CondNP)), 4695 get_ST(0), get_ST(r_src)) ); 4696 break; 4697 4698 case 0xE2: /* FNCLEX */ 4699 DIP("fnclex\n"); /* fnclex clears pending FP exceptions; since this simulation models no FP exceptions (see the x87 limitations at the top of this file), there is nothing to do. */ 4700 break; 4701 4702 case 0xE3: { /* FNINIT */ 4703 /* Uses dirty helper: 4704 void x86g_dirtyhelper_FINIT ( VexGuestX86State* ) */ 4705 IRDirty* d = unsafeIRDirty_0_N ( 4706 0/*regparms*/, 4707 "x86g_dirtyhelper_FINIT", 4708 &x86g_dirtyhelper_FINIT, 4709 mkIRExprVec_0() 4710 ); 4711 d->needsBBP = True; 4712 4713 /* declare we're writing guest state */ 4714 d->nFxState = 5; 4715 4716 d->fxState[0].fx = Ifx_Write; 4717 d->fxState[0].offset = OFFB_FTOP; 4718 d->fxState[0].size = sizeof(UInt); 4719 4720 d->fxState[1].fx = Ifx_Write; 4721 d->fxState[1].offset = OFFB_FPREGS; 4722 d->fxState[1].size = 8 * sizeof(ULong); 4723 4724 d->fxState[2].fx = Ifx_Write; 4725 d->fxState[2].offset = OFFB_FPTAGS; 4726 d->fxState[2].size = 8 * sizeof(UChar); 4727 4728 d->fxState[3].fx = Ifx_Write; 4729 d->fxState[3].offset = OFFB_FPROUND; 4730 d->fxState[3].size = sizeof(UInt); 4731 4732 d->fxState[4].fx = Ifx_Write; 4733 d->fxState[4].offset = OFFB_FC3210; 4734 d->fxState[4].size = sizeof(UInt); 4735 4736 stmt( IRStmt_Dirty(d) ); 4737 4738 DIP("fninit\n"); 4739 break; 4740 } 4741 4742 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 4743 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 4744 break; 4745 4746 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 4747 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 4748 break; 4749 4750 default: 4751 goto decode_fail; 4752 } 4753 } 4754 } 4755 4756 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 4757 else 4758 if (first_opcode == 0xDC) { 4759 if (modrm < 0xC0) { 4760 4761 /* bits 5,4,3 are an opcode extension, and the modRM also 4762 specifies an address. */ 4763 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4764 delta += len; 4765 4766 switch (gregOfRM(modrm)) { 4767 4768 case 0: /* FADD double-real */ 4769 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 4770 break; 4771 4772 case 1: /* FMUL double-real */ 4773 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 4774 break; 4775 4776 case 2: /* FCOM double-real */ 4777 DIP("fcoml %s\n", dis_buf); 4778 /* This forces C1 to zero, which isn't right. */ 4779 put_C3210( 4780 binop( Iop_And32, 4781 binop(Iop_Shl32, 4782 binop(Iop_CmpF64, 4783 get_ST(0), 4784 loadLE(Ity_F64,mkexpr(addr))), 4785 mkU8(8)), 4786 mkU32(0x4500) 4787 )); 4788 break; 4789 4790 case 3: /* FCOMP double-real */ 4791 DIP("fcompl %s\n", dis_buf); 4792 /* This forces C1 to zero, which isn't right.
*/ 4793 put_C3210( 4794 binop( Iop_And32, 4795 binop(Iop_Shl32, 4796 binop(Iop_CmpF64, 4797 get_ST(0), 4798 loadLE(Ity_F64,mkexpr(addr))), 4799 mkU8(8)), 4800 mkU32(0x4500) 4801 )); 4802 fp_pop(); 4803 break; 4804 4805 case 4: /* FSUB double-real */ 4806 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 4807 break; 4808 4809 case 5: /* FSUBR double-real */ 4810 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 4811 break; 4812 4813 case 6: /* FDIV double-real */ 4814 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 4815 break; 4816 4817 case 7: /* FDIVR double-real */ 4818 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 4819 break; 4820 4821 default: 4822 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4823 vex_printf("first_opcode == 0xDC\n"); 4824 goto decode_fail; 4825 } 4826 4827 } else { 4828 4829 delta++; 4830 switch (modrm) { 4831 4832 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 4833 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 4834 break; 4835 4836 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 4837 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 4838 break; 4839 4840 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 4841 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 4842 break; 4843 4844 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 4845 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 4846 break; 4847 4848 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 4849 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 4850 break; 4851 4852 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 4853 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 4854 break; 4855 4856 default: 4857 goto decode_fail; 4858 } 4859 4860 } 4861 } 4862 4863 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 4864 else 4865 if (first_opcode == 0xDD) { 4866 4867 if (modrm < 0xC0) { 4868 4869 /* bits 5,4,3 are an opcode extension, and the modRM also 4870 specifies an address. 
*/ 4871 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4872 delta += len; 4873 4874 switch (gregOfRM(modrm)) { 4875 4876 case 0: /* FLD double-real */ 4877 DIP("fldl %s\n", dis_buf); 4878 fp_push(); 4879 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 4880 break; 4881 4882 case 1: /* FISTTPQ m64 (SSE3) */ 4883 DIP("fisttpll %s\n", dis_buf); 4884 storeLE( mkexpr(addr), 4885 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 4886 fp_pop(); 4887 break; 4888 4889 case 2: /* FST double-real */ 4890 DIP("fstl %s\n", dis_buf); 4891 storeLE(mkexpr(addr), get_ST(0)); 4892 break; 4893 4894 case 3: /* FSTP double-real */ 4895 DIP("fstpl %s\n", dis_buf); 4896 storeLE(mkexpr(addr), get_ST(0)); 4897 fp_pop(); 4898 break; 4899 4900 case 4: { /* FRSTOR m108 */ 4901 /* Uses dirty helper: 4902 VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, Addr32 ) */ 4903 IRTemp ew = newTemp(Ity_I32); 4904 IRDirty* d = unsafeIRDirty_0_N ( 4905 0/*regparms*/, 4906 "x86g_dirtyhelper_FRSTOR", 4907 &x86g_dirtyhelper_FRSTOR, 4908 mkIRExprVec_1( mkexpr(addr) ) 4909 ); 4910 d->needsBBP = True; 4911 d->tmp = ew; 4912 /* declare we're reading memory */ 4913 d->mFx = Ifx_Read; 4914 d->mAddr = mkexpr(addr); 4915 d->mSize = 108; 4916 4917 /* declare we're writing guest state */ 4918 d->nFxState = 5; 4919 4920 d->fxState[0].fx = Ifx_Write; 4921 d->fxState[0].offset = OFFB_FTOP; 4922 d->fxState[0].size = sizeof(UInt); 4923 4924 d->fxState[1].fx = Ifx_Write; 4925 d->fxState[1].offset = OFFB_FPREGS; 4926 d->fxState[1].size = 8 * sizeof(ULong); 4927 4928 d->fxState[2].fx = Ifx_Write; 4929 d->fxState[2].offset = OFFB_FPTAGS; 4930 d->fxState[2].size = 8 * sizeof(UChar); 4931 4932 d->fxState[3].fx = Ifx_Write; 4933 d->fxState[3].offset = OFFB_FPROUND; 4934 d->fxState[3].size = sizeof(UInt); 4935 4936 d->fxState[4].fx = Ifx_Write; 4937 d->fxState[4].offset = OFFB_FC3210; 4938 d->fxState[4].size = sizeof(UInt); 4939 4940 stmt( IRStmt_Dirty(d) ); 4941 4942 /* ew contains any emulation warning we may need to 4943 issue. If needed, side-exit to the next insn, 4944 reporting the warning, so that Valgrind's dispatcher 4945 sees the warning.
*/ 4946 put_emwarn( mkexpr(ew) ); 4947 stmt( 4948 IRStmt_Exit( 4949 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 4950 Ijk_EmWarn, 4951 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) 4952 ) 4953 ); 4954 4955 DIP("frstor %s\n", dis_buf); 4956 break; 4957 } 4958 4959 case 6: { /* FNSAVE m108 */ 4960 /* Uses dirty helper: 4961 void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, UInt ) */ 4962 IRDirty* d = unsafeIRDirty_0_N ( 4963 0/*regparms*/, 4964 "x86g_dirtyhelper_FSAVE", 4965 &x86g_dirtyhelper_FSAVE, 4966 mkIRExprVec_1( mkexpr(addr) ) 4967 ); 4968 d->needsBBP = True; 4969 /* declare we're writing memory */ 4970 d->mFx = Ifx_Write; 4971 d->mAddr = mkexpr(addr); 4972 d->mSize = 108; 4973 4974 /* declare we're reading guest state */ 4975 d->nFxState = 5; 4976 4977 d->fxState[0].fx = Ifx_Read; 4978 d->fxState[0].offset = OFFB_FTOP; 4979 d->fxState[0].size = sizeof(UInt); 4980 4981 d->fxState[1].fx = Ifx_Read; 4982 d->fxState[1].offset = OFFB_FPREGS; 4983 d->fxState[1].size = 8 * sizeof(ULong); 4984 4985 d->fxState[2].fx = Ifx_Read; 4986 d->fxState[2].offset = OFFB_FPTAGS; 4987 d->fxState[2].size = 8 * sizeof(UChar); 4988 4989 d->fxState[3].fx = Ifx_Read; 4990 d->fxState[3].offset = OFFB_FPROUND; 4991 d->fxState[3].size = sizeof(UInt); 4992 4993 d->fxState[4].fx = Ifx_Read; 4994 d->fxState[4].offset = OFFB_FC3210; 4995 d->fxState[4].size = sizeof(UInt); 4996 4997 stmt( IRStmt_Dirty(d) ); 4998 4999 DIP("fnsave %s\n", dis_buf); 5000 break; 5001 } 5002 5003 case 7: { /* FNSTSW m16 */ 5004 IRExpr* sw = get_FPU_sw(); 5005 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 5006 storeLE( mkexpr(addr), sw ); 5007 DIP("fnstsw %s\n", dis_buf); 5008 break; 5009 } 5010 5011 default: 5012 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5013 vex_printf("first_opcode == 0xDD\n"); 5014 goto decode_fail; 5015 } 5016 } else { 5017 delta++; 5018 switch (modrm) { 5019 5020 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 5021 r_dst = (UInt)modrm - 0xC0; 5022 DIP("ffree %%st(%d)\n", (Int)r_dst); 5023 put_ST_TAG ( r_dst, mkU8(0) ); 5024 break; 5025 5026 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 5027 r_dst = (UInt)modrm - 0xD0; 5028 DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst); 5029 /* P4 manual says: "If the destination operand is a 5030 non-empty register, the invalid-operation exception 5031 is not generated." Hence put_ST_UNCHECKED. */ 5032 put_ST_UNCHECKED(r_dst, get_ST(0)); 5033 break; 5034 5035 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 5036 r_dst = (UInt)modrm - 0xD8; 5037 DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst); 5038 /* P4 manual says: "If the destination operand is a 5039 non-empty register, the invalid-operation exception 5040 is not generated." Hence put_ST_UNCHECKED. */ 5041 put_ST_UNCHECKED(r_dst, get_ST(0)); 5042 fp_pop(); 5043 break; 5044 5045 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 5046 r_dst = (UInt)modrm - 0xE0; 5047 DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst); 5048 /* This forces C1 to zero, which isn't right. */ 5049 put_C3210( 5050 binop( Iop_And32, 5051 binop(Iop_Shl32, 5052 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5053 mkU8(8)), 5054 mkU32(0x4500) 5055 )); 5056 break; 5057 5058 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 5059 r_dst = (UInt)modrm - 0xE8; 5060 DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst); 5061 /* This forces C1 to zero, which isn't right.
*/ 5062 put_C3210( 5063 binop( Iop_And32, 5064 binop(Iop_Shl32, 5065 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5066 mkU8(8)), 5067 mkU32(0x4500) 5068 )); 5069 fp_pop(); 5070 break; 5071 5072 default: 5073 goto decode_fail; 5074 } 5075 } 5076 } 5077 5078 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 5079 else 5080 if (first_opcode == 0xDE) { 5081 5082 if (modrm < 0xC0) { 5083 5084 /* bits 5,4,3 are an opcode extension, and the modRM also 5085 specifies an address. */ 5086 IROp fop; 5087 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5088 delta += len; 5089 5090 switch (gregOfRM(modrm)) { 5091 5092 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 5093 DIP("fiaddw %s\n", dis_buf); 5094 fop = Iop_AddF64; 5095 goto do_fop_m16; 5096 5097 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 5098 DIP("fimulw %s\n", dis_buf); 5099 fop = Iop_MulF64; 5100 goto do_fop_m16; 5101 5102 case 2: /* FICOM m16int */ 5103 DIP("ficomw %s\n", dis_buf); 5104 /* This forces C1 to zero, which isn't right. */ 5105 put_C3210( 5106 binop( Iop_And32, 5107 binop(Iop_Shl32, 5108 binop(Iop_CmpF64, 5109 get_ST(0), 5110 unop(Iop_I32StoF64, 5111 unop(Iop_16Sto32, 5112 loadLE(Ity_I16,mkexpr(addr))))), 5113 mkU8(8)), 5114 mkU32(0x4500) 5115 )); 5116 break; 5117 5118 case 3: /* FICOMP m16int */ 5119 DIP("ficompw %s\n", dis_buf); 5120 /* This forces C1 to zero, which isn't right. */ 5121 put_C3210( 5122 binop( Iop_And32, 5123 binop(Iop_Shl32, 5124 binop(Iop_CmpF64, 5125 get_ST(0), 5126 unop(Iop_I32StoF64, 5127 unop(Iop_16Sto32, 5128 loadLE(Ity_I16,mkexpr(addr))))), 5129 mkU8(8)), 5130 mkU32(0x4500) 5131 )); 5132 fp_pop(); 5133 break; 5134 5135 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 5136 DIP("fisubw %s\n", dis_buf); 5137 fop = Iop_SubF64; 5138 goto do_fop_m16; 5139 5140 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 5141 DIP("fisubrw %s\n", dis_buf); 5142 fop = Iop_SubF64; 5143 goto do_foprev_m16; 5144 5145 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 5146 DIP("fidivw %s\n", dis_buf); 5147 fop = Iop_DivF64; 5148 goto do_fop_m16; 5149 5150 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 5151 DIP("fidivrw %s\n", dis_buf); 5152 fop = Iop_DivF64; 5153 goto do_foprev_m16; 5154 5155 do_fop_m16: 5156 put_ST_UNCHECKED(0, 5157 triop(fop, 5158 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5159 get_ST(0), 5160 unop(Iop_I32StoF64, 5161 unop(Iop_16Sto32, 5162 loadLE(Ity_I16, mkexpr(addr)))))); 5163 break; 5164 5165 do_foprev_m16: 5166 put_ST_UNCHECKED(0, 5167 triop(fop, 5168 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5169 unop(Iop_I32StoF64, 5170 unop(Iop_16Sto32, 5171 loadLE(Ity_I16, mkexpr(addr)))), 5172 get_ST(0))); 5173 break; 5174 5175 default: 5176 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5177 vex_printf("first_opcode == 0xDE\n"); 5178 goto decode_fail; 5179 } 5180 5181 } else { 5182 5183 delta++; 5184 switch (modrm) { 5185 5186 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 5187 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 5188 break; 5189 5190 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 5191 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 5192 break; 5193 5194 case 0xD9: /* FCOMPP %st(0),%st(1) */ 5195 DIP("fcompp %%st(0),%%st(1)\n"); 5196 /* This forces C1 to zero, which isn't right. */ 5197 put_C3210( 5198 binop( Iop_And32, 5199 binop(Iop_Shl32, 5200 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 5201 mkU8(8)), 5202 mkU32(0x4500) 5203 )); 5204 fp_pop(); 5205 fp_pop(); 5206 break; 5207 5208 case 0xE0 ...
0xE7: /* FSUBRP %st(0),%st(?) */ 5209 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 5210 break; 5211 5212 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 5213 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 5214 break; 5215 5216 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 5217 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 5218 break; 5219 5220 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 5221 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 5222 break; 5223 5224 default: 5225 goto decode_fail; 5226 } 5227 5228 } 5229 } 5230 5231 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 5232 else 5233 if (first_opcode == 0xDF) { 5234 5235 if (modrm < 0xC0) { 5236 5237 /* bits 5,4,3 are an opcode extension, and the modRM also 5238 specifies an address. */ 5239 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5240 delta += len; 5241 5242 switch (gregOfRM(modrm)) { 5243 5244 case 0: /* FILD m16int */ 5245 DIP("fildw %s\n", dis_buf); 5246 fp_push(); 5247 put_ST(0, unop(Iop_I32StoF64, 5248 unop(Iop_16Sto32, 5249 loadLE(Ity_I16, mkexpr(addr))))); 5250 break; 5251 5252 case 1: /* FISTTPS m16 (SSE3) */ 5253 DIP("fisttps %s\n", dis_buf); 5254 storeLE( mkexpr(addr), 5255 binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) ); 5256 fp_pop(); 5257 break; 5258 5259 case 2: /* FIST m16 */ 5260 DIP("fists %s\n", dis_buf); 5261 storeLE( mkexpr(addr), 5262 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) ); 5263 break; 5264 5265 case 3: /* FISTP m16 */ 5266 DIP("fistps %s\n", dis_buf); 5267 storeLE( mkexpr(addr), 5268 binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) ); 5269 fp_pop(); 5270 break; 5271 5272 case 5: /* FILD m64 */ 5273 DIP("fildll %s\n", dis_buf); 5274 fp_push(); 5275 put_ST(0, binop(Iop_I64StoF64, 5276 get_roundingmode(), 5277 loadLE(Ity_I64, mkexpr(addr)))); 5278 break; 5279 5280 case 7: /* FISTP m64 */ 5281 DIP("fistpll %s\n", dis_buf); 5282 storeLE( mkexpr(addr), 5283 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 5284 fp_pop(); 5285 break; 5286 5287 default: 5288 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 5289 vex_printf("first_opcode == 0xDF\n"); 5290 goto decode_fail; 5291 } 5292 5293 } else { 5294 5295 delta++; 5296 switch (modrm) { 5297 5298 case 0xC0: /* FFREEP %st(0) */ 5299 DIP("ffreep %%st(%d)\n", 0); 5300 put_ST_TAG ( 0, mkU8(0) ); 5301 fp_pop(); 5302 break; 5303 5304 case 0xE0: /* FNSTSW %ax */ 5305 DIP("fnstsw %%ax\n"); 5306 /* Get the FPU status word value and dump it in %AX. */ 5307 if (0) { 5308 /* The obvious thing to do is simply dump the 16-bit 5309 status word value in %AX. However, due to a 5310 limitation in Memcheck's origin tracking 5311 machinery, this causes Memcheck not to track the 5312 origin of any undefinedness into %AH (only into 5313 %AL/%AX/%EAX), which means origins are lost in 5314 the sequence "fnstsw %ax; test $M,%ah; jcond .." */ 5315 putIReg(2, R_EAX, get_FPU_sw()); 5316 } else { 5317 /* So a somewhat lame kludge is to make it very 5318 clear to Memcheck that the value is written to 5319 both %AH and %AL. This generates marginally 5320 worse code, but I don't think it matters much. */ 5321 IRTemp t16 = newTemp(Ity_I16); 5322 assign(t16, get_FPU_sw()); 5323 putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) ); 5324 putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) ); 5325 } 5326 break; 5327 5328 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ 5329 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 5330 break; 5331 5332 case 0xF0 ...
0xF7: /* FCOMIP %st(0),%st(?) */ 5333 /* not really right since COMIP != UCOMIP */ 5334 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 5335 break; 5336 5337 default: 5338 goto decode_fail; 5339 } 5340 } 5341 5342 } 5343 5344 else 5345 vpanic("dis_FPU(x86): invalid primary opcode"); 5346 5347 *decode_ok = True; 5348 return delta; 5349 5350 decode_fail: 5351 *decode_ok = False; 5352 return delta; 5353 } 5354 5355 5356 /*------------------------------------------------------------*/ 5357 /*--- ---*/ 5358 /*--- MMX INSTRUCTIONS ---*/ 5359 /*--- ---*/ 5360 /*------------------------------------------------------------*/ 5361 5362 /* Effect of MMX insns on x87 FPU state (table 11-2 of 5363 IA32 arch manual, volume 3): 5364 5365 Read from, or write to MMX register (viz, any insn except EMMS): 5366 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 5367 * FP stack pointer set to zero 5368 5369 EMMS: 5370 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 5371 * FP stack pointer set to zero 5372 */ 5373 5374 static void do_MMX_preamble ( void ) 5375 { 5376 Int i; 5377 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5378 IRExpr* zero = mkU32(0); 5379 IRExpr* tag1 = mkU8(1); 5380 put_ftop(zero); 5381 for (i = 0; i < 8; i++) 5382 stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); 5383 } 5384 5385 static void do_EMMS_preamble ( void ) 5386 { 5387 Int i; 5388 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5389 IRExpr* zero = mkU32(0); 5390 IRExpr* tag0 = mkU8(0); 5391 put_ftop(zero); 5392 for (i = 0; i < 8; i++) 5393 stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); 5394 } 5395 5396 5397 static IRExpr* getMMXReg ( UInt archreg ) 5398 { 5399 vassert(archreg < 8); 5400 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 5401 } 5402 5403 5404 static void putMMXReg ( UInt archreg, IRExpr* e ) 5405 { 5406 vassert(archreg < 8); 5407 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 5408 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 5409 } 5410 5411 5412 /* Helper for non-shift MMX insns. Note this is incomplete in the 5413 sense that it does not first call do_MMX_preamble() -- that is the 5414 responsibility of its caller. 
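What it does do: map the second opcode byte either onto a 64-bit vector IROp or, for pmaddwd/psadbw, onto a clean helper; fetch the E operand from an MMX register or from memory; optionally complement G (invG, for pandn) or put E on the left (eLeft, for the pack/unpack cases); and write the result back to the G register.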
*/ 5415 5416 static 5417 UInt dis_MMXop_regmem_to_reg ( UChar sorb, 5418 Int delta, 5419 UChar opc, 5420 HChar* name, 5421 Bool show_granularity ) 5422 { 5423 HChar dis_buf[50]; 5424 UChar modrm = getIByte(delta); 5425 Bool isReg = epartIsReg(modrm); 5426 IRExpr* argL = NULL; 5427 IRExpr* argR = NULL; 5428 IRExpr* argG = NULL; 5429 IRExpr* argE = NULL; 5430 IRTemp res = newTemp(Ity_I64); 5431 5432 Bool invG = False; 5433 IROp op = Iop_INVALID; 5434 void* hAddr = NULL; 5435 HChar* hName = NULL; 5436 Bool eLeft = False; 5437 5438 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 5439 5440 switch (opc) { 5441 /* Original MMX ones */ 5442 case 0xFC: op = Iop_Add8x8; break; 5443 case 0xFD: op = Iop_Add16x4; break; 5444 case 0xFE: op = Iop_Add32x2; break; 5445 5446 case 0xEC: op = Iop_QAdd8Sx8; break; 5447 case 0xED: op = Iop_QAdd16Sx4; break; 5448 5449 case 0xDC: op = Iop_QAdd8Ux8; break; 5450 case 0xDD: op = Iop_QAdd16Ux4; break; 5451 5452 case 0xF8: op = Iop_Sub8x8; break; 5453 case 0xF9: op = Iop_Sub16x4; break; 5454 case 0xFA: op = Iop_Sub32x2; break; 5455 5456 case 0xE8: op = Iop_QSub8Sx8; break; 5457 case 0xE9: op = Iop_QSub16Sx4; break; 5458 5459 case 0xD8: op = Iop_QSub8Ux8; break; 5460 case 0xD9: op = Iop_QSub16Ux4; break; 5461 5462 case 0xE5: op = Iop_MulHi16Sx4; break; 5463 case 0xD5: op = Iop_Mul16x4; break; 5464 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break; 5465 5466 case 0x74: op = Iop_CmpEQ8x8; break; 5467 case 0x75: op = Iop_CmpEQ16x4; break; 5468 case 0x76: op = Iop_CmpEQ32x2; break; 5469 5470 case 0x64: op = Iop_CmpGT8Sx8; break; 5471 case 0x65: op = Iop_CmpGT16Sx4; break; 5472 case 0x66: op = Iop_CmpGT32Sx2; break; 5473 5474 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 5475 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 5476 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 5477 5478 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 5479 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 5480 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 5481 5482 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 5483 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 5484 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 5485 5486 case 0xDB: op = Iop_And64; break; 5487 case 0xDF: op = Iop_And64; invG = True; break; 5488 case 0xEB: op = Iop_Or64; break; 5489 case 0xEF: /* Possibly do better here if argL and argR are the 5490 same reg */ 5491 op = Iop_Xor64; break; 5492 5493 /* Introduced in SSE1 */ 5494 case 0xE0: op = Iop_Avg8Ux8; break; 5495 case 0xE3: op = Iop_Avg16Ux4; break; 5496 case 0xEE: op = Iop_Max16Sx4; break; 5497 case 0xDE: op = Iop_Max8Ux8; break; 5498 case 0xEA: op = Iop_Min16Sx4; break; 5499 case 0xDA: op = Iop_Min8Ux8; break; 5500 case 0xE4: op = Iop_MulHi16Ux4; break; 5501 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break; 5502 5503 /* Introduced in SSE2 */ 5504 case 0xD4: op = Iop_Add64; break; 5505 case 0xFB: op = Iop_Sub64; break; 5506 5507 default: 5508 vex_printf("\n0x%x\n", (Int)opc); 5509 vpanic("dis_MMXop_regmem_to_reg"); 5510 } 5511 5512 # undef XXX 5513 5514 argG = getMMXReg(gregOfRM(modrm)); 5515 if (invG) 5516 argG = unop(Iop_Not64, argG); 5517 5518 if (isReg) { 5519 delta++; 5520 argE = getMMXReg(eregOfRM(modrm)); 5521 } else { 5522 Int len; 5523 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5524 delta += len; 5525 argE = loadLE(Ity_I64, mkexpr(addr)); 5526 } 5527 5528 if (eLeft) { 5529 argL = argE; 5530 argR = argG; 5531 } else { 
5532 argL = argG; 5533 argR = argE; 5534 } 5535 5536 if (op != Iop_INVALID) { 5537 vassert(hName == NULL); 5538 vassert(hAddr == NULL); 5539 assign(res, binop(op, argL, argR)); 5540 } else { 5541 vassert(hName != NULL); 5542 vassert(hAddr != NULL); 5543 assign( res, 5544 mkIRExprCCall( 5545 Ity_I64, 5546 0/*regparms*/, hName, hAddr, 5547 mkIRExprVec_2( argL, argR ) 5548 ) 5549 ); 5550 } 5551 5552 putMMXReg( gregOfRM(modrm), mkexpr(res) ); 5553 5554 DIP("%s%s %s, %s\n", 5555 name, show_granularity ? nameMMXGran(opc & 3) : "", 5556 ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ), 5557 nameMMXReg(gregOfRM(modrm)) ); 5558 5559 return delta; 5560 } 5561 5562 5563 /* Vector by scalar shift of G by the amount specified at the bottom 5564 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 5565 5566 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta, 5567 HChar* opname, IROp op ) 5568 { 5569 HChar dis_buf[50]; 5570 Int alen, size; 5571 IRTemp addr; 5572 Bool shl, shr, sar; 5573 UChar rm = getIByte(delta); 5574 IRTemp g0 = newTemp(Ity_I64); 5575 IRTemp g1 = newTemp(Ity_I64); 5576 IRTemp amt = newTemp(Ity_I32); 5577 IRTemp amt8 = newTemp(Ity_I8); 5578 5579 if (epartIsReg(rm)) { 5580 assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) ); 5581 DIP("%s %s,%s\n", opname, 5582 nameMMXReg(eregOfRM(rm)), 5583 nameMMXReg(gregOfRM(rm)) ); 5584 delta++; 5585 } else { 5586 addr = disAMode ( &alen, sorb, delta, dis_buf ); 5587 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 5588 DIP("%s %s,%s\n", opname, 5589 dis_buf, 5590 nameMMXReg(gregOfRM(rm)) ); 5591 delta += alen; 5592 } 5593 assign( g0, getMMXReg(gregOfRM(rm)) ); 5594 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 5595 5596 shl = shr = sar = False; 5597 size = 0; 5598 switch (op) { 5599 case Iop_ShlN16x4: shl = True; size = 16; break; 5600 case Iop_ShlN32x2: shl = True; size = 32; break; 5601 case Iop_Shl64: shl = True; size = 64; break; 5602 case Iop_ShrN16x4: shr = True; size = 16; break; 5603 case Iop_ShrN32x2: shr = True; size = 32; break; 5604 case Iop_Shr64: shr = True; size = 64; break; 5605 case Iop_SarN16x4: sar = True; size = 16; break; 5606 case Iop_SarN32x2: sar = True; size = 32; break; 5607 default: vassert(0); 5608 } 5609 5610 if (shl || shr) { 5611 assign( 5612 g1, 5613 IRExpr_Mux0X( 5614 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 5615 mkU64(0), 5616 binop(op, mkexpr(g0), mkexpr(amt8)) 5617 ) 5618 ); 5619 } else 5620 if (sar) { 5621 assign( 5622 g1, 5623 IRExpr_Mux0X( 5624 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 5625 binop(op, mkexpr(g0), mkU8(size-1)), 5626 binop(op, mkexpr(g0), mkexpr(amt8)) 5627 ) 5628 ); 5629 } else { 5630 /*NOTREACHED*/ 5631 vassert(0); 5632 } 5633 5634 putMMXReg( gregOfRM(rm), mkexpr(g1) ); 5635 return delta; 5636 } 5637 5638 5639 /* Vector by scalar shift of E by an immediate byte. This is a 5640 straight copy of dis_SSE_shiftE_imm.
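Because the shift amount is an immediate, out-of-range amounts are resolved at decode time: a logical shift by an amount >= the lane size produces zero, whilst an arithmetic right shift is clamped to size-1, replicating the sign bit, matching what the hardware does.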
*/ 5641 5642 static 5643 UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op ) 5644 { 5645 Bool shl, shr, sar; 5646 UChar rm = getIByte(delta); 5647 IRTemp e0 = newTemp(Ity_I64); 5648 IRTemp e1 = newTemp(Ity_I64); 5649 UChar amt, size; 5650 vassert(epartIsReg(rm)); 5651 vassert(gregOfRM(rm) == 2 5652 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 5653 amt = getIByte(delta+1); 5654 delta += 2; 5655 DIP("%s $%d,%s\n", opname, 5656 (Int)amt, 5657 nameMMXReg(eregOfRM(rm)) ); 5658 5659 assign( e0, getMMXReg(eregOfRM(rm)) ); 5660 5661 shl = shr = sar = False; 5662 size = 0; 5663 switch (op) { 5664 case Iop_ShlN16x4: shl = True; size = 16; break; 5665 case Iop_ShlN32x2: shl = True; size = 32; break; 5666 case Iop_Shl64: shl = True; size = 64; break; 5667 case Iop_SarN16x4: sar = True; size = 16; break; 5668 case Iop_SarN32x2: sar = True; size = 32; break; 5669 case Iop_ShrN16x4: shr = True; size = 16; break; 5670 case Iop_ShrN32x2: shr = True; size = 32; break; 5671 case Iop_Shr64: shr = True; size = 64; break; 5672 default: vassert(0); 5673 } 5674 5675 if (shl || shr) { 5676 assign( e1, amt >= size 5677 ? mkU64(0) 5678 : binop(op, mkexpr(e0), mkU8(amt)) 5679 ); 5680 } else 5681 if (sar) { 5682 assign( e1, amt >= size 5683 ? binop(op, mkexpr(e0), mkU8(size-1)) 5684 : binop(op, mkexpr(e0), mkU8(amt)) 5685 ); 5686 } else { 5687 /*NOTREACHED*/ 5688 vassert(0); 5689 } 5690 5691 putMMXReg( eregOfRM(rm), mkexpr(e1) ); 5692 return delta; 5693 } 5694 5695 5696 /* Completely handle all MMX instructions except emms. */ 5697 5698 static 5699 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) 5700 { 5701 Int len; 5702 UChar modrm; 5703 HChar dis_buf[50]; 5704 UChar opc = getIByte(delta); 5705 delta++; 5706 5707 /* dis_MMX handles all insns except emms. 
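Every insn decoded here accordingly begins with do_MMX_preamble(), which models the state change common to all non-EMMS MMX insns: FTOP is zeroed and all eight FP tags are marked non-empty.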
*/ 5708 do_MMX_preamble(); 5709 5710 switch (opc) { 5711 5712 case 0x6E: 5713 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/ 5714 if (sz != 4) 5715 goto mmx_decode_failure; 5716 modrm = getIByte(delta); 5717 if (epartIsReg(modrm)) { 5718 delta++; 5719 putMMXReg( 5720 gregOfRM(modrm), 5721 binop( Iop_32HLto64, 5722 mkU32(0), 5723 getIReg(4, eregOfRM(modrm)) ) ); 5724 DIP("movd %s, %s\n", 5725 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5726 } else { 5727 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5728 delta += len; 5729 putMMXReg( 5730 gregOfRM(modrm), 5731 binop( Iop_32HLto64, 5732 mkU32(0), 5733 loadLE(Ity_I32, mkexpr(addr)) ) ); 5734 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm))); 5735 } 5736 break; 5737 5738 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */ 5739 if (sz != 4) 5740 goto mmx_decode_failure; 5741 modrm = getIByte(delta); 5742 if (epartIsReg(modrm)) { 5743 delta++; 5744 putIReg( 4, eregOfRM(modrm), 5745 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5746 DIP("movd %s, %s\n", 5747 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 5748 } else { 5749 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5750 delta += len; 5751 storeLE( mkexpr(addr), 5752 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5753 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf); 5754 } 5755 break; 5756 5757 case 0x6F: 5758 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 5759 if (sz != 4) 5760 goto mmx_decode_failure; 5761 modrm = getIByte(delta); 5762 if (epartIsReg(modrm)) { 5763 delta++; 5764 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) ); 5765 DIP("movq %s, %s\n", 5766 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5767 } else { 5768 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5769 delta += len; 5770 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 5771 DIP("movq %s, %s\n", 5772 dis_buf, nameMMXReg(gregOfRM(modrm))); 5773 } 5774 break; 5775 5776 case 0x7F: 5777 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 5778 if (sz != 4) 5779 goto mmx_decode_failure; 5780 modrm = getIByte(delta); 5781 if (epartIsReg(modrm)) { 5782 delta++; 5783 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) ); 5784 DIP("movq %s, %s\n", 5785 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm))); 5786 } else { 5787 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5788 delta += len; 5789 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 5790 DIP("mov(nt)q %s, %s\n", 5791 nameMMXReg(gregOfRM(modrm)), dis_buf); 5792 } 5793 break; 5794 5795 case 0xFC: 5796 case 0xFD: 5797 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 5798 if (sz != 4) 5799 goto mmx_decode_failure; 5800 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True ); 5801 break; 5802 5803 case 0xEC: 5804 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5805 if (sz != 4) 5806 goto mmx_decode_failure; 5807 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True ); 5808 break; 5809 5810 case 0xDC: 5811 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5812 if (sz != 4) 5813 goto mmx_decode_failure; 5814 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True ); 5815 break; 5816 5817 case 0xF8: 5818 case 0xF9: 5819 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 5820 if (sz != 4) 5821 goto mmx_decode_failure; 5822 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True ); 5823 break; 5824 5825 case 0xE8: 5826 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5827 if (sz != 4) 
5828 goto mmx_decode_failure; 5829 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True ); 5830 break; 5831 5832 case 0xD8: 5833 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5834 if (sz != 4) 5835 goto mmx_decode_failure; 5836 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True ); 5837 break; 5838 5839 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 5840 if (sz != 4) 5841 goto mmx_decode_failure; 5842 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False ); 5843 break; 5844 5845 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 5846 if (sz != 4) 5847 goto mmx_decode_failure; 5848 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False ); 5849 break; 5850 5851 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 5852 vassert(sz == 4); 5853 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False ); 5854 break; 5855 5856 case 0x74: 5857 case 0x75: 5858 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 5859 if (sz != 4) 5860 goto mmx_decode_failure; 5861 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True ); 5862 break; 5863 5864 case 0x64: 5865 case 0x65: 5866 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 5867 if (sz != 4) 5868 goto mmx_decode_failure; 5869 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True ); 5870 break; 5871 5872 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 5873 if (sz != 4) 5874 goto mmx_decode_failure; 5875 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False ); 5876 break; 5877 5878 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5879 if (sz != 4) 5880 goto mmx_decode_failure; 5881 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False ); 5882 break; 5883 5884 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5885 if (sz != 4) 5886 goto mmx_decode_failure; 5887 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False ); 5888 break; 5889 5890 case 0x68: 5891 case 0x69: 5892 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 5893 if (sz != 4) 5894 goto mmx_decode_failure; 5895 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True ); 5896 break; 5897 5898 case 0x60: 5899 case 0x61: 5900 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5901 if (sz != 4) 5902 goto mmx_decode_failure; 5903 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True ); 5904 break; 5905 5906 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 5907 if (sz != 4) 5908 goto mmx_decode_failure; 5909 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False ); 5910 break; 5911 5912 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 5913 if (sz != 4) 5914 goto mmx_decode_failure; 5915 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False ); 5916 break; 5917 5918 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 5919 if (sz != 4) 5920 goto mmx_decode_failure; 5921 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False ); 5922 break; 5923 5924 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 5925 if (sz != 4) 5926 goto mmx_decode_failure; 5927 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False ); 5928 break; 5929 5930 # define SHIFT_BY_REG(_name,_op) \ 5931 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \ 5932 break; 5933 5934 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5935 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 5936 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 5937 
case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 5938 5939 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5940 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 5941 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 5942 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 5943 5944 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 5945 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 5946 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 5947 5948 # undef SHIFT_BY_REG 5949 5950 case 0x71: 5951 case 0x72: 5952 case 0x73: { 5953 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 5954 UChar byte2, subopc; 5955 if (sz != 4) 5956 goto mmx_decode_failure; 5957 byte2 = getIByte(delta); /* amode / sub-opcode */ 5958 subopc = toUChar( (byte2 >> 3) & 7 ); 5959 5960 # define SHIFT_BY_IMM(_name,_op) \ 5961 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 5962 } while (0) 5963 5964 if (subopc == 2 /*SRL*/ && opc == 0x71) 5965 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 5966 else if (subopc == 2 /*SRL*/ && opc == 0x72) 5967 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 5968 else if (subopc == 2 /*SRL*/ && opc == 0x73) 5969 SHIFT_BY_IMM("psrlq", Iop_Shr64); 5970 5971 else if (subopc == 4 /*SAR*/ && opc == 0x71) 5972 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 5973 else if (subopc == 4 /*SAR*/ && opc == 0x72) 5974 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 5975 5976 else if (subopc == 6 /*SHL*/ && opc == 0x71) 5977 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 5978 else if (subopc == 6 /*SHL*/ && opc == 0x72) 5979 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 5980 else if (subopc == 6 /*SHL*/ && opc == 0x73) 5981 SHIFT_BY_IMM("psllq", Iop_Shl64); 5982 5983 else goto mmx_decode_failure; 5984 5985 # undef SHIFT_BY_IMM 5986 break; 5987 } 5988 5989 case 0xF7: { 5990 IRTemp addr = newTemp(Ity_I32); 5991 IRTemp regD = newTemp(Ity_I64); 5992 IRTemp regM = newTemp(Ity_I64); 5993 IRTemp mask = newTemp(Ity_I64); 5994 IRTemp olddata = newTemp(Ity_I64); 5995 IRTemp newdata = newTemp(Ity_I64); 5996 5997 modrm = getIByte(delta); 5998 if (sz != 4 || (!epartIsReg(modrm))) 5999 goto mmx_decode_failure; 6000 delta++; 6001 6002 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 6003 assign( regM, getMMXReg( eregOfRM(modrm) )); 6004 assign( regD, getMMXReg( gregOfRM(modrm) )); 6005 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 6006 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 6007 assign( newdata, 6008 binop(Iop_Or64, 6009 binop(Iop_And64, 6010 mkexpr(regD), 6011 mkexpr(mask) ), 6012 binop(Iop_And64, 6013 mkexpr(olddata), 6014 unop(Iop_Not64, mkexpr(mask)))) ); 6015 storeLE( mkexpr(addr), mkexpr(newdata) ); 6016 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), 6017 nameMMXReg( gregOfRM(modrm) ) ); 6018 break; 6019 } 6020 6021 /* --- MMX decode failure --- */ 6022 default: 6023 mmx_decode_failure: 6024 *decode_ok = False; 6025 return delta; /* ignored */ 6026 6027 } 6028 6029 *decode_ok = True; 6030 return delta; 6031 } 6032 6033 6034 /*------------------------------------------------------------*/ 6035 /*--- More misc arithmetic and other obscure insns. ---*/ 6036 /*------------------------------------------------------------*/ 6037 6038 /* Double length left and right shifts. Apparently only required in 6039 v-size (no b- variant). */ 6040 static 6041 UInt dis_SHLRD_Gv_Ev ( UChar sorb, 6042 Int delta, UChar modrm, 6043 Int sz, 6044 IRExpr* shift_amt, 6045 Bool amt_is_literal, 6046 HChar* shift_amt_txt, 6047 Bool left_shift ) 6048 { 6049 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 6050 for printing it. 
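In practice shift_amt is either an imm8 taken from the insn stream or
the current value of %cl; amt_is_literal says which, and accounts for
the extra immediate byte consumed at the end of this function.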
And eip on entry points at the modrm byte. */ 6051 Int len; 6052 HChar dis_buf[50]; 6053 6054 IRType ty = szToITy(sz); 6055 IRTemp gsrc = newTemp(ty); 6056 IRTemp esrc = newTemp(ty); 6057 IRTemp addr = IRTemp_INVALID; 6058 IRTemp tmpSH = newTemp(Ity_I8); 6059 IRTemp tmpL = IRTemp_INVALID; 6060 IRTemp tmpRes = IRTemp_INVALID; 6061 IRTemp tmpSubSh = IRTemp_INVALID; 6062 IROp mkpair; 6063 IROp getres; 6064 IROp shift; 6065 IRExpr* mask = NULL; 6066 6067 vassert(sz == 2 || sz == 4); 6068 6069 /* The E-part is the destination; this is shifted. The G-part 6070 supplies bits to be shifted into the E-part, but is not 6071 changed. 6072 6073 If shifting left, form a double-length word with E at the top 6074 and G at the bottom, and shift this left. The result is then in 6075 the high part. 6076 6077 If shifting right, form a double-length word with G at the top 6078 and E at the bottom, and shift this right. The result is then 6079 at the bottom. */ 6080 6081 /* Fetch the operands. */ 6082 6083 assign( gsrc, getIReg(sz, gregOfRM(modrm)) ); 6084 6085 if (epartIsReg(modrm)) { 6086 delta++; 6087 assign( esrc, getIReg(sz, eregOfRM(modrm)) ); 6088 DIP("sh%cd%c %s, %s, %s\n", 6089 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6090 shift_amt_txt, 6091 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm))); 6092 } else { 6093 addr = disAMode ( &len, sorb, delta, dis_buf ); 6094 delta += len; 6095 assign( esrc, loadLE(ty, mkexpr(addr)) ); 6096 DIP("sh%cd%c %s, %s, %s\n", 6097 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6098 shift_amt_txt, 6099 nameIReg(sz, gregOfRM(modrm)), dis_buf); 6100 } 6101 6102 /* Round up the relevant primops. */ 6103 6104 if (sz == 4) { 6105 tmpL = newTemp(Ity_I64); 6106 tmpRes = newTemp(Ity_I32); 6107 tmpSubSh = newTemp(Ity_I32); 6108 mkpair = Iop_32HLto64; 6109 getres = left_shift ? Iop_64HIto32 : Iop_64to32; 6110 shift = left_shift ? Iop_Shl64 : Iop_Shr64; 6111 mask = mkU8(31); 6112 } else { 6113 /* sz == 2 */ 6114 tmpL = newTemp(Ity_I32); 6115 tmpRes = newTemp(Ity_I16); 6116 tmpSubSh = newTemp(Ity_I16); 6117 mkpair = Iop_16HLto32; 6118 getres = left_shift ? Iop_32HIto16 : Iop_32to16; 6119 shift = left_shift ? Iop_Shl32 : Iop_Shr32; 6120 mask = mkU8(15); 6121 } 6122 6123 /* Do the shift, calculate the subshift value, and set 6124 the flag thunk. */ 6125 6126 assign( tmpSH, binop(Iop_And8, shift_amt, mask) ); 6127 6128 if (left_shift) 6129 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) ); 6130 else 6131 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) ); 6132 6133 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) ); 6134 assign( tmpSubSh, 6135 unop(getres, 6136 binop(shift, 6137 mkexpr(tmpL), 6138 binop(Iop_And8, 6139 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 6140 mask))) ); 6141 6142 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32, 6143 tmpRes, tmpSubSh, ty, tmpSH ); 6144 6145 /* Put result back. */ 6146 6147 if (epartIsReg(modrm)) { 6148 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes)); 6149 } else { 6150 storeLE( mkexpr(addr), mkexpr(tmpRes) ); 6151 } 6152 6153 if (amt_is_literal) delta++; 6154 return delta; 6155 } 6156 6157 6158 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 6159 required. 
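Note the Intel semantics here: for the register-destination forms the
bit offset is masked to the operand width, but for the memory forms it
is not masked at all. For example, bt %eax,(%ebx) tests bit (eax & 7)
of the byte at address ebx + (eax >>s 3), which may lie well outside
the word at (%ebx). The code below implements exactly that split.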
*/ 6160 6161 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 6162 6163 static HChar* nameBtOp ( BtOp op ) 6164 { 6165 switch (op) { 6166 case BtOpNone: return ""; 6167 case BtOpSet: return "s"; 6168 case BtOpReset: return "r"; 6169 case BtOpComp: return "c"; 6170 default: vpanic("nameBtOp(x86)"); 6171 } 6172 } 6173 6174 6175 static 6176 UInt dis_bt_G_E ( VexAbiInfo* vbi, 6177 UChar sorb, Bool locked, Int sz, Int delta, BtOp op ) 6178 { 6179 HChar dis_buf[50]; 6180 UChar modrm; 6181 Int len; 6182 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 6183 t_addr1, t_esp, t_mask, t_new; 6184 6185 vassert(sz == 2 || sz == 4); 6186 6187 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 6188 = t_addr0 = t_addr1 = t_esp 6189 = t_mask = t_new = IRTemp_INVALID; 6190 6191 t_fetched = newTemp(Ity_I8); 6192 t_new = newTemp(Ity_I8); 6193 t_bitno0 = newTemp(Ity_I32); 6194 t_bitno1 = newTemp(Ity_I32); 6195 t_bitno2 = newTemp(Ity_I8); 6196 t_addr1 = newTemp(Ity_I32); 6197 modrm = getIByte(delta); 6198 6199 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) ); 6200 6201 if (epartIsReg(modrm)) { 6202 delta++; 6203 /* Get it onto the client's stack. */ 6204 t_esp = newTemp(Ity_I32); 6205 t_addr0 = newTemp(Ity_I32); 6206 6207 /* For the choice of the value 128, see comment in dis_bt_G_E in 6208 guest_amd64_toIR.c. We point out here only that 128 is 6209 fast-cased in Memcheck and is > 0, so seems like a good 6210 choice. */ 6211 vassert(vbi->guest_stack_redzone_size == 0); 6212 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) ); 6213 putIReg(4, R_ESP, mkexpr(t_esp)); 6214 6215 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) ); 6216 6217 /* Make t_addr0 point at it. */ 6218 assign( t_addr0, mkexpr(t_esp) ); 6219 6220 /* Mask out upper bits of the shift amount, since we're doing a 6221 reg. */ 6222 assign( t_bitno1, binop(Iop_And32, 6223 mkexpr(t_bitno0), 6224 mkU32(sz == 4 ? 31 : 15)) ); 6225 6226 } else { 6227 t_addr0 = disAMode ( &len, sorb, delta, dis_buf ); 6228 delta += len; 6229 assign( t_bitno1, mkexpr(t_bitno0) ); 6230 } 6231 6232 /* At this point: t_addr0 is the address being operated on. If it 6233 was a reg, we will have pushed it onto the client's stack. 6234 t_bitno1 is the bit number, suitably masked in the case of a 6235 reg. */ 6236 6237 /* Now the main sequence. 
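Worked example: with t_bitno1 == 100, t_addr1 = t_addr0 + (100 >>s 3)
= t_addr0 + 12, and t_bitno2 = 100 & 7 = 4; that is, bit 4 of the
byte at offset 12.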
*/ 6238 assign( t_addr1, 6239 binop(Iop_Add32, 6240 mkexpr(t_addr0), 6241 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) ); 6242 6243 /* t_addr1 now holds effective address */ 6244 6245 assign( t_bitno2, 6246 unop(Iop_32to8, 6247 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) ); 6248 6249 /* t_bitno2 contains offset of bit within byte */ 6250 6251 if (op != BtOpNone) { 6252 t_mask = newTemp(Ity_I8); 6253 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 6254 } 6255 6256 /* t_mask is now a suitable byte mask */ 6257 6258 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 6259 6260 if (op != BtOpNone) { 6261 switch (op) { 6262 case BtOpSet: 6263 assign( t_new, 6264 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6265 break; 6266 case BtOpComp: 6267 assign( t_new, 6268 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6269 break; 6270 case BtOpReset: 6271 assign( t_new, 6272 binop(Iop_And8, mkexpr(t_fetched), 6273 unop(Iop_Not8, mkexpr(t_mask))) ); 6274 break; 6275 default: 6276 vpanic("dis_bt_G_E(x86)"); 6277 } 6278 if (locked && !epartIsReg(modrm)) { 6279 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 6280 mkexpr(t_new)/*new*/, 6281 guest_EIP_curr_instr ); 6282 } else { 6283 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 6284 } 6285 } 6286 6287 /* Side effect done; now get selected bit into Carry flag */ 6288 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 6289 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6290 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6291 stmt( IRStmt_Put( 6292 OFFB_CC_DEP1, 6293 binop(Iop_And32, 6294 binop(Iop_Shr32, 6295 unop(Iop_8Uto32, mkexpr(t_fetched)), 6296 mkexpr(t_bitno2)), 6297 mkU32(1))) 6298 ); 6299 /* Set NDEP even though it isn't used. This makes redundant-PUT 6300 elimination of previous stores to this field work better. */ 6301 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6302 6303 /* Move reg operand from stack back to reg */ 6304 if (epartIsReg(modrm)) { 6305 /* t_esp still points at it. */ 6306 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) ); 6307 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) ); 6308 } 6309 6310 DIP("bt%s%c %s, %s\n", 6311 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 6312 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) ); 6313 6314 return delta; 6315 } 6316 6317 6318 6319 /* Handle BSF/BSR. Only v-size seems necessary. */ 6320 static 6321 UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) 6322 { 6323 Bool isReg; 6324 UChar modrm; 6325 HChar dis_buf[50]; 6326 6327 IRType ty = szToITy(sz); 6328 IRTemp src = newTemp(ty); 6329 IRTemp dst = newTemp(ty); 6330 6331 IRTemp src32 = newTemp(Ity_I32); 6332 IRTemp dst32 = newTemp(Ity_I32); 6333 IRTemp src8 = newTemp(Ity_I8); 6334 6335 vassert(sz == 4 || sz == 2); 6336 6337 modrm = getIByte(delta); 6338 6339 isReg = epartIsReg(modrm); 6340 if (isReg) { 6341 delta++; 6342 assign( src, getIReg(sz, eregOfRM(modrm)) ); 6343 } else { 6344 Int len; 6345 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 6346 delta += len; 6347 assign( src, loadLE(ty, mkexpr(addr)) ); 6348 } 6349 6350 DIP("bs%c%c %s, %s\n", 6351 fwds ? 'f' : 'r', nameISize(sz), 6352 ( isReg ? 
nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
6353 nameIReg(sz, gregOfRM(modrm)));
6354
6355 /* Generate an 8-bit expression which is zero iff the
6356 original is zero, and nonzero otherwise */
6357 assign( src8,
6358 unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8),
6359 mkexpr(src), mkU(ty,0))) );
6360
6361 /* Flags: Z is 1 iff source value is zero. All others
6362 are undefined -- we force them to zero. */
6363 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6364 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6365 stmt( IRStmt_Put(
6366 OFFB_CC_DEP1,
6367 IRExpr_Mux0X( mkexpr(src8),
6368 /* src==0 */
6369 mkU32(X86G_CC_MASK_Z),
6370 /* src!=0 */
6371 mkU32(0)
6372 )
6373 ));
6374 /* Set NDEP even though it isn't used. This makes redundant-PUT
6375 elimination of previous stores to this field work better. */
6376 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6377
6378 /* Result: if the source value is zero, we can't use
6379 Iop_Clz32/Iop_Ctz32, as they have no defined result in that case.
6380 But anyway, Intel x86 semantics say the result is undefined in
6381 such situations. Hence handle the zero case specially. */
6382
6383 /* Bleh. What we compute:
6384
6385 bsf32: if src == 0 then 0 else Ctz32(src)
6386 bsr32: if src == 0 then 0 else 31 - Clz32(src)
6387
6388 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
6389 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
6390
6391 First, widen src to 32 bits if it is not already.
6392
6393 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
6394 dst register unchanged when src == 0. Hence change accordingly.
6395 */
6396 if (sz == 2)
6397 assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
6398 else
6399 assign( src32, mkexpr(src) );
6400
6401 /* The main computation, guarding against zero. */
6402 assign( dst32,
6403 IRExpr_Mux0X(
6404 mkexpr(src8),
6405 /* src == 0 -- leave dst unchanged */
6406 widenUto32( getIReg( sz, gregOfRM(modrm) ) ),
6407 /* src != 0 */
6408 fwds ?
unop(Iop_Ctz32, mkexpr(src32))
6409 : binop(Iop_Sub32,
6410 mkU32(31),
6411 unop(Iop_Clz32, mkexpr(src32)))
6412 )
6413 );
6414
6415 if (sz == 2)
6416 assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
6417 else
6418 assign( dst, mkexpr(dst32) );
6419
6420 /* dump result back */
6421 putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
6422
6423 return delta;
6424 }
6425
6426
6427 static
6428 void codegen_xchg_eAX_Reg ( Int sz, Int reg )
6429 {
6430 IRType ty = szToITy(sz);
6431 IRTemp t1 = newTemp(ty);
6432 IRTemp t2 = newTemp(ty);
6433 vassert(sz == 2 || sz == 4);
6434 assign( t1, getIReg(sz, R_EAX) );
6435 assign( t2, getIReg(sz, reg) );
6436 putIReg( sz, R_EAX, mkexpr(t2) );
6437 putIReg( sz, reg, mkexpr(t1) );
6438 DIP("xchg%c %s, %s\n",
6439 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
6440 }
6441
6442
6443 static
6444 void codegen_SAHF ( void )
6445 {
6446 /* Set the flags to:
6447 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
6448 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6449 |X86G_CC_MASK_P|X86G_CC_MASK_C))
6450 */
6451 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6452 |X86G_CC_MASK_C|X86G_CC_MASK_P;
6453 IRTemp oldflags = newTemp(Ity_I32);
6454 assign( oldflags, mk_x86g_calculate_eflags_all() );
6455 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
6456 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
6457 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
6458 stmt( IRStmt_Put( OFFB_CC_DEP1,
6459 binop(Iop_Or32,
6460 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
6461 binop(Iop_And32,
6462 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
6463 mkU32(mask_SZACP))
6464 )
6465 ));
6466 /* Set NDEP even though it isn't used. This makes redundant-PUT
6467 elimination of previous stores to this field work better.
*/ 6468 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6469 } 6470 6471 6472 static 6473 void codegen_LAHF ( void ) 6474 { 6475 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 6476 IRExpr* eax_with_hole; 6477 IRExpr* new_byte; 6478 IRExpr* new_eax; 6479 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6480 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6481 6482 IRTemp flags = newTemp(Ity_I32); 6483 assign( flags, mk_x86g_calculate_eflags_all() ); 6484 6485 eax_with_hole 6486 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF)); 6487 new_byte 6488 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)), 6489 mkU32(1<<1)); 6490 new_eax 6491 = binop(Iop_Or32, eax_with_hole, 6492 binop(Iop_Shl32, new_byte, mkU8(8))); 6493 putIReg(4, R_EAX, new_eax); 6494 } 6495 6496 6497 static 6498 UInt dis_cmpxchg_G_E ( UChar sorb, 6499 Bool locked, 6500 Int size, 6501 Int delta0 ) 6502 { 6503 HChar dis_buf[50]; 6504 Int len; 6505 6506 IRType ty = szToITy(size); 6507 IRTemp acc = newTemp(ty); 6508 IRTemp src = newTemp(ty); 6509 IRTemp dest = newTemp(ty); 6510 IRTemp dest2 = newTemp(ty); 6511 IRTemp acc2 = newTemp(ty); 6512 IRTemp cond8 = newTemp(Ity_I8); 6513 IRTemp addr = IRTemp_INVALID; 6514 UChar rm = getUChar(delta0); 6515 6516 /* There are 3 cases to consider: 6517 6518 reg-reg: ignore any lock prefix, generate sequence based 6519 on Mux0X 6520 6521 reg-mem, not locked: ignore any lock prefix, generate sequence 6522 based on Mux0X 6523 6524 reg-mem, locked: use IRCAS 6525 */ 6526 if (epartIsReg(rm)) { 6527 /* case 1 */ 6528 assign( dest, getIReg(size, eregOfRM(rm)) ); 6529 delta0++; 6530 assign( src, getIReg(size, gregOfRM(rm)) ); 6531 assign( acc, getIReg(size, R_EAX) ); 6532 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6533 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6534 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6535 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6536 putIReg(size, R_EAX, mkexpr(acc2)); 6537 putIReg(size, eregOfRM(rm), mkexpr(dest2)); 6538 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6539 nameIReg(size,gregOfRM(rm)), 6540 nameIReg(size,eregOfRM(rm)) ); 6541 } 6542 else if (!epartIsReg(rm) && !locked) { 6543 /* case 2 */ 6544 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6545 assign( dest, loadLE(ty, mkexpr(addr)) ); 6546 delta0 += len; 6547 assign( src, getIReg(size, gregOfRM(rm)) ); 6548 assign( acc, getIReg(size, R_EAX) ); 6549 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6550 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6551 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6552 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6553 putIReg(size, R_EAX, mkexpr(acc2)); 6554 storeLE( mkexpr(addr), mkexpr(dest2) ); 6555 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6556 nameIReg(size,gregOfRM(rm)), dis_buf); 6557 } 6558 else if (!epartIsReg(rm) && locked) { 6559 /* case 3 */ 6560 /* src is new value. acc is expected value. dest is old value. 6561 Compute success from the output of the IRCAS, and steer the 6562 new value for EAX accordingly: in case of success, EAX is 6563 unchanged. 
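A rough C sketch of what all three cases implement (besides ZF, the
other OSZACP flags are also set from the subtract):

         if (dest == %EAX) { ZF = 1; dest = src;  }
         else              { ZF = 0; %EAX = dest; }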
*/ 6564 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6565 delta0 += len; 6566 assign( src, getIReg(size, gregOfRM(rm)) ); 6567 assign( acc, getIReg(size, R_EAX) ); 6568 stmt( IRStmt_CAS( 6569 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 6570 NULL, mkexpr(acc), NULL, mkexpr(src) ) 6571 )); 6572 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6573 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6574 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6575 putIReg(size, R_EAX, mkexpr(acc2)); 6576 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6577 nameIReg(size,gregOfRM(rm)), dis_buf); 6578 } 6579 else vassert(0); 6580 6581 return delta0; 6582 } 6583 6584 6585 /* Handle conditional move instructions of the form 6586 cmovcc E(reg-or-mem), G(reg) 6587 6588 E(src) is reg-or-mem 6589 G(dst) is reg. 6590 6591 If E is reg, --> GET %E, tmps 6592 GET %G, tmpd 6593 CMOVcc tmps, tmpd 6594 PUT tmpd, %G 6595 6596 If E is mem --> (getAddr E) -> tmpa 6597 LD (tmpa), tmps 6598 GET %G, tmpd 6599 CMOVcc tmps, tmpd 6600 PUT tmpd, %G 6601 */ 6602 static 6603 UInt dis_cmov_E_G ( UChar sorb, 6604 Int sz, 6605 X86Condcode cond, 6606 Int delta0 ) 6607 { 6608 UChar rm = getIByte(delta0); 6609 HChar dis_buf[50]; 6610 Int len; 6611 6612 IRType ty = szToITy(sz); 6613 IRTemp tmps = newTemp(ty); 6614 IRTemp tmpd = newTemp(ty); 6615 6616 if (epartIsReg(rm)) { 6617 assign( tmps, getIReg(sz, eregOfRM(rm)) ); 6618 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6619 6620 putIReg(sz, gregOfRM(rm), 6621 IRExpr_Mux0X( unop(Iop_1Uto8, 6622 mk_x86g_calculate_condition(cond)), 6623 mkexpr(tmpd), 6624 mkexpr(tmps) ) 6625 ); 6626 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6627 name_X86Condcode(cond), 6628 nameIReg(sz,eregOfRM(rm)), 6629 nameIReg(sz,gregOfRM(rm))); 6630 return 1+delta0; 6631 } 6632 6633 /* E refers to memory */ 6634 { 6635 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6636 assign( tmps, loadLE(ty, mkexpr(addr)) ); 6637 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6638 6639 putIReg(sz, gregOfRM(rm), 6640 IRExpr_Mux0X( unop(Iop_1Uto8, 6641 mk_x86g_calculate_condition(cond)), 6642 mkexpr(tmpd), 6643 mkexpr(tmps) ) 6644 ); 6645 6646 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6647 name_X86Condcode(cond), 6648 dis_buf, 6649 nameIReg(sz,gregOfRM(rm))); 6650 return len+delta0; 6651 } 6652 } 6653 6654 6655 static 6656 UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0, 6657 Bool* decodeOK ) 6658 { 6659 Int len; 6660 UChar rm = getIByte(delta0); 6661 HChar dis_buf[50]; 6662 6663 IRType ty = szToITy(sz); 6664 IRTemp tmpd = newTemp(ty); 6665 IRTemp tmpt0 = newTemp(ty); 6666 IRTemp tmpt1 = newTemp(ty); 6667 6668 /* There are 3 cases to consider: 6669 6670 reg-reg: ignore any lock prefix, 6671 generate 'naive' (non-atomic) sequence 6672 6673 reg-mem, not locked: ignore any lock prefix, generate 'naive' 6674 (non-atomic) sequence 6675 6676 reg-mem, locked: use IRCAS 6677 */ 6678 6679 if (epartIsReg(rm)) { 6680 /* case 1 */ 6681 assign( tmpd, getIReg(sz, eregOfRM(rm))); 6682 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6683 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6684 mkexpr(tmpd), mkexpr(tmpt0)) ); 6685 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6686 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1)); 6687 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6688 DIP("xadd%c %s, %s\n", 6689 nameISize(sz), nameIReg(sz,gregOfRM(rm)), 6690 nameIReg(sz,eregOfRM(rm))); 6691 *decodeOK = True; 6692 return 1+delta0; 6693 } 6694 else if (!epartIsReg(rm) && !locked) { 6695 /* case 2 */ 6696 
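/* Non-atomic read-modify-write: load the old value, store the sum
   straight back, then set the flags and %G from the old value. */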
IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6697 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6698 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6699 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6700 mkexpr(tmpd), mkexpr(tmpt0)) ); 6701 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 6702 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6703 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6704 DIP("xadd%c %s, %s\n", 6705 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6706 *decodeOK = True; 6707 return len+delta0; 6708 } 6709 else if (!epartIsReg(rm) && locked) { 6710 /* case 3 */ 6711 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6712 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6713 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6714 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6715 mkexpr(tmpd), mkexpr(tmpt0)) ); 6716 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 6717 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr ); 6718 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6719 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6720 DIP("xadd%c %s, %s\n", 6721 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6722 *decodeOK = True; 6723 return len+delta0; 6724 } 6725 /*UNREACHED*/ 6726 vassert(0); 6727 } 6728 6729 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 6730 6731 static 6732 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 ) 6733 { 6734 Int len; 6735 IRTemp addr; 6736 UChar rm = getIByte(delta0); 6737 HChar dis_buf[50]; 6738 6739 if (epartIsReg(rm)) { 6740 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 6741 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 6742 return 1+delta0; 6743 } else { 6744 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6745 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 6746 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 6747 return len+delta0; 6748 } 6749 } 6750 6751 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 6752 dst is ireg and sz==4, zero out top half of it. */ 6753 6754 static 6755 UInt dis_mov_Sw_Ew ( UChar sorb, 6756 Int sz, 6757 Int delta0 ) 6758 { 6759 Int len; 6760 IRTemp addr; 6761 UChar rm = getIByte(delta0); 6762 HChar dis_buf[50]; 6763 6764 vassert(sz == 2 || sz == 4); 6765 6766 if (epartIsReg(rm)) { 6767 if (sz == 4) 6768 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 6769 else 6770 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 6771 6772 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 6773 return 1+delta0; 6774 } else { 6775 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6776 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 6777 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 6778 return len+delta0; 6779 } 6780 } 6781 6782 6783 static 6784 void dis_push_segreg ( UInt sreg, Int sz ) 6785 { 6786 IRTemp t1 = newTemp(Ity_I16); 6787 IRTemp ta = newTemp(Ity_I32); 6788 vassert(sz == 2 || sz == 4); 6789 6790 assign( t1, getSReg(sreg) ); 6791 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 6792 putIReg(4, R_ESP, mkexpr(ta)); 6793 storeLE( mkexpr(ta), mkexpr(t1) ); 6794 6795 DIP("push%c %s\n", sz==2 ? 
'w' : 'l', nameSReg(sreg)); 6796 } 6797 6798 static 6799 void dis_pop_segreg ( UInt sreg, Int sz ) 6800 { 6801 IRTemp t1 = newTemp(Ity_I16); 6802 IRTemp ta = newTemp(Ity_I32); 6803 vassert(sz == 2 || sz == 4); 6804 6805 assign( ta, getIReg(4, R_ESP) ); 6806 assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 6807 6808 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 6809 putSReg( sreg, mkexpr(t1) ); 6810 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); 6811 } 6812 6813 static 6814 void dis_ret ( UInt d32 ) 6815 { 6816 IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32); 6817 assign(t1, getIReg(4,R_ESP)); 6818 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 6819 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); 6820 jmp_treg(Ijk_Ret,t2); 6821 } 6822 6823 /*------------------------------------------------------------*/ 6824 /*--- SSE/SSE2/SSE3 helpers ---*/ 6825 /*------------------------------------------------------------*/ 6826 6827 /* Worker function; do not call directly. 6828 Handles full width G = G `op` E and G = (not G) `op` E. 6829 */ 6830 6831 static UInt dis_SSE_E_to_G_all_wrk ( 6832 UChar sorb, Int delta, 6833 HChar* opname, IROp op, 6834 Bool invertG 6835 ) 6836 { 6837 HChar dis_buf[50]; 6838 Int alen; 6839 IRTemp addr; 6840 UChar rm = getIByte(delta); 6841 IRExpr* gpart 6842 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) 6843 : getXMMReg(gregOfRM(rm)); 6844 if (epartIsReg(rm)) { 6845 putXMMReg( gregOfRM(rm), 6846 binop(op, gpart, 6847 getXMMReg(eregOfRM(rm))) ); 6848 DIP("%s %s,%s\n", opname, 6849 nameXMMReg(eregOfRM(rm)), 6850 nameXMMReg(gregOfRM(rm)) ); 6851 return delta+1; 6852 } else { 6853 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6854 putXMMReg( gregOfRM(rm), 6855 binop(op, gpart, 6856 loadLE(Ity_V128, mkexpr(addr))) ); 6857 DIP("%s %s,%s\n", opname, 6858 dis_buf, 6859 nameXMMReg(gregOfRM(rm)) ); 6860 return delta+alen; 6861 } 6862 } 6863 6864 6865 /* All lanes SSE binary operation, G = G `op` E. */ 6866 6867 static 6868 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op ) 6869 { 6870 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False ); 6871 } 6872 6873 /* All lanes SSE binary operation, G = (not G) `op` E. */ 6874 6875 static 6876 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta, 6877 HChar* opname, IROp op ) 6878 { 6879 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True ); 6880 } 6881 6882 6883 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 6884 6885 static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta, 6886 HChar* opname, IROp op ) 6887 { 6888 HChar dis_buf[50]; 6889 Int alen; 6890 IRTemp addr; 6891 UChar rm = getIByte(delta); 6892 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6893 if (epartIsReg(rm)) { 6894 putXMMReg( gregOfRM(rm), 6895 binop(op, gpart, 6896 getXMMReg(eregOfRM(rm))) ); 6897 DIP("%s %s,%s\n", opname, 6898 nameXMMReg(eregOfRM(rm)), 6899 nameXMMReg(gregOfRM(rm)) ); 6900 return delta+1; 6901 } else { 6902 /* We can only do a 32-bit memory read, so the upper 3/4 of the 6903 E operand needs to be made simply of zeroes. */ 6904 IRTemp epart = newTemp(Ity_V128); 6905 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6906 assign( epart, unop( Iop_32UtoV128, 6907 loadLE(Ity_I32, mkexpr(addr))) ); 6908 putXMMReg( gregOfRM(rm), 6909 binop(op, gpart, mkexpr(epart)) ); 6910 DIP("%s %s,%s\n", opname, 6911 dis_buf, 6912 nameXMMReg(gregOfRM(rm)) ); 6913 return delta+alen; 6914 } 6915 } 6916 6917 6918 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
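This is the shape of the scalar double-precision ops (addsd and
friends): only the low 64-bit lane is computed, and the upper lane of
G is passed through unchanged.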
*/ 6919 6920 static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta, 6921 HChar* opname, IROp op ) 6922 { 6923 HChar dis_buf[50]; 6924 Int alen; 6925 IRTemp addr; 6926 UChar rm = getIByte(delta); 6927 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6928 if (epartIsReg(rm)) { 6929 putXMMReg( gregOfRM(rm), 6930 binop(op, gpart, 6931 getXMMReg(eregOfRM(rm))) ); 6932 DIP("%s %s,%s\n", opname, 6933 nameXMMReg(eregOfRM(rm)), 6934 nameXMMReg(gregOfRM(rm)) ); 6935 return delta+1; 6936 } else { 6937 /* We can only do a 64-bit memory read, so the upper half of the 6938 E operand needs to be made simply of zeroes. */ 6939 IRTemp epart = newTemp(Ity_V128); 6940 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6941 assign( epart, unop( Iop_64UtoV128, 6942 loadLE(Ity_I64, mkexpr(addr))) ); 6943 putXMMReg( gregOfRM(rm), 6944 binop(op, gpart, mkexpr(epart)) ); 6945 DIP("%s %s,%s\n", opname, 6946 dis_buf, 6947 nameXMMReg(gregOfRM(rm)) ); 6948 return delta+alen; 6949 } 6950 } 6951 6952 6953 /* All lanes unary SSE operation, G = op(E). */ 6954 6955 static UInt dis_SSE_E_to_G_unary_all ( 6956 UChar sorb, Int delta, 6957 HChar* opname, IROp op 6958 ) 6959 { 6960 HChar dis_buf[50]; 6961 Int alen; 6962 IRTemp addr; 6963 UChar rm = getIByte(delta); 6964 if (epartIsReg(rm)) { 6965 putXMMReg( gregOfRM(rm), 6966 unop(op, getXMMReg(eregOfRM(rm))) ); 6967 DIP("%s %s,%s\n", opname, 6968 nameXMMReg(eregOfRM(rm)), 6969 nameXMMReg(gregOfRM(rm)) ); 6970 return delta+1; 6971 } else { 6972 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6973 putXMMReg( gregOfRM(rm), 6974 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 6975 DIP("%s %s,%s\n", opname, 6976 dis_buf, 6977 nameXMMReg(gregOfRM(rm)) ); 6978 return delta+alen; 6979 } 6980 } 6981 6982 6983 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 6984 6985 static UInt dis_SSE_E_to_G_unary_lo32 ( 6986 UChar sorb, Int delta, 6987 HChar* opname, IROp op 6988 ) 6989 { 6990 /* First we need to get the old G value and patch the low 32 bits 6991 of the E operand into it. Then apply op and write back to G. */ 6992 HChar dis_buf[50]; 6993 Int alen; 6994 IRTemp addr; 6995 UChar rm = getIByte(delta); 6996 IRTemp oldG0 = newTemp(Ity_V128); 6997 IRTemp oldG1 = newTemp(Ity_V128); 6998 6999 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7000 7001 if (epartIsReg(rm)) { 7002 assign( oldG1, 7003 binop( Iop_SetV128lo32, 7004 mkexpr(oldG0), 7005 getXMMRegLane32(eregOfRM(rm), 0)) ); 7006 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7007 DIP("%s %s,%s\n", opname, 7008 nameXMMReg(eregOfRM(rm)), 7009 nameXMMReg(gregOfRM(rm)) ); 7010 return delta+1; 7011 } else { 7012 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7013 assign( oldG1, 7014 binop( Iop_SetV128lo32, 7015 mkexpr(oldG0), 7016 loadLE(Ity_I32, mkexpr(addr)) )); 7017 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7018 DIP("%s %s,%s\n", opname, 7019 dis_buf, 7020 nameXMMReg(gregOfRM(rm)) ); 7021 return delta+alen; 7022 } 7023 } 7024 7025 7026 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 7027 7028 static UInt dis_SSE_E_to_G_unary_lo64 ( 7029 UChar sorb, Int delta, 7030 HChar* opname, IROp op 7031 ) 7032 { 7033 /* First we need to get the old G value and patch the low 64 bits 7034 of the E operand into it. Then apply op and write back to G. 
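The patching is needed because the IROp operates on a whole V128: for
an op like sqrtsd only lane 0 is freshly computed, and the upper lane
of the result must come from G, not E.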
*/ 7035 HChar dis_buf[50]; 7036 Int alen; 7037 IRTemp addr; 7038 UChar rm = getIByte(delta); 7039 IRTemp oldG0 = newTemp(Ity_V128); 7040 IRTemp oldG1 = newTemp(Ity_V128); 7041 7042 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7043 7044 if (epartIsReg(rm)) { 7045 assign( oldG1, 7046 binop( Iop_SetV128lo64, 7047 mkexpr(oldG0), 7048 getXMMRegLane64(eregOfRM(rm), 0)) ); 7049 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7050 DIP("%s %s,%s\n", opname, 7051 nameXMMReg(eregOfRM(rm)), 7052 nameXMMReg(gregOfRM(rm)) ); 7053 return delta+1; 7054 } else { 7055 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7056 assign( oldG1, 7057 binop( Iop_SetV128lo64, 7058 mkexpr(oldG0), 7059 loadLE(Ity_I64, mkexpr(addr)) )); 7060 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7061 DIP("%s %s,%s\n", opname, 7062 dis_buf, 7063 nameXMMReg(gregOfRM(rm)) ); 7064 return delta+alen; 7065 } 7066 } 7067 7068 7069 /* SSE integer binary operation: 7070 G = G `op` E (eLeft == False) 7071 G = E `op` G (eLeft == True) 7072 */ 7073 static UInt dis_SSEint_E_to_G( 7074 UChar sorb, Int delta, 7075 HChar* opname, IROp op, 7076 Bool eLeft 7077 ) 7078 { 7079 HChar dis_buf[50]; 7080 Int alen; 7081 IRTemp addr; 7082 UChar rm = getIByte(delta); 7083 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7084 IRExpr* epart = NULL; 7085 if (epartIsReg(rm)) { 7086 epart = getXMMReg(eregOfRM(rm)); 7087 DIP("%s %s,%s\n", opname, 7088 nameXMMReg(eregOfRM(rm)), 7089 nameXMMReg(gregOfRM(rm)) ); 7090 delta += 1; 7091 } else { 7092 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7093 epart = loadLE(Ity_V128, mkexpr(addr)); 7094 DIP("%s %s,%s\n", opname, 7095 dis_buf, 7096 nameXMMReg(gregOfRM(rm)) ); 7097 delta += alen; 7098 } 7099 putXMMReg( gregOfRM(rm), 7100 eLeft ? binop(op, epart, gpart) 7101 : binop(op, gpart, epart) ); 7102 return delta; 7103 } 7104 7105 7106 /* Helper for doing SSE FP comparisons. */ 7107 7108 static void findSSECmpOp ( Bool* needNot, IROp* op, 7109 Int imm8, Bool all_lanes, Int sz ) 7110 { 7111 imm8 &= 7; 7112 *needNot = False; 7113 *op = Iop_INVALID; 7114 if (imm8 >= 4) { 7115 *needNot = True; 7116 imm8 -= 4; 7117 } 7118 7119 if (sz == 4 && all_lanes) { 7120 switch (imm8) { 7121 case 0: *op = Iop_CmpEQ32Fx4; return; 7122 case 1: *op = Iop_CmpLT32Fx4; return; 7123 case 2: *op = Iop_CmpLE32Fx4; return; 7124 case 3: *op = Iop_CmpUN32Fx4; return; 7125 default: break; 7126 } 7127 } 7128 if (sz == 4 && !all_lanes) { 7129 switch (imm8) { 7130 case 0: *op = Iop_CmpEQ32F0x4; return; 7131 case 1: *op = Iop_CmpLT32F0x4; return; 7132 case 2: *op = Iop_CmpLE32F0x4; return; 7133 case 3: *op = Iop_CmpUN32F0x4; return; 7134 default: break; 7135 } 7136 } 7137 if (sz == 8 && all_lanes) { 7138 switch (imm8) { 7139 case 0: *op = Iop_CmpEQ64Fx2; return; 7140 case 1: *op = Iop_CmpLT64Fx2; return; 7141 case 2: *op = Iop_CmpLE64Fx2; return; 7142 case 3: *op = Iop_CmpUN64Fx2; return; 7143 default: break; 7144 } 7145 } 7146 if (sz == 8 && !all_lanes) { 7147 switch (imm8) { 7148 case 0: *op = Iop_CmpEQ64F0x2; return; 7149 case 1: *op = Iop_CmpLT64F0x2; return; 7150 case 2: *op = Iop_CmpLE64F0x2; return; 7151 case 3: *op = Iop_CmpUN64F0x2; return; 7152 default: break; 7153 } 7154 } 7155 vpanic("findSSECmpOp(x86,guest)"); 7156 } 7157 7158 /* Handles SSE 32F/64F comparisons. 
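The low 3 bits of the immediate select the predicate: 0=EQ, 1=LT,
2=LE, 3=UNORD; values 4..7 are the negations NEQ/NLT/NLE/ORD, which
findSSECmpOp above expresses as the corresponding positive op plus a
final invert (needNot).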
*/
7159
7160 static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
7161 HChar* opname, Bool all_lanes, Int sz )
7162 {
7163 HChar dis_buf[50];
7164 Int alen, imm8;
7165 IRTemp addr;
7166 Bool needNot = False;
7167 IROp op = Iop_INVALID;
7168 IRTemp plain = newTemp(Ity_V128);
7169 UChar rm = getIByte(delta);
7170 UShort mask = 0;
7171 vassert(sz == 4 || sz == 8);
7172 if (epartIsReg(rm)) {
7173 imm8 = getIByte(delta+1);
7174 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7175 assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
7176 getXMMReg(eregOfRM(rm))) );
7177 delta += 2;
7178 DIP("%s $%d,%s,%s\n", opname,
7179 (Int)imm8,
7180 nameXMMReg(eregOfRM(rm)),
7181 nameXMMReg(gregOfRM(rm)) );
7182 } else {
7183 addr = disAMode ( &alen, sorb, delta, dis_buf );
7184 imm8 = getIByte(delta+alen);
7185 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
7186 assign( plain,
7187 binop(
7188 op,
7189 getXMMReg(gregOfRM(rm)),
7190 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
7191 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
7192 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
7193 )
7194 );
7195 delta += alen+1;
7196 DIP("%s $%d,%s,%s\n", opname,
7197 (Int)imm8,
7198 dis_buf,
7199 nameXMMReg(gregOfRM(rm)) );
7200 }
7201
7202 if (needNot && all_lanes) {
7203 putXMMReg( gregOfRM(rm),
7204 unop(Iop_NotV128, mkexpr(plain)) );
7205 }
7206 else
7207 if (needNot && !all_lanes) {
7208 mask = toUShort( sz==4 ? 0x000F : 0x00FF );
7209 putXMMReg( gregOfRM(rm),
7210 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
7211 }
7212 else {
7213 putXMMReg( gregOfRM(rm), mkexpr(plain) );
7214 }
7215
7216 return delta;
7217 }
7218
7219
7220 /* Vector by scalar shift of G by the amount specified at the bottom
7221 of E. */
7222
7223 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
7224 HChar* opname, IROp op )
7225 {
7226 HChar dis_buf[50];
7227 Int alen, size;
7228 IRTemp addr;
7229 Bool shl, shr, sar;
7230 UChar rm = getIByte(delta);
7231 IRTemp g0 = newTemp(Ity_V128);
7232 IRTemp g1 = newTemp(Ity_V128);
7233 IRTemp amt = newTemp(Ity_I32);
7234 IRTemp amt8 = newTemp(Ity_I8);
7235 if (epartIsReg(rm)) {
7236 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
7237 DIP("%s %s,%s\n", opname,
7238 nameXMMReg(eregOfRM(rm)),
7239 nameXMMReg(gregOfRM(rm)) );
7240 delta++;
7241 } else {
7242 addr = disAMode ( &alen, sorb, delta, dis_buf );
7243 assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
7244 DIP("%s %s,%s\n", opname,
7245 dis_buf,
7246 nameXMMReg(gregOfRM(rm)) );
7247 delta += alen;
7248 }
7249 assign( g0, getXMMReg(gregOfRM(rm)) );
7250 assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
7251
7252 shl = shr = sar = False;
7253 size = 0;
7254 switch (op) {
7255 case Iop_ShlN16x8: shl = True; size = 16; break;
7256 case Iop_ShlN32x4: shl = True; size = 32; break;
7257 case Iop_ShlN64x2: shl = True; size = 64; break;
7258 case Iop_SarN16x8: sar = True; size = 16; break;
7259 case Iop_SarN32x4: sar = True; size = 32; break;
7260 case Iop_ShrN16x8: shr = True; size = 16; break;
7261 case Iop_ShrN32x4: shr = True; size = 32; break;
7262 case Iop_ShrN64x2: shr = True; size = 64; break;
7263 default: vassert(0);
7264 }
7265
7266 if (shl || shr) {
7267 assign(
7268 g1,
7269 IRExpr_Mux0X(
7270 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7271 mkV128(0x0000),
7272 binop(op, mkexpr(g0), mkexpr(amt8))
7273 )
7274 );
7275 } else
7276 if (sar) {
7277 assign(
7278 g1,
7279 IRExpr_Mux0X(
7280 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
7281 binop(op, mkexpr(g0), mkU8(size-1)),
7282 binop(op, mkexpr(g0), mkexpr(amt8)) 7283 ) 7284 ); 7285 } else { 7286 /*NOTREACHED*/ 7287 vassert(0); 7288 } 7289 7290 putXMMReg( gregOfRM(rm), mkexpr(g1) ); 7291 return delta; 7292 } 7293 7294 7295 /* Vector by scalar shift of E by an immediate byte. */ 7296 7297 static 7298 UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op ) 7299 { 7300 Bool shl, shr, sar; 7301 UChar rm = getIByte(delta); 7302 IRTemp e0 = newTemp(Ity_V128); 7303 IRTemp e1 = newTemp(Ity_V128); 7304 UChar amt, size; 7305 vassert(epartIsReg(rm)); 7306 vassert(gregOfRM(rm) == 2 7307 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 7308 amt = getIByte(delta+1); 7309 delta += 2; 7310 DIP("%s $%d,%s\n", opname, 7311 (Int)amt, 7312 nameXMMReg(eregOfRM(rm)) ); 7313 assign( e0, getXMMReg(eregOfRM(rm)) ); 7314 7315 shl = shr = sar = False; 7316 size = 0; 7317 switch (op) { 7318 case Iop_ShlN16x8: shl = True; size = 16; break; 7319 case Iop_ShlN32x4: shl = True; size = 32; break; 7320 case Iop_ShlN64x2: shl = True; size = 64; break; 7321 case Iop_SarN16x8: sar = True; size = 16; break; 7322 case Iop_SarN32x4: sar = True; size = 32; break; 7323 case Iop_ShrN16x8: shr = True; size = 16; break; 7324 case Iop_ShrN32x4: shr = True; size = 32; break; 7325 case Iop_ShrN64x2: shr = True; size = 64; break; 7326 default: vassert(0); 7327 } 7328 7329 if (shl || shr) { 7330 assign( e1, amt >= size 7331 ? mkV128(0x0000) 7332 : binop(op, mkexpr(e0), mkU8(amt)) 7333 ); 7334 } else 7335 if (sar) { 7336 assign( e1, amt >= size 7337 ? binop(op, mkexpr(e0), mkU8(size-1)) 7338 : binop(op, mkexpr(e0), mkU8(amt)) 7339 ); 7340 } else { 7341 /*NOTREACHED*/ 7342 vassert(0); 7343 } 7344 7345 putXMMReg( eregOfRM(rm), mkexpr(e1) ); 7346 return delta; 7347 } 7348 7349 7350 /* Get the current SSE rounding mode. */ 7351 7352 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 7353 { 7354 return binop( Iop_And32, 7355 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ), 7356 mkU32(3) ); 7357 } 7358 7359 static void put_sse_roundingmode ( IRExpr* sseround ) 7360 { 7361 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 7362 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) ); 7363 } 7364 7365 /* Break a 128-bit value up into four 32-bit ints. */ 7366 7367 static void breakup128to32s ( IRTemp t128, 7368 /*OUTs*/ 7369 IRTemp* t3, IRTemp* t2, 7370 IRTemp* t1, IRTemp* t0 ) 7371 { 7372 IRTemp hi64 = newTemp(Ity_I64); 7373 IRTemp lo64 = newTemp(Ity_I64); 7374 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 7375 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 7376 7377 vassert(t0 && *t0 == IRTemp_INVALID); 7378 vassert(t1 && *t1 == IRTemp_INVALID); 7379 vassert(t2 && *t2 == IRTemp_INVALID); 7380 vassert(t3 && *t3 == IRTemp_INVALID); 7381 7382 *t0 = newTemp(Ity_I32); 7383 *t1 = newTemp(Ity_I32); 7384 *t2 = newTemp(Ity_I32); 7385 *t3 = newTemp(Ity_I32); 7386 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 7387 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 7388 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 7389 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 7390 } 7391 7392 /* Construct a 128-bit value from four 32-bit ints. */ 7393 7394 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 7395 IRTemp t1, IRTemp t0 ) 7396 { 7397 return 7398 binop( Iop_64HLtoV128, 7399 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 7400 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 7401 ); 7402 } 7403 7404 /* Break a 64-bit value up into four 16-bit ints. 
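t3 receives the most significant lane and t0 the least, mirroring
breakup128to32s above.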
*/
7405
7406 static void breakup64to16s ( IRTemp t64,
7407 /*OUTs*/
7408 IRTemp* t3, IRTemp* t2,
7409 IRTemp* t1, IRTemp* t0 )
7410 {
7411 IRTemp hi32 = newTemp(Ity_I32);
7412 IRTemp lo32 = newTemp(Ity_I32);
7413 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
7414 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
7415
7416 vassert(t0 && *t0 == IRTemp_INVALID);
7417 vassert(t1 && *t1 == IRTemp_INVALID);
7418 vassert(t2 && *t2 == IRTemp_INVALID);
7419 vassert(t3 && *t3 == IRTemp_INVALID);
7420
7421 *t0 = newTemp(Ity_I16);
7422 *t1 = newTemp(Ity_I16);
7423 *t2 = newTemp(Ity_I16);
7424 *t3 = newTemp(Ity_I16);
7425 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
7426 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
7427 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
7428 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
7429 }
7430
7431 /* Construct a 64-bit value from four 16-bit ints. */
7432
7433 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7434 IRTemp t1, IRTemp t0 )
7435 {
7436 return
7437 binop( Iop_32HLto64,
7438 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7439 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7440 );
7441 }
7442
7443 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7444 in the given 32-bit temporary. The flags that are set are: O S Z A
7445 C P D ID AC.
7446
7447 In all cases, code to set AC is generated. However, VEX actually
7448 ignores the AC value and so can optionally emit an emulation
7449 warning when it is enabled. In this routine, an emulation warning
7450 is only emitted if emit_AC_emwarn is True, in which case
7451 next_insn_EIP must be correct (this allows for correct code
7452 generation for popfl/popfw). If emit_AC_emwarn is False,
7453 next_insn_EIP is unimportant (this allows for easy if kludgey code
7454 generation for IRET.) */
7455
7456 static
7457 void set_EFLAGS_from_value ( IRTemp t1,
7458 Bool emit_AC_emwarn,
7459 Addr32 next_insn_EIP )
7460 {
7461 vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
7462
7463 /* t1 is the flag word. Mask out everything except OSZACP and set
7464 the flags thunk to X86G_CC_OP_COPY. */
7465 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
7466 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7467 stmt( IRStmt_Put( OFFB_CC_DEP1,
7468 binop(Iop_And32,
7469 mkexpr(t1),
7470 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
7471 | X86G_CC_MASK_A | X86G_CC_MASK_Z
7472 | X86G_CC_MASK_S | X86G_CC_MASK_O )
7473 )
7474 )
7475 );
7476 /* Set NDEP even though it isn't used. This makes redundant-PUT
7477 elimination of previous stores to this field work better. */
7478 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7479
7480 /* Also need to set the D flag, which is held in bit 10 of t1.
7481 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7482 stmt( IRStmt_Put(
7483 OFFB_DFLAG,
7484 IRExpr_Mux0X(
7485 unop(Iop_32to8,
7486 binop(Iop_And32,
7487 binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
7488 mkU32(1))),
7489 mkU32(1),
7490 mkU32(0xFFFFFFFF)))
7491 );
7492
7493 /* Set the ID flag */
7494 stmt( IRStmt_Put(
7495 OFFB_IDFLAG,
7496 IRExpr_Mux0X(
7497 unop(Iop_32to8,
7498 binop(Iop_And32,
7499 binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
7500 mkU32(1))),
7501 mkU32(0),
7502 mkU32(1)))
7503 );
7504
7505 /* And set the AC flag. If setting it to 1, possibly emit an
7506 emulation warning.
*/ 7507 stmt( IRStmt_Put( 7508 OFFB_ACFLAG, 7509 IRExpr_Mux0X( 7510 unop(Iop_32to8, 7511 binop(Iop_And32, 7512 binop(Iop_Shr32, mkexpr(t1), mkU8(18)), 7513 mkU32(1))), 7514 mkU32(0), 7515 mkU32(1))) 7516 ); 7517 7518 if (emit_AC_emwarn) { 7519 put_emwarn( mkU32(EmWarn_X86_acFlag) ); 7520 stmt( 7521 IRStmt_Exit( 7522 binop( Iop_CmpNE32, 7523 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), 7524 mkU32(0) ), 7525 Ijk_EmWarn, 7526 IRConst_U32( next_insn_EIP ) 7527 ) 7528 ); 7529 } 7530 } 7531 7532 7533 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit 7534 values (aa,bb), computes, for each of the 4 16-bit lanes: 7535 7536 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 7537 */ 7538 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 7539 { 7540 IRTemp aa = newTemp(Ity_I64); 7541 IRTemp bb = newTemp(Ity_I64); 7542 IRTemp aahi32s = newTemp(Ity_I64); 7543 IRTemp aalo32s = newTemp(Ity_I64); 7544 IRTemp bbhi32s = newTemp(Ity_I64); 7545 IRTemp bblo32s = newTemp(Ity_I64); 7546 IRTemp rHi = newTemp(Ity_I64); 7547 IRTemp rLo = newTemp(Ity_I64); 7548 IRTemp one32x2 = newTemp(Ity_I64); 7549 assign(aa, aax); 7550 assign(bb, bbx); 7551 assign( aahi32s, 7552 binop(Iop_SarN32x2, 7553 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 7554 mkU8(16) )); 7555 assign( aalo32s, 7556 binop(Iop_SarN32x2, 7557 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 7558 mkU8(16) )); 7559 assign( bbhi32s, 7560 binop(Iop_SarN32x2, 7561 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 7562 mkU8(16) )); 7563 assign( bblo32s, 7564 binop(Iop_SarN32x2, 7565 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 7566 mkU8(16) )); 7567 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 7568 assign( 7569 rHi, 7570 binop( 7571 Iop_ShrN32x2, 7572 binop( 7573 Iop_Add32x2, 7574 binop( 7575 Iop_ShrN32x2, 7576 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 7577 mkU8(14) 7578 ), 7579 mkexpr(one32x2) 7580 ), 7581 mkU8(1) 7582 ) 7583 ); 7584 assign( 7585 rLo, 7586 binop( 7587 Iop_ShrN32x2, 7588 binop( 7589 Iop_Add32x2, 7590 binop( 7591 Iop_ShrN32x2, 7592 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 7593 mkU8(14) 7594 ), 7595 mkexpr(one32x2) 7596 ), 7597 mkU8(1) 7598 ) 7599 ); 7600 return 7601 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 7602 } 7603 7604 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
Given two 64-bit 7605 values (aa,bb), computes, for each lane: 7606 7607 if aa_lane < 0 then - bb_lane 7608 else if aa_lane > 0 then bb_lane 7609 else 0 7610 */ 7611 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 7612 { 7613 IRTemp aa = newTemp(Ity_I64); 7614 IRTemp bb = newTemp(Ity_I64); 7615 IRTemp zero = newTemp(Ity_I64); 7616 IRTemp bbNeg = newTemp(Ity_I64); 7617 IRTemp negMask = newTemp(Ity_I64); 7618 IRTemp posMask = newTemp(Ity_I64); 7619 IROp opSub = Iop_INVALID; 7620 IROp opCmpGTS = Iop_INVALID; 7621 7622 switch (laneszB) { 7623 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 7624 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 7625 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 7626 default: vassert(0); 7627 } 7628 7629 assign( aa, aax ); 7630 assign( bb, bbx ); 7631 assign( zero, mkU64(0) ); 7632 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 7633 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 7634 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 7635 7636 return 7637 binop(Iop_Or64, 7638 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 7639 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 7640 7641 } 7642 7643 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 7644 value aa, computes, for each lane 7645 7646 if aa < 0 then -aa else aa 7647 7648 Note that the result is interpreted as unsigned, so that the 7649 absolute value of the most negative signed input can be 7650 represented. 7651 */ 7652 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) 7653 { 7654 IRTemp aa = newTemp(Ity_I64); 7655 IRTemp zero = newTemp(Ity_I64); 7656 IRTemp aaNeg = newTemp(Ity_I64); 7657 IRTemp negMask = newTemp(Ity_I64); 7658 IRTemp posMask = newTemp(Ity_I64); 7659 IROp opSub = Iop_INVALID; 7660 IROp opSarN = Iop_INVALID; 7661 7662 switch (laneszB) { 7663 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 7664 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 7665 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 7666 default: vassert(0); 7667 } 7668 7669 assign( aa, aax ); 7670 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 7671 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 7672 assign( zero, mkU64(0) ); 7673 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 7674 return 7675 binop(Iop_Or64, 7676 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 7677 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); 7678 } 7679 7680 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 7681 IRTemp lo64, Int byteShift ) 7682 { 7683 vassert(byteShift >= 1 && byteShift <= 7); 7684 return 7685 binop(Iop_Or64, 7686 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 7687 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 7688 ); 7689 } 7690 7691 /* Generate a SIGSEGV followed by a restart of the current instruction 7692 if effective_addr is not 16-aligned. This is required behaviour 7693 for some SSE3 instructions and all 128-bit SSSE3 instructions. 7694 This assumes that guest_RIP_curr_instr is set correctly! */ 7695 /* TODO(glider): we've replaced the 0xF mask with 0x0, effectively disabling 7696 * the check. Need to enable it once TSan stops generating unaligned 7697 * accesses in the wrappers. 
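* (With the intended 0xF mask, any effective address whose low four
* bits are nonzero would take the side exit and raise SIGSEGV.)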
7698 * See http://code.google.com/p/data-race-test/issues/detail?id=49 */
7699 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
7700 {
7701 stmt(
7702 IRStmt_Exit(
7703 binop(Iop_CmpNE32,
7704 binop(Iop_And32,mkexpr(effective_addr),mkU32(0x0)),
7705 mkU32(0)),
7706 Ijk_SigSEGV,
7707 IRConst_U32(guest_EIP_curr_instr)
7708 )
7709 );
7710 }
7711
7712
7713 /* Helper for deciding whether a given insn (starting at the opcode
7714 byte) may validly be used with a LOCK prefix. The following insns
7715 may be used with LOCK when their destination operand is in memory.
7716 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7717
7718 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7719 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7720 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7721 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
7722 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7723 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7724 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7725
7726 DEC FE /1, FF /1
7727 INC FE /0, FF /0
7728
7729 NEG F6 /3, F7 /3
7730 NOT F6 /2, F7 /2
7731
7732 XCHG 86, 87
7733
7734 BTC 0F BB, 0F BA /7
7735 BTR 0F B3, 0F BA /6
7736 BTS 0F AB, 0F BA /5
7737
7738 CMPXCHG 0F B0, 0F B1
7739 CMPXCHG8B 0F C7 /1
7740
7741 XADD 0F C0, 0F C1
7742
7743 ------------------------------
7744
7745 80 /0 = addb $imm8, rm8
7746 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7747 82 /0 = addb $imm8, rm8
7748 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7749
7750 00 = addb r8, rm8
7751 01 = addl r32, rm32 and addw r16, rm16
7752
7753 Same for OR ADC SBB AND SUB XOR
7754
7755 FE /1 = dec rm8
7756 FF /1 = dec rm32 and dec rm16
7757
7758 FE /0 = inc rm8
7759 FF /0 = inc rm32 and inc rm16
7760
7761 F6 /3 = neg rm8
7762 F7 /3 = neg rm32 and neg rm16
7763
7764 F6 /2 = not rm8
7765 F7 /2 = not rm32 and not rm16
7766
7767 0F BB = btcw r16, rm16 and btcl r32, rm32
7768 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
7769
7770 Same for BTS, BTR
7771 */
7772 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
7773 {
7774 switch (opc[0]) {
7775 case 0x00: case 0x01: case 0x08: case 0x09:
7776 case 0x10: case 0x11: case 0x18: case 0x19:
7777 case 0x20: case 0x21: case 0x28: case 0x29:
7778 case 0x30: case 0x31:
7779 if (!epartIsReg(opc[1]))
7780 return True;
7781 break;
7782
7783 case 0x80: case 0x81: case 0x82: case 0x83:
7784 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7785 && !epartIsReg(opc[1]))
7786 return True;
7787 break;
7788
7789 case 0xFE: case 0xFF:
7790 if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7791 && !epartIsReg(opc[1]))
7792 return True;
7793 break;
7794
7795 case 0xF6: case 0xF7:
7796 if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7797 && !epartIsReg(opc[1]))
7798 return True;
7799 break;
7800
7801 case 0x86: case 0x87:
7802 if (!epartIsReg(opc[1]))
7803 return True;
7804 break;
7805
7806 case 0x0F: {
7807 switch (opc[1]) {
7808 case 0xBB: case 0xB3: case 0xAB:
7809 if (!epartIsReg(opc[2]))
7810 return True;
7811 break;
7812 case 0xBA:
7813 if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7814 && !epartIsReg(opc[2]))
7815 return True;
7816 break;
7817 case 0xB0: case 0xB1:
7818 if (!epartIsReg(opc[2]))
7819 return True;
7820 break;
7821 case 0xC7:
7822 if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
7823 return True;
7824 break;
7825 case 0xC0: case 0xC1:
7826 if (!epartIsReg(opc[2]))
7827 return True;
7828 break;
7829 default:
7830 break;
7831 } /* switch (opc[1]) */
7832 break;
7833 }
7834
7835 default:
7836 break;
7837 } /* switch (opc[0]) */
7838
7839 return False;
7840
} 7841 7842 7843 /*------------------------------------------------------------*/ 7844 /*--- Disassemble a single instruction ---*/ 7845 /*------------------------------------------------------------*/ 7846 7847 /* Disassemble a single instruction into IR. The instruction is 7848 located in host memory at &guest_code[delta]. *expect_CAS is set 7849 to True if the resulting IR is expected to contain an IRCAS 7850 statement, and False if it's not expected to. This makes it 7851 possible for the caller of disInstr_X86_WRK to check that 7852 LOCK-prefixed instructions are at least plausibly translated, in 7853 that it becomes possible to check that a (validly) LOCK-prefixed 7854 instruction generates a translation containing an IRCAS, and 7855 instructions without LOCK prefixes don't generate translations 7856 containing an IRCAS. 7857 */ 7858 static 7859 DisResult disInstr_X86_WRK ( 7860 /*OUT*/Bool* expect_CAS, 7861 Bool put_IP, 7862 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 7863 Bool resteerCisOk, 7864 void* callback_opaque, 7865 Long delta64, 7866 VexArchInfo* archinfo, 7867 VexAbiInfo* vbi 7868 ) 7869 { 7870 IRType ty; 7871 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 7872 Int alen; 7873 UChar opc, modrm, abyte, pre; 7874 UInt d32; 7875 HChar dis_buf[50]; 7876 Int am_sz, d_sz, n_prefixes; 7877 DisResult dres; 7878 UChar* insn; /* used in SSE decoders */ 7879 7880 /* The running delta */ 7881 Int delta = (Int)delta64; 7882 7883 /* Holds eip at the start of the insn, so that we can print 7884 consistent error messages for unimplemented insns. */ 7885 Int delta_start = delta; 7886 7887 /* sz denotes the nominal data-op size of the insn; we change it to 7888 2 if an 0x66 prefix is seen */ 7889 Int sz = 4; 7890 7891 /* sorb holds the segment-override-prefix byte, if any. Zero if no 7892 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65} 7893 indicating the prefix. */ 7894 UChar sorb = 0; 7895 7896 /* Gets set to True if a LOCK prefix is seen. */ 7897 Bool pfx_lock = False; 7898 7899 /* Set result defaults. */ 7900 dres.whatNext = Dis_Continue; 7901 dres.len = 0; 7902 dres.continueAt = 0; 7903 7904 *expect_CAS = False; 7905 7906 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 7907 7908 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); 7909 DIP("\t0x%x: ", guest_EIP_bbstart+delta); 7910 7911 /* We may be asked to update the guest EIP before going further. */ 7912 if (put_IP) 7913 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) ); 7914 7915 /* Spot "Special" instructions (see comment at top of file). */ 7916 { 7917 UChar* code = (UChar*)(guest_code + delta); 7918 /* Spot the 12-byte preamble: 7919 C1C703 roll $3, %edi 7920 C1C70D roll $13, %edi 7921 C1C71D roll $29, %edi 7922 C1C713 roll $19, %edi 7923 */ 7924 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 && 7925 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D && 7926 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D && 7927 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) { 7928 /* Got a "Special" instruction preamble. Which one is it? 
*/ 7929 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) { 7930 /* %EDX = client_request ( %EAX ) */ 7931 DIP("%%edx = client_request ( %%eax )\n"); 7932 delta += 14; 7933 jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta); 7934 dres.whatNext = Dis_StopHere; 7935 goto decode_success; 7936 } 7937 else 7938 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) { 7939 /* %EAX = guest_NRADDR */ 7940 DIP("%%eax = guest_NRADDR\n"); 7941 delta += 14; 7942 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 )); 7943 goto decode_success; 7944 } 7945 else 7946 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) { 7947 /* call-noredir *%EAX */ 7948 DIP("call-noredir *%%eax\n"); 7949 delta += 14; 7950 t1 = newTemp(Ity_I32); 7951 assign(t1, getIReg(4,R_EAX)); 7952 t2 = newTemp(Ity_I32); 7953 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 7954 putIReg(4, R_ESP, mkexpr(t2)); 7955 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); 7956 jmp_treg(Ijk_NoRedir,t1); 7957 dres.whatNext = Dis_StopHere; 7958 goto decode_success; 7959 } 7960 /* We don't know what it is. */ 7961 goto decode_failure; 7962 /*NOTREACHED*/ 7963 } 7964 } 7965 7966 /* Handle a couple of weird-ass NOPs that have been observed in the 7967 wild. */ 7968 { 7969 UChar* code = (UChar*)(guest_code + delta); 7970 /* Sun's JVM 1.5.0 uses the following as a NOP: 7971 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ 7972 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 7973 && code[3] == 0x65 && code[4] == 0x90) { 7974 DIP("%%es:%%cs:%%fs:%%gs:nop\n"); 7975 delta += 5; 7976 goto decode_success; 7977 } 7978 /* Don't barf on recent binutils padding, 7979 all variants of which are: nopw %cs:0x0(%eax,%eax,1) 7980 66 2e 0f 1f 84 00 00 00 00 00 7981 66 66 2e 0f 1f 84 00 00 00 00 00 7982 66 66 66 2e 0f 1f 84 00 00 00 00 00 7983 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7984 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7985 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7986 */ 7987 if (code[0] == 0x66) { 7988 Int data16_cnt; 7989 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++) 7990 if (code[data16_cnt] != 0x66) 7991 break; 7992 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F 7993 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84 7994 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00 7995 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00 7996 && code[data16_cnt + 8] == 0x00 ) { 7997 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); 7998 delta += 9 + data16_cnt; 7999 goto decode_success; 8000 } 8001 } 8002 } 8003 8004 /* Normal instruction handling starts here. */ 8005 8006 /* Deal with some but not all prefixes: 8007 66(oso) 8008 F0(lock) 8009 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) 8010 Not dealt with (left in place): 8011 F2 F3 8012 */ 8013 n_prefixes = 0; 8014 while (True) { 8015 if (n_prefixes > 7) goto decode_failure; 8016 pre = getUChar(delta); 8017 switch (pre) { 8018 case 0x66: 8019 sz = 2; 8020 break; 8021 case 0xF0: 8022 pfx_lock = True; 8023 *expect_CAS = True; 8024 break; 8025 case 0x3E: /* %DS: */ 8026 case 0x26: /* %ES: */ 8027 case 0x64: /* %FS: */ 8028 case 0x65: /* %GS: */ 8029 if (sorb != 0) 8030 goto decode_failure; /* only one seg override allowed */ 8031 sorb = pre; 8032 break; 8033 case 0x2E: { /* %CS: */ 8034 /* 2E prefix on a conditional branch instruction is a 8035 branch-prediction hint, which can safely be ignored. 
*/ 8036 UChar op1 = getIByte(delta+1); 8037 UChar op2 = getIByte(delta+2); 8038 if ((op1 >= 0x70 && op1 <= 0x7F) 8039 || (op1 == 0xE3) 8040 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { 8041 if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); 8042 } else { 8043 /* All other CS override cases are not handled */ 8044 goto decode_failure; 8045 } 8046 break; 8047 } 8048 case 0x36: /* %SS: */ 8049 /* SS override cases are not handled */ 8050 goto decode_failure; 8051 default: 8052 goto not_a_prefix; 8053 } 8054 n_prefixes++; 8055 delta++; 8056 } 8057 8058 not_a_prefix: 8059 8060 /* Now we should be looking at the primary opcode byte or the 8061 leading F2 or F3. Check that any LOCK prefix is actually 8062 allowed. */ 8063 8064 if (pfx_lock) { 8065 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 8066 DIP("lock "); 8067 } else { 8068 *expect_CAS = False; 8069 goto decode_failure; 8070 } 8071 } 8072 8073 8074 /* ---------------------------------------------------- */ 8075 /* --- The SSE decoder. --- */ 8076 /* ---------------------------------------------------- */ 8077 8078 /* What did I do to deserve SSE ? Perhaps I was really bad in a 8079 previous life? */ 8080 8081 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a 8082 later section, further on. */ 8083 8084 insn = (UChar*)&guest_code[delta]; 8085 8086 /* Treat fxsave specially. It should be doable even on an SSE0 8087 (Pentium-II class) CPU. Hence be prepared to handle it on 8088 any subarchitecture variant. 8089 */ 8090 8091 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 8092 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8093 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { 8094 IRDirty* d; 8095 modrm = getIByte(delta+2); 8096 vassert(sz == 4); 8097 vassert(!epartIsReg(modrm)); 8098 8099 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8100 delta += 2+alen; 8101 gen_SEGV_if_not_16_aligned(addr); 8102 8103 DIP("fxsave %s\n", dis_buf); 8104 8105 /* Uses dirty helper: 8106 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ 8107 d = unsafeIRDirty_0_N ( 8108 0/*regparms*/, 8109 "x86g_dirtyhelper_FXSAVE", 8110 &x86g_dirtyhelper_FXSAVE, 8111 mkIRExprVec_1( mkexpr(addr) ) 8112 ); 8113 d->needsBBP = True; 8114 8115 /* declare we're writing memory */ 8116 d->mFx = Ifx_Write; 8117 d->mAddr = mkexpr(addr); 8118 d->mSize = 512; 8119 8120 /* declare we're reading guest state */ 8121 d->nFxState = 7; 8122 8123 d->fxState[0].fx = Ifx_Read; 8124 d->fxState[0].offset = OFFB_FTOP; 8125 d->fxState[0].size = sizeof(UInt); 8126 8127 d->fxState[1].fx = Ifx_Read; 8128 d->fxState[1].offset = OFFB_FPREGS; 8129 d->fxState[1].size = 8 * sizeof(ULong); 8130 8131 d->fxState[2].fx = Ifx_Read; 8132 d->fxState[2].offset = OFFB_FPTAGS; 8133 d->fxState[2].size = 8 * sizeof(UChar); 8134 8135 d->fxState[3].fx = Ifx_Read; 8136 d->fxState[3].offset = OFFB_FPROUND; 8137 d->fxState[3].size = sizeof(UInt); 8138 8139 d->fxState[4].fx = Ifx_Read; 8140 d->fxState[4].offset = OFFB_FC3210; 8141 d->fxState[4].size = sizeof(UInt); 8142 8143 d->fxState[5].fx = Ifx_Read; 8144 d->fxState[5].offset = OFFB_XMM0; 8145 d->fxState[5].size = 8 * sizeof(U128); 8146 8147 d->fxState[6].fx = Ifx_Read; 8148 d->fxState[6].offset = OFFB_SSEROUND; 8149 d->fxState[6].size = sizeof(UInt); 8150 8151 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8152 images are packed back-to-back. If not, the value of 8153 d->fxState[5].size is wrong. 
*/ 8154 vassert(16 == sizeof(U128)); 8155 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8156 8157 stmt( IRStmt_Dirty(d) ); 8158 8159 goto decode_success; 8160 } 8161 8162 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 8163 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8164 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) { 8165 IRDirty* d; 8166 modrm = getIByte(delta+2); 8167 vassert(sz == 4); 8168 vassert(!epartIsReg(modrm)); 8169 8170 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8171 delta += 2+alen; 8172 gen_SEGV_if_not_16_aligned(addr); 8173 8174 DIP("fxrstor %s\n", dis_buf); 8175 8176 /* Uses dirty helper: 8177 VexEmWarn x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) 8178 NOTE: 8179 the VexEmWarn value is simply ignored (unlike for FRSTOR) 8180 */ 8181 d = unsafeIRDirty_0_N ( 8182 0/*regparms*/, 8183 "x86g_dirtyhelper_FXRSTOR", 8184 &x86g_dirtyhelper_FXRSTOR, 8185 mkIRExprVec_1( mkexpr(addr) ) 8186 ); 8187 d->needsBBP = True; 8188 8189 /* declare we're reading memory */ 8190 d->mFx = Ifx_Read; 8191 d->mAddr = mkexpr(addr); 8192 d->mSize = 512; 8193 8194 /* declare we're writing guest state */ 8195 d->nFxState = 7; 8196 8197 d->fxState[0].fx = Ifx_Write; 8198 d->fxState[0].offset = OFFB_FTOP; 8199 d->fxState[0].size = sizeof(UInt); 8200 8201 d->fxState[1].fx = Ifx_Write; 8202 d->fxState[1].offset = OFFB_FPREGS; 8203 d->fxState[1].size = 8 * sizeof(ULong); 8204 8205 d->fxState[2].fx = Ifx_Write; 8206 d->fxState[2].offset = OFFB_FPTAGS; 8207 d->fxState[2].size = 8 * sizeof(UChar); 8208 8209 d->fxState[3].fx = Ifx_Write; 8210 d->fxState[3].offset = OFFB_FPROUND; 8211 d->fxState[3].size = sizeof(UInt); 8212 8213 d->fxState[4].fx = Ifx_Write; 8214 d->fxState[4].offset = OFFB_FC3210; 8215 d->fxState[4].size = sizeof(UInt); 8216 8217 d->fxState[5].fx = Ifx_Write; 8218 d->fxState[5].offset = OFFB_XMM0; 8219 d->fxState[5].size = 8 * sizeof(U128); 8220 8221 d->fxState[6].fx = Ifx_Write; 8222 d->fxState[6].offset = OFFB_SSEROUND; 8223 d->fxState[6].size = sizeof(UInt); 8224 8225 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8226 images are packed back-to-back. If not, the value of 8227 d->fxState[5].size is wrong. */ 8228 vassert(16 == sizeof(U128)); 8229 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8230 8231 stmt( IRStmt_Dirty(d) ); 8232 8233 goto decode_success; 8234 } 8235 8236 /* ------ SSE decoder main ------ */ 8237 8238 /* Skip parts of the decoder which don't apply given the stated 8239 guest subarchitecture. */ 8240 if (archinfo->hwcaps == 0/*baseline, no sse at all*/) 8241 goto after_sse_decoders; 8242 8243 /* Otherwise we must be doing sse1 or sse2, so we can at least try 8244 for SSE1 here. 
*/ 8245 8246 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 8247 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) { 8248 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 ); 8249 goto decode_success; 8250 } 8251 8252 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 8253 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) { 8254 vassert(sz == 4); 8255 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 ); 8256 goto decode_success; 8257 } 8258 8259 /* 0F 55 = ANDNPS -- G = (not G) and E */ 8260 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) { 8261 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 ); 8262 goto decode_success; 8263 } 8264 8265 /* 0F 54 = ANDPS -- G = G and E */ 8266 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) { 8267 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 ); 8268 goto decode_success; 8269 } 8270 8271 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 8272 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) { 8273 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 ); 8274 goto decode_success; 8275 } 8276 8277 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 8278 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) { 8279 vassert(sz == 4); 8280 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 ); 8281 goto decode_success; 8282 } 8283 8284 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 8285 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 8286 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 8287 IRTemp argL = newTemp(Ity_F32); 8288 IRTemp argR = newTemp(Ity_F32); 8289 modrm = getIByte(delta+2); 8290 if (epartIsReg(modrm)) { 8291 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 8292 delta += 2+1; 8293 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8294 nameXMMReg(gregOfRM(modrm)) ); 8295 } else { 8296 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8297 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 8298 delta += 2+alen; 8299 DIP("[u]comiss %s,%s\n", dis_buf, 8300 nameXMMReg(gregOfRM(modrm)) ); 8301 } 8302 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 8303 8304 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 8305 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 8306 stmt( IRStmt_Put( 8307 OFFB_CC_DEP1, 8308 binop( Iop_And32, 8309 binop(Iop_CmpF64, 8310 unop(Iop_F32toF64,mkexpr(argL)), 8311 unop(Iop_F32toF64,mkexpr(argR))), 8312 mkU32(0x45) 8313 ))); 8314 /* Set NDEP even though it isn't used. This makes redundant-PUT 8315 elimination of previous stores to this field work better. 
*/ 8316 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 8317 goto decode_success; 8318 } 8319 8320 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 8321 half xmm */ 8322 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) { 8323 IRTemp arg64 = newTemp(Ity_I64); 8324 IRTemp rmode = newTemp(Ity_I32); 8325 vassert(sz == 4); 8326 8327 modrm = getIByte(delta+2); 8328 do_MMX_preamble(); 8329 if (epartIsReg(modrm)) { 8330 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 8331 delta += 2+1; 8332 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)), 8333 nameXMMReg(gregOfRM(modrm))); 8334 } else { 8335 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8336 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 8337 delta += 2+alen; 8338 DIP("cvtpi2ps %s,%s\n", dis_buf, 8339 nameXMMReg(gregOfRM(modrm)) ); 8340 } 8341 8342 assign( rmode, get_sse_roundingmode() ); 8343 8344 putXMMRegLane32F( 8345 gregOfRM(modrm), 0, 8346 binop(Iop_F64toF32, 8347 mkexpr(rmode), 8348 unop(Iop_I32StoF64, 8349 unop(Iop_64to32, mkexpr(arg64)) )) ); 8350 8351 putXMMRegLane32F( 8352 gregOfRM(modrm), 1, 8353 binop(Iop_F64toF32, 8354 mkexpr(rmode), 8355 unop(Iop_I32StoF64, 8356 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 8357 8358 goto decode_success; 8359 } 8360 8361 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low 8362 quarter xmm */ 8363 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) { 8364 IRTemp arg32 = newTemp(Ity_I32); 8365 IRTemp rmode = newTemp(Ity_I32); 8366 vassert(sz == 4); 8367 8368 modrm = getIByte(delta+3); 8369 if (epartIsReg(modrm)) { 8370 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 8371 delta += 3+1; 8372 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)), 8373 nameXMMReg(gregOfRM(modrm))); 8374 } else { 8375 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8376 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 8377 delta += 3+alen; 8378 DIP("cvtsi2ss %s,%s\n", dis_buf, 8379 nameXMMReg(gregOfRM(modrm)) ); 8380 } 8381 8382 assign( rmode, get_sse_roundingmode() ); 8383 8384 putXMMRegLane32F( 8385 gregOfRM(modrm), 0, 8386 binop(Iop_F64toF32, 8387 mkexpr(rmode), 8388 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 8389 8390 goto decode_success; 8391 } 8392 8393 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8394 I32 in mmx, according to prevailing SSE rounding mode */ 8395 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8396 I32 in mmx, rounding towards zero */ 8397 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 8398 IRTemp dst64 = newTemp(Ity_I64); 8399 IRTemp rmode = newTemp(Ity_I32); 8400 IRTemp f32lo = newTemp(Ity_F32); 8401 IRTemp f32hi = newTemp(Ity_F32); 8402 Bool r2zero = toBool(insn[1] == 0x2C); 8403 8404 do_MMX_preamble(); 8405 modrm = getIByte(delta+2); 8406 8407 if (epartIsReg(modrm)) { 8408 delta += 2+1; 8409 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8410 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1)); 8411 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 8412 nameXMMReg(eregOfRM(modrm)), 8413 nameMMXReg(gregOfRM(modrm))); 8414 } else { 8415 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8416 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8417 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32, 8418 mkexpr(addr), 8419 mkU32(4) ))); 8420 delta += 2+alen; 8421 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 8422 dis_buf, 8423 nameMMXReg(gregOfRM(modrm))); 8424 } 8425 8426 if (r2zero) { 8427 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 8428 } else { 8429 assign( rmode, get_sse_roundingmode() ); 8430 } 8431 8432 assign( 8433 dst64, 8434 binop( Iop_32HLto64, 8435 binop( Iop_F64toI32S, 8436 mkexpr(rmode), 8437 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 8438 binop( Iop_F64toI32S, 8439 mkexpr(rmode), 8440 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8441 ) 8442 ); 8443 8444 putMMXReg(gregOfRM(modrm), mkexpr(dst64)); 8445 goto decode_success; 8446 } 8447 8448 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to 8449 I32 in ireg, according to prevailing SSE rounding mode */ 8450 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to 8451 I32 in ireg, rounding towards zero */ 8452 if (insn[0] == 0xF3 && insn[1] == 0x0F 8453 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 8454 IRTemp rmode = newTemp(Ity_I32); 8455 IRTemp f32lo = newTemp(Ity_F32); 8456 Bool r2zero = toBool(insn[2] == 0x2C); 8457 vassert(sz == 4); 8458 8459 modrm = getIByte(delta+3); 8460 if (epartIsReg(modrm)) { 8461 delta += 3+1; 8462 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8463 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8464 nameXMMReg(eregOfRM(modrm)), 8465 nameIReg(4, gregOfRM(modrm))); 8466 } else { 8467 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8468 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8469 delta += 3+alen; 8470 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8471 dis_buf, 8472 nameIReg(4, gregOfRM(modrm))); 8473 } 8474 8475 if (r2zero) { 8476 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 8477 } else { 8478 assign( rmode, get_sse_roundingmode() ); 8479 } 8480 8481 putIReg(4, gregOfRM(modrm), 8482 binop( Iop_F64toI32S, 8483 mkexpr(rmode), 8484 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8485 ); 8486 8487 goto decode_success; 8488 } 8489 8490 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 8491 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) { 8492 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 ); 8493 goto decode_success; 8494 } 8495 8496 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 8497 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) { 8498 vassert(sz == 4); 8499 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 ); 8500 goto decode_success; 8501 } 8502 8503 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 8504 if (insn[0] == 0x0F && insn[1] == 0xAE 8505 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) { 8506 8507 IRTemp t64 = newTemp(Ity_I64); 8508 IRTemp ew = newTemp(Ity_I32); 8509 8510 modrm = getIByte(delta+2); 8511 vassert(!epartIsReg(modrm)); 8512 vassert(sz == 4); 8513 8514 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8515 delta += 2+alen; 8516 DIP("ldmxcsr %s\n", dis_buf); 8517 8518 /* The only thing we observe in %mxcsr is the rounding mode. 8519 Therefore, pass the 32-bit value (SSE native-format control 8520 word) to a clean helper, getting back a 64-bit value, the 8521 lower half of which is the SSEROUND value to store, and the 8522 upper half of which is the emulation-warning token which may 8523 be generated. 
8524 */ 8525 /* ULong x86h_check_ldmxcsr ( UInt ); */ 8526 assign( t64, mkIRExprCCall( 8527 Ity_I64, 0/*regparms*/, 8528 "x86g_check_ldmxcsr", 8529 &x86g_check_ldmxcsr, 8530 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) ) 8531 ) 8532 ); 8533 8534 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 8535 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 8536 put_emwarn( mkexpr(ew) ); 8537 /* Finally, if an emulation warning was reported, side-exit to 8538 the next insn, reporting the warning, so that Valgrind's 8539 dispatcher sees the warning. */ 8540 stmt( 8541 IRStmt_Exit( 8542 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 8543 Ijk_EmWarn, 8544 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) 8545 ) 8546 ); 8547 goto decode_success; 8548 } 8549 8550 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8551 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 8552 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { 8553 Bool ok = False; 8554 delta = dis_MMX( &ok, sorb, sz, delta+1 ); 8555 if (!ok) 8556 goto decode_failure; 8557 goto decode_success; 8558 } 8559 8560 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 8561 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { 8562 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); 8563 goto decode_success; 8564 } 8565 8566 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 8567 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { 8568 vassert(sz == 4); 8569 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); 8570 goto decode_success; 8571 } 8572 8573 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 8574 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { 8575 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); 8576 goto decode_success; 8577 } 8578 8579 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 8580 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { 8581 vassert(sz == 4); 8582 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); 8583 goto decode_success; 8584 } 8585 8586 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 8587 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 8588 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 8589 modrm = getIByte(delta+2); 8590 if (epartIsReg(modrm)) { 8591 putXMMReg( gregOfRM(modrm), 8592 getXMMReg( eregOfRM(modrm) )); 8593 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8594 nameXMMReg(gregOfRM(modrm))); 8595 delta += 2+1; 8596 } else { 8597 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8598 if (insn[1] == 0x28/*movaps*/) 8599 gen_SEGV_if_not_16_aligned( addr ); 8600 putXMMReg( gregOfRM(modrm), 8601 loadLE(Ity_V128, mkexpr(addr)) ); 8602 DIP("mov[ua]ps %s,%s\n", dis_buf, 8603 nameXMMReg(gregOfRM(modrm))); 8604 delta += 2+alen; 8605 } 8606 goto decode_success; 8607 } 8608 8609 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 8610 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). 
   */
   if (sz == 4 && insn[0] == 0x0F
       && (insn[1] == 0x29 || insn[1] == 0x11)) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; awaiting test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x29/*movaps*/)
            gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                                  dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }

   /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
   /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 0 ) );
         DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhps %s,%s\n", dis_buf,
                               nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }

   /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm),
                          0/*lower lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 1 ));
         DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlps %s, %s\n",
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }

   /* 0F 13 = MOVLPS -- move from low half of XMM to mem.
*/ 8688 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { 8689 if (!epartIsReg(insn[2])) { 8690 delta += 2; 8691 addr = disAMode ( &alen, sorb, delta, dis_buf ); 8692 delta += alen; 8693 storeLE( mkexpr(addr), 8694 getXMMRegLane64( gregOfRM(insn[2]), 8695 0/*lower lane*/ ) ); 8696 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 8697 dis_buf); 8698 goto decode_success; 8699 } 8700 /* else fall through */ 8701 } 8702 8703 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 8704 to 4 lowest bits of ireg(G) */ 8705 if (insn[0] == 0x0F && insn[1] == 0x50) { 8706 modrm = getIByte(delta+2); 8707 if (sz == 4 && epartIsReg(modrm)) { 8708 Int src; 8709 t0 = newTemp(Ity_I32); 8710 t1 = newTemp(Ity_I32); 8711 t2 = newTemp(Ity_I32); 8712 t3 = newTemp(Ity_I32); 8713 delta += 2+1; 8714 src = eregOfRM(modrm); 8715 assign( t0, binop( Iop_And32, 8716 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), 8717 mkU32(1) )); 8718 assign( t1, binop( Iop_And32, 8719 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), 8720 mkU32(2) )); 8721 assign( t2, binop( Iop_And32, 8722 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), 8723 mkU32(4) )); 8724 assign( t3, binop( Iop_And32, 8725 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), 8726 mkU32(8) )); 8727 putIReg(4, gregOfRM(modrm), 8728 binop(Iop_Or32, 8729 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 8730 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) 8731 ) 8732 ); 8733 DIP("movmskps %s,%s\n", nameXMMReg(src), 8734 nameIReg(4, gregOfRM(modrm))); 8735 goto decode_success; 8736 } 8737 /* else fall through */ 8738 } 8739 8740 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 8741 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 8742 if (insn[0] == 0x0F && insn[1] == 0x2B) { 8743 modrm = getIByte(delta+2); 8744 if (!epartIsReg(modrm)) { 8745 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8746 gen_SEGV_if_not_16_aligned( addr ); 8747 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 8748 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 8749 dis_buf, 8750 nameXMMReg(gregOfRM(modrm))); 8751 delta += 2+alen; 8752 goto decode_success; 8753 } 8754 /* else fall through */ 8755 } 8756 8757 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8758 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 8759 Intel manual does not say anything about the usual business of 8760 the FP reg tags getting trashed whenever an MMX insn happens. 8761 So we just leave them alone. 8762 */ 8763 if (insn[0] == 0x0F && insn[1] == 0xE7) { 8764 modrm = getIByte(delta+2); 8765 if (sz == 4 && !epartIsReg(modrm)) { 8766 /* do_MMX_preamble(); Intel docs don't specify this */ 8767 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8768 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 8769 DIP("movntq %s,%s\n", dis_buf, 8770 nameMMXReg(gregOfRM(modrm))); 8771 delta += 2+alen; 8772 goto decode_success; 8773 } 8774 /* else fall through */ 8775 } 8776 8777 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 8778 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. 
   */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane32( gregOfRM(modrm), 0,
                          getXMMRegLane32( eregOfRM(modrm), 0 ));
         DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* zero bits 63:32 */
         putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
         /* write bits 31:0 */
         putXMMRegLane32( gregOfRM(modrm), 0,
                          loadLE(Ity_I32, mkexpr(addr)) );
         DIP("movss %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
      or lo 1/4 xmm). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* fall through, we don't yet have a test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
         goto decode_success;
      }
   }

   /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
      goto decode_success;
   }

   /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
      goto decode_success;
   }

   /* 0F 56 = ORPS -- G = G or E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgb", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgw", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
      zero-extend of it in ireg(G).
*/ 8862 if (insn[0] == 0x0F && insn[1] == 0xC5) { 8863 modrm = insn[2]; 8864 if (sz == 4 && epartIsReg(modrm)) { 8865 IRTemp sV = newTemp(Ity_I64); 8866 t5 = newTemp(Ity_I16); 8867 do_MMX_preamble(); 8868 assign(sV, getMMXReg(eregOfRM(modrm))); 8869 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 8870 switch (insn[3] & 3) { 8871 case 0: assign(t5, mkexpr(t0)); break; 8872 case 1: assign(t5, mkexpr(t1)); break; 8873 case 2: assign(t5, mkexpr(t2)); break; 8874 case 3: assign(t5, mkexpr(t3)); break; 8875 default: vassert(0); /*NOTREACHED*/ 8876 } 8877 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5))); 8878 DIP("pextrw $%d,%s,%s\n", 8879 (Int)insn[3], nameMMXReg(eregOfRM(modrm)), 8880 nameIReg(4,gregOfRM(modrm))); 8881 delta += 4; 8882 goto decode_success; 8883 } 8884 /* else fall through */ 8885 } 8886 8887 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8888 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 8889 put it into the specified lane of mmx(G). */ 8890 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) { 8891 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 8892 mmx reg. t4 is the new lane value. t5 is the original 8893 mmx value. t6 is the new mmx value. */ 8894 Int lane; 8895 t4 = newTemp(Ity_I16); 8896 t5 = newTemp(Ity_I64); 8897 t6 = newTemp(Ity_I64); 8898 modrm = insn[2]; 8899 do_MMX_preamble(); 8900 8901 assign(t5, getMMXReg(gregOfRM(modrm))); 8902 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 8903 8904 if (epartIsReg(modrm)) { 8905 assign(t4, getIReg(2, eregOfRM(modrm))); 8906 delta += 3+1; 8907 lane = insn[3+1-1]; 8908 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8909 nameIReg(2,eregOfRM(modrm)), 8910 nameMMXReg(gregOfRM(modrm))); 8911 } else { 8912 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8913 delta += 3+alen; 8914 lane = insn[3+alen-1]; 8915 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 8916 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8917 dis_buf, 8918 nameMMXReg(gregOfRM(modrm))); 8919 } 8920 8921 switch (lane & 3) { 8922 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 8923 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 8924 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 8925 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 8926 default: vassert(0); /*NOTREACHED*/ 8927 } 8928 putMMXReg(gregOfRM(modrm), mkexpr(t6)); 8929 goto decode_success; 8930 } 8931 8932 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8933 /* 0F EE = PMAXSW -- 16x4 signed max */ 8934 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) { 8935 do_MMX_preamble(); 8936 delta = dis_MMXop_regmem_to_reg ( 8937 sorb, delta+2, insn[1], "pmaxsw", False ); 8938 goto decode_success; 8939 } 8940 8941 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8942 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 8943 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) { 8944 do_MMX_preamble(); 8945 delta = dis_MMXop_regmem_to_reg ( 8946 sorb, delta+2, insn[1], "pmaxub", False ); 8947 goto decode_success; 8948 } 8949 8950 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8951 /* 0F EA = PMINSW -- 16x4 signed min */ 8952 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) { 8953 do_MMX_preamble(); 8954 delta = dis_MMXop_regmem_to_reg ( 8955 sorb, delta+2, insn[1], "pminsw", False ); 8956 goto decode_success; 8957 } 8958 8959 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8960 /* 0F DA = PMINUB -- 8x8 unsigned min */ 8961 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) { 8962 do_MMX_preamble(); 8963 delta = 
dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pminub", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
      mmx(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I32);
         assign(t0, getMMXReg(eregOfRM(modrm)));
         assign(t1, mkIRExprCCall(
                       Ity_I32, 0/*regparms*/,
                       "x86g_calculate_mmx_pmovmskb",
                       &x86g_calculate_mmx_pmovmskb,
                       mkIRExprVec_1(mkexpr(t0))));
         putIReg(4, gregOfRM(modrm), mkexpr(t1));
         DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      }
      /* else fall through */
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pmuluh", False );
      goto decode_success;
   }

   /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
   /* 0F 18 /1 = PREFETCHT0  -- with various different hints */
   /* 0F 18 /2 = PREFETCHT1 */
   /* 0F 18 /3 = PREFETCHT2 */
   if (insn[0] == 0x0F && insn[1] == 0x18
       && !epartIsReg(insn[2])
       && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
      HChar* hintstr = "??";

      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      switch (gregOfRM(modrm)) {
         case 0: hintstr = "nta"; break;
         case 1: hintstr = "t0"; break;
         case 2: hintstr = "t1"; break;
         case 3: hintstr = "t2"; break;
         default: vassert(0); /*NOTREACHED*/
      }

      DIP("prefetch%s %s\n", hintstr, dis_buf);
      goto decode_success;
   }

   /* 0F 0D /0 = PREFETCH m8 -- 3DNow!
prefetch */ 9030 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */ 9031 if (insn[0] == 0x0F && insn[1] == 0x0D 9032 && !epartIsReg(insn[2]) 9033 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) { 9034 HChar* hintstr = "??"; 9035 9036 modrm = getIByte(delta+2); 9037 vassert(!epartIsReg(modrm)); 9038 9039 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9040 delta += 2+alen; 9041 9042 switch (gregOfRM(modrm)) { 9043 case 0: hintstr = ""; break; 9044 case 1: hintstr = "w"; break; 9045 default: vassert(0); /*NOTREACHED*/ 9046 } 9047 9048 DIP("prefetch%s %s\n", hintstr, dis_buf); 9049 goto decode_success; 9050 } 9051 9052 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9053 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 9054 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) { 9055 do_MMX_preamble(); 9056 delta = dis_MMXop_regmem_to_reg ( 9057 sorb, delta+2, insn[1], "psadbw", False ); 9058 goto decode_success; 9059 } 9060 9061 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9062 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 9063 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) { 9064 Int order; 9065 IRTemp sV, dV, s3, s2, s1, s0; 9066 s3 = s2 = s1 = s0 = IRTemp_INVALID; 9067 sV = newTemp(Ity_I64); 9068 dV = newTemp(Ity_I64); 9069 do_MMX_preamble(); 9070 modrm = insn[2]; 9071 if (epartIsReg(modrm)) { 9072 assign( sV, getMMXReg(eregOfRM(modrm)) ); 9073 order = (Int)insn[3]; 9074 delta += 2+2; 9075 DIP("pshufw $%d,%s,%s\n", order, 9076 nameMMXReg(eregOfRM(modrm)), 9077 nameMMXReg(gregOfRM(modrm))); 9078 } else { 9079 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9080 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 9081 order = (Int)insn[2+alen]; 9082 delta += 3+alen; 9083 DIP("pshufw $%d,%s,%s\n", order, 9084 dis_buf, 9085 nameMMXReg(gregOfRM(modrm))); 9086 } 9087 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 9088 9089 # define SEL(n) \ 9090 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 9091 assign(dV, 9092 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 9093 SEL((order>>2)&3), SEL((order>>0)&3) ) 9094 ); 9095 putMMXReg(gregOfRM(modrm), mkexpr(dV)); 9096 # undef SEL 9097 goto decode_success; 9098 } 9099 9100 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 9101 if (insn[0] == 0x0F && insn[1] == 0x53) { 9102 vassert(sz == 4); 9103 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9104 "rcpps", Iop_Recip32Fx4 ); 9105 goto decode_success; 9106 } 9107 9108 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 9109 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) { 9110 vassert(sz == 4); 9111 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9112 "rcpss", Iop_Recip32F0x4 ); 9113 goto decode_success; 9114 } 9115 9116 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 9117 if (insn[0] == 0x0F && insn[1] == 0x52) { 9118 vassert(sz == 4); 9119 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9120 "rsqrtps", Iop_RSqrt32Fx4 ); 9121 goto decode_success; 9122 } 9123 9124 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 9125 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) { 9126 vassert(sz == 4); 9127 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9128 "rsqrtss", Iop_RSqrt32F0x4 ); 9129 goto decode_success; 9130 } 9131 9132 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 9133 if (insn[0] == 0x0F && insn[1] == 0xAE 9134 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 9135 vassert(sz == 4); 9136 delta += 3; 9137 /* Insert a memory fence. It's sometimes important that these 9138 are carried through to the generated code. */ 9139 stmt( IRStmt_MBE(Imbe_Fence) ); 9140 DIP("sfence\n"); 9141 goto decode_success; 9142 } 9143 9144 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 9145 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) { 9146 Int select; 9147 IRTemp sV, dV; 9148 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9149 sV = newTemp(Ity_V128); 9150 dV = newTemp(Ity_V128); 9151 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9152 modrm = insn[2]; 9153 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9154 9155 if (epartIsReg(modrm)) { 9156 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9157 select = (Int)insn[3]; 9158 delta += 2+2; 9159 DIP("shufps $%d,%s,%s\n", select, 9160 nameXMMReg(eregOfRM(modrm)), 9161 nameXMMReg(gregOfRM(modrm))); 9162 } else { 9163 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9164 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9165 select = (Int)insn[2+alen]; 9166 delta += 3+alen; 9167 DIP("shufps $%d,%s,%s\n", select, 9168 dis_buf, 9169 nameXMMReg(gregOfRM(modrm))); 9170 } 9171 9172 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9173 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9174 9175 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 9176 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 9177 9178 putXMMReg( 9179 gregOfRM(modrm), 9180 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 9181 SELD((select>>2)&3), SELD((select>>0)&3) ) 9182 ); 9183 9184 # undef SELD 9185 # undef SELS 9186 9187 goto decode_success; 9188 } 9189 9190 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 9191 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) { 9192 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9193 "sqrtps", Iop_Sqrt32Fx4 ); 9194 goto decode_success; 9195 } 9196 9197 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 9198 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) { 9199 vassert(sz == 4); 9200 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9201 "sqrtss", Iop_Sqrt32F0x4 ); 9202 goto decode_success; 9203 } 9204 9205 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 9206 if (insn[0] == 0x0F && insn[1] == 0xAE 9207 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) { 9208 modrm = getIByte(delta+2); 9209 vassert(sz == 4); 9210 vassert(!epartIsReg(modrm)); 9211 9212 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9213 delta += 2+alen; 9214 9215 /* Fake up a native SSE mxcsr word. The only thing it depends 9216 on is SSEROUND[1:0], so call a clean helper to cook it up. 9217 */ 9218 /* UInt x86h_create_mxcsr ( UInt sseround ) */ 9219 DIP("stmxcsr %s\n", dis_buf); 9220 storeLE( mkexpr(addr), 9221 mkIRExprCCall( 9222 Ity_I32, 0/*regp*/, 9223 "x86g_create_mxcsr", &x86g_create_mxcsr, 9224 mkIRExprVec_1( get_sse_roundingmode() ) 9225 ) 9226 ); 9227 goto decode_success; 9228 } 9229 9230 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 9231 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) { 9232 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 ); 9233 goto decode_success; 9234 } 9235 9236 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 9237 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) { 9238 vassert(sz == 4); 9239 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 ); 9240 goto decode_success; 9241 } 9242 9243 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 9244 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 9245 /* These just appear to be special cases of SHUFPS */ 9246 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 9247 IRTemp sV, dV; 9248 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9249 Bool hi = toBool(insn[1] == 0x15); 9250 sV = newTemp(Ity_V128); 9251 dV = newTemp(Ity_V128); 9252 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9253 modrm = insn[2]; 9254 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9255 9256 if (epartIsReg(modrm)) { 9257 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9258 delta += 2+1; 9259 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 9260 nameXMMReg(eregOfRM(modrm)), 9261 nameXMMReg(gregOfRM(modrm))); 9262 } else { 9263 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9264 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9265 delta += 2+alen; 9266 DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", 9267 dis_buf, 9268 nameXMMReg(gregOfRM(modrm))); 9269 } 9270 9271 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9272 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9273 9274 if (hi) { 9275 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) ); 9276 } else { 9277 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) ); 9278 } 9279 9280 goto decode_success; 9281 } 9282 9283 /* 0F 57 = XORPS -- G = G and E */ 9284 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) { 9285 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 ); 9286 goto decode_success; 9287 } 9288 9289 /* ---------------------------------------------------- */ 9290 /* --- end of the SSE decoder. --- */ 9291 /* ---------------------------------------------------- */ 9292 9293 /* ---------------------------------------------------- */ 9294 /* --- start of the SSE2 decoder. --- */ 9295 /* ---------------------------------------------------- */ 9296 9297 /* Skip parts of the decoder which don't apply given the stated 9298 guest subarchitecture. */ 9299 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) 9300 goto after_sse_decoders; /* no SSE2 capabilities */ 9301 9302 insn = (UChar*)&guest_code[delta]; 9303 9304 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 9305 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) { 9306 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 ); 9307 goto decode_success; 9308 } 9309 9310 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 9311 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) { 9312 vassert(sz == 4); 9313 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 ); 9314 goto decode_success; 9315 } 9316 9317 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 9318 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) { 9319 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 ); 9320 goto decode_success; 9321 } 9322 9323 /* 66 0F 54 = ANDPD -- G = G and E */ 9324 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) { 9325 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 ); 9326 goto decode_success; 9327 } 9328 9329 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 9330 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) { 9331 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 ); 9332 goto decode_success; 9333 } 9334 9335 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 9336 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) { 9337 vassert(sz == 4); 9338 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 ); 9339 goto decode_success; 9340 } 9341 9342 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 9343 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 9344 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 9345 IRTemp argL = newTemp(Ity_F64); 9346 IRTemp argR = newTemp(Ity_F64); 9347 modrm = getIByte(delta+2); 9348 if (epartIsReg(modrm)) { 9349 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 9350 delta += 2+1; 9351 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9352 nameXMMReg(gregOfRM(modrm)) ); 9353 } else { 9354 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9355 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9356 delta += 2+alen; 9357 DIP("[u]comisd %s,%s\n", dis_buf, 9358 nameXMMReg(gregOfRM(modrm)) ); 9359 } 9360 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 9361 9362 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 9363 stmt( IRStmt_Put( 
OFFB_CC_DEP2, mkU32(0) )); 9364 stmt( IRStmt_Put( 9365 OFFB_CC_DEP1, 9366 binop( Iop_And32, 9367 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)), 9368 mkU32(0x45) 9369 ))); 9370 /* Set NDEP even though it isn't used. This makes redundant-PUT 9371 elimination of previous stores to this field work better. */ 9372 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 9373 goto decode_success; 9374 } 9375 9376 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 9377 F64 in xmm(G) */ 9378 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) { 9379 IRTemp arg64 = newTemp(Ity_I64); 9380 vassert(sz == 4); 9381 9382 modrm = getIByte(delta+3); 9383 if (epartIsReg(modrm)) { 9384 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) ); 9385 delta += 3+1; 9386 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9387 nameXMMReg(gregOfRM(modrm))); 9388 } else { 9389 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9390 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9391 delta += 3+alen; 9392 DIP("cvtdq2pd %s,%s\n", dis_buf, 9393 nameXMMReg(gregOfRM(modrm)) ); 9394 } 9395 9396 putXMMRegLane64F( 9397 gregOfRM(modrm), 0, 9398 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 9399 ); 9400 9401 putXMMRegLane64F( 9402 gregOfRM(modrm), 1, 9403 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 9404 ); 9405 9406 goto decode_success; 9407 } 9408 9409 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 9410 xmm(G) */ 9411 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) { 9412 IRTemp argV = newTemp(Ity_V128); 9413 IRTemp rmode = newTemp(Ity_I32); 9414 9415 modrm = getIByte(delta+2); 9416 if (epartIsReg(modrm)) { 9417 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9418 delta += 2+1; 9419 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9420 nameXMMReg(gregOfRM(modrm))); 9421 } else { 9422 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9423 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9424 delta += 2+alen; 9425 DIP("cvtdq2ps %s,%s\n", dis_buf, 9426 nameXMMReg(gregOfRM(modrm)) ); 9427 } 9428 9429 assign( rmode, get_sse_roundingmode() ); 9430 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9431 9432 # define CVT(_t) binop( Iop_F64toF32, \ 9433 mkexpr(rmode), \ 9434 unop(Iop_I32StoF64,mkexpr(_t))) 9435 9436 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) ); 9437 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) ); 9438 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9439 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9440 9441 # undef CVT 9442 9443 goto decode_success; 9444 } 9445 9446 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9447 lo half xmm(G), and zero upper half */ 9448 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) { 9449 IRTemp argV = newTemp(Ity_V128); 9450 IRTemp rmode = newTemp(Ity_I32); 9451 vassert(sz == 4); 9452 9453 modrm = getIByte(delta+3); 9454 if (epartIsReg(modrm)) { 9455 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9456 delta += 3+1; 9457 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9458 nameXMMReg(gregOfRM(modrm))); 9459 } else { 9460 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9461 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9462 delta += 3+alen; 9463 DIP("cvtpd2dq %s,%s\n", dis_buf, 9464 nameXMMReg(gregOfRM(modrm)) ); 9465 } 9466 9467 assign( rmode, get_sse_roundingmode() ); 9468 t0 = newTemp(Ity_F64); 9469 t1 = newTemp(Ity_F64); 9470 assign( t0, unop(Iop_ReinterpI64asF64, 9471 unop(Iop_V128to64, mkexpr(argV))) ); 9472 assign( t1, unop(Iop_ReinterpI64asF64, 9473 unop(Iop_V128HIto64, mkexpr(argV))) ); 9474 
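   /* (Illustrative note, not from the original sources.)  A V128
      value can only be split into halves as integer data, so the two
      F64 lanes were extracted above with Iop_V128to64 / Iop_V128HIto64
      and then re-typed via Iop_ReinterpI64asF64.  The CVT macro below
      narrows each lane under the rounding mode held in 'rmode'; for
      example

         CVT(t0)  expands to  binop(Iop_F64toI32S,
                                    mkexpr(rmode), mkexpr(t0))

      which is the IR for the low-lane F64 -> I32 conversion of
      cvtpd2dq. */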
#  define CVT(_t)  binop( Iop_F64toI32S, \
                          mkexpr(rmode), \
                          mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#  undef CVT

      goto decode_success;
   }

   /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      IRTemp f64hi  = newTemp(Ity_F64);
      Bool   r2zero = toBool(insn[1] == 0x2C);

      do_MMX_preamble();
      modrm = getIByte(delta+2);

      if (epartIsReg(modrm)) {
         delta += 2+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
                                              mkexpr(addr),
                                              mkU32(8) )));
         delta += 2+alen;
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign(
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
         )
      );

      putMMXReg(gregOfRM(modrm), mkexpr(dst64));
      goto decode_success;
   }

   /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
      lo half xmm(G), and zero upper half */
   /* Note, this is practically identical to CVTPD2DQ.  It would have
      been nicer to merge them together, but the insn[] offsets differ
      by one.
*/ 9545 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) { 9546 IRTemp argV = newTemp(Ity_V128); 9547 IRTemp rmode = newTemp(Ity_I32); 9548 9549 modrm = getIByte(delta+2); 9550 if (epartIsReg(modrm)) { 9551 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9552 delta += 2+1; 9553 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9554 nameXMMReg(gregOfRM(modrm))); 9555 } else { 9556 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9557 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9558 delta += 2+alen; 9559 DIP("cvtpd2ps %s,%s\n", dis_buf, 9560 nameXMMReg(gregOfRM(modrm)) ); 9561 } 9562 9563 assign( rmode, get_sse_roundingmode() ); 9564 t0 = newTemp(Ity_F64); 9565 t1 = newTemp(Ity_F64); 9566 assign( t0, unop(Iop_ReinterpI64asF64, 9567 unop(Iop_V128to64, mkexpr(argV))) ); 9568 assign( t1, unop(Iop_ReinterpI64asF64, 9569 unop(Iop_V128HIto64, mkexpr(argV))) ); 9570 9571 # define CVT(_t) binop( Iop_F64toF32, \ 9572 mkexpr(rmode), \ 9573 mkexpr(_t) ) 9574 9575 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9576 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9577 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9578 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9579 9580 # undef CVT 9581 9582 goto decode_success; 9583 } 9584 9585 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 9586 xmm(G) */ 9587 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) { 9588 IRTemp arg64 = newTemp(Ity_I64); 9589 9590 modrm = getIByte(delta+2); 9591 if (epartIsReg(modrm)) { 9592 /* Only switch to MMX mode if the source is a MMX register. 9593 This is inconsistent with all other instructions which 9594 convert between XMM and (M64 or MMX), which always switch 9595 to MMX mode even if 64-bit operand is M64 and not MMX. At 9596 least, that's what the Intel docs seem to me to say. 9597 Fixes #210264. */ 9598 do_MMX_preamble(); 9599 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 9600 delta += 2+1; 9601 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)), 9602 nameXMMReg(gregOfRM(modrm))); 9603 } else { 9604 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9605 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9606 delta += 2+alen; 9607 DIP("cvtpi2pd %s,%s\n", dis_buf, 9608 nameXMMReg(gregOfRM(modrm)) ); 9609 } 9610 9611 putXMMRegLane64F( 9612 gregOfRM(modrm), 0, 9613 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 9614 ); 9615 9616 putXMMRegLane64F( 9617 gregOfRM(modrm), 1, 9618 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 9619 ); 9620 9621 goto decode_success; 9622 } 9623 9624 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9625 xmm(G) */ 9626 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) { 9627 IRTemp argV = newTemp(Ity_V128); 9628 IRTemp rmode = newTemp(Ity_I32); 9629 9630 modrm = getIByte(delta+2); 9631 if (epartIsReg(modrm)) { 9632 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9633 delta += 2+1; 9634 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9635 nameXMMReg(gregOfRM(modrm))); 9636 } else { 9637 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9638 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9639 delta += 2+alen; 9640 DIP("cvtps2dq %s,%s\n", dis_buf, 9641 nameXMMReg(gregOfRM(modrm)) ); 9642 } 9643 9644 assign( rmode, get_sse_roundingmode() ); 9645 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9646 9647 /* This is less than ideal. If it turns out to be a performance 9648 bottleneck it can be improved. 
*/ 9649 # define CVT(_t) \ 9650 binop( Iop_F64toI32S, \ 9651 mkexpr(rmode), \ 9652 unop( Iop_F32toF64, \ 9653 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9654 9655 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9656 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9657 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9658 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9659 9660 # undef CVT 9661 9662 goto decode_success; 9663 } 9664 9665 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 9666 F64 in xmm(G). */ 9667 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) { 9668 IRTemp f32lo = newTemp(Ity_F32); 9669 IRTemp f32hi = newTemp(Ity_F32); 9670 9671 modrm = getIByte(delta+2); 9672 if (epartIsReg(modrm)) { 9673 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) ); 9674 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) ); 9675 delta += 2+1; 9676 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9677 nameXMMReg(gregOfRM(modrm))); 9678 } else { 9679 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9680 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 9681 assign( f32hi, loadLE(Ity_F32, 9682 binop(Iop_Add32,mkexpr(addr),mkU32(4))) ); 9683 delta += 2+alen; 9684 DIP("cvtps2pd %s,%s\n", dis_buf, 9685 nameXMMReg(gregOfRM(modrm)) ); 9686 } 9687 9688 putXMMRegLane64F( gregOfRM(modrm), 1, 9689 unop(Iop_F32toF64, mkexpr(f32hi)) ); 9690 putXMMRegLane64F( gregOfRM(modrm), 0, 9691 unop(Iop_F32toF64, mkexpr(f32lo)) ); 9692 9693 goto decode_success; 9694 } 9695 9696 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to 9697 I32 in ireg, according to prevailing SSE rounding mode */ 9698 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to 9699 I32 in ireg, rounding towards zero */ 9700 if (insn[0] == 0xF2 && insn[1] == 0x0F 9701 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 9702 IRTemp rmode = newTemp(Ity_I32); 9703 IRTemp f64lo = newTemp(Ity_F64); 9704 Bool r2zero = toBool(insn[2] == 0x2C); 9705 vassert(sz == 4); 9706 9707 modrm = getIByte(delta+3); 9708 if (epartIsReg(modrm)) { 9709 delta += 3+1; 9710 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9711 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 9712 nameXMMReg(eregOfRM(modrm)), 9713 nameIReg(4, gregOfRM(modrm))); 9714 } else { 9715 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9716 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9717 delta += 3+alen; 9718 DIP("cvt%ssd2si %s,%s\n", r2zero ? 
"t" : "", 9719 dis_buf, 9720 nameIReg(4, gregOfRM(modrm))); 9721 } 9722 9723 if (r2zero) { 9724 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9725 } else { 9726 assign( rmode, get_sse_roundingmode() ); 9727 } 9728 9729 putIReg(4, gregOfRM(modrm), 9730 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 9731 9732 goto decode_success; 9733 } 9734 9735 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 9736 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 9737 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) { 9738 IRTemp rmode = newTemp(Ity_I32); 9739 IRTemp f64lo = newTemp(Ity_F64); 9740 vassert(sz == 4); 9741 9742 modrm = getIByte(delta+3); 9743 if (epartIsReg(modrm)) { 9744 delta += 3+1; 9745 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9746 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9747 nameXMMReg(gregOfRM(modrm))); 9748 } else { 9749 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9750 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9751 delta += 3+alen; 9752 DIP("cvtsd2ss %s,%s\n", dis_buf, 9753 nameXMMReg(gregOfRM(modrm))); 9754 } 9755 9756 assign( rmode, get_sse_roundingmode() ); 9757 putXMMRegLane32F( 9758 gregOfRM(modrm), 0, 9759 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 9760 ); 9761 9762 goto decode_success; 9763 } 9764 9765 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low 9766 half xmm */ 9767 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) { 9768 IRTemp arg32 = newTemp(Ity_I32); 9769 vassert(sz == 4); 9770 9771 modrm = getIByte(delta+3); 9772 if (epartIsReg(modrm)) { 9773 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 9774 delta += 3+1; 9775 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)), 9776 nameXMMReg(gregOfRM(modrm))); 9777 } else { 9778 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9779 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 9780 delta += 3+alen; 9781 DIP("cvtsi2sd %s,%s\n", dis_buf, 9782 nameXMMReg(gregOfRM(modrm)) ); 9783 } 9784 9785 putXMMRegLane64F( 9786 gregOfRM(modrm), 0, 9787 unop(Iop_I32StoF64, mkexpr(arg32)) ); 9788 9789 goto decode_success; 9790 } 9791 9792 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 9793 low half xmm(G) */ 9794 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) { 9795 IRTemp f32lo = newTemp(Ity_F32); 9796 vassert(sz == 4); 9797 9798 modrm = getIByte(delta+3); 9799 if (epartIsReg(modrm)) { 9800 delta += 3+1; 9801 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 9802 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9803 nameXMMReg(gregOfRM(modrm))); 9804 } else { 9805 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9806 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9807 delta += 3+alen; 9808 DIP("cvtss2sd %s,%s\n", dis_buf, 9809 nameXMMReg(gregOfRM(modrm))); 9810 } 9811 9812 putXMMRegLane64F( gregOfRM(modrm), 0, 9813 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 9814 9815 goto decode_success; 9816 } 9817 9818 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9819 lo half xmm(G), and zero upper half, rounding towards zero */ 9820 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) { 9821 IRTemp argV = newTemp(Ity_V128); 9822 IRTemp rmode = newTemp(Ity_I32); 9823 9824 modrm = getIByte(delta+2); 9825 if (epartIsReg(modrm)) { 9826 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9827 delta += 2+1; 9828 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9829 nameXMMReg(gregOfRM(modrm))); 9830 } else { 9831 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9832 assign( argV, loadLE(Ity_V128, 
mkexpr(addr)) ); 9833 delta += 2+alen; 9834 DIP("cvttpd2dq %s,%s\n", dis_buf, 9835 nameXMMReg(gregOfRM(modrm)) ); 9836 } 9837 9838 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9839 9840 t0 = newTemp(Ity_F64); 9841 t1 = newTemp(Ity_F64); 9842 assign( t0, unop(Iop_ReinterpI64asF64, 9843 unop(Iop_V128to64, mkexpr(argV))) ); 9844 assign( t1, unop(Iop_ReinterpI64asF64, 9845 unop(Iop_V128HIto64, mkexpr(argV))) ); 9846 9847 # define CVT(_t) binop( Iop_F64toI32S, \ 9848 mkexpr(rmode), \ 9849 mkexpr(_t) ) 9850 9851 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9852 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9853 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9854 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9855 9856 # undef CVT 9857 9858 goto decode_success; 9859 } 9860 9861 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9862 xmm(G), rounding towards zero */ 9863 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) { 9864 IRTemp argV = newTemp(Ity_V128); 9865 IRTemp rmode = newTemp(Ity_I32); 9866 vassert(sz == 4); 9867 9868 modrm = getIByte(delta+3); 9869 if (epartIsReg(modrm)) { 9870 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9871 delta += 3+1; 9872 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9873 nameXMMReg(gregOfRM(modrm))); 9874 } else { 9875 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9876 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9877 delta += 3+alen; 9878 DIP("cvttps2dq %s,%s\n", dis_buf, 9879 nameXMMReg(gregOfRM(modrm)) ); 9880 } 9881 9882 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9883 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9884 9885 /* This is less than ideal. If it turns out to be a performance 9886 bottleneck it can be improved. */ 9887 # define CVT(_t) \ 9888 binop( Iop_F64toI32S, \ 9889 mkexpr(rmode), \ 9890 unop( Iop_F32toF64, \ 9891 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9892 9893 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9894 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9895 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9896 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9897 9898 # undef CVT 9899 9900 goto decode_success; 9901 } 9902 9903 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 9904 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) { 9905 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 ); 9906 goto decode_success; 9907 } 9908 9909 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 9910 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) { 9911 vassert(sz == 4); 9912 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 ); 9913 goto decode_success; 9914 } 9915 9916 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 9917 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 9918 if (insn[0] == 0x0F && insn[1] == 0xAE 9919 && epartIsReg(insn[2]) 9920 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) { 9921 vassert(sz == 4); 9922 delta += 3; 9923 /* Insert a memory fence. It's sometimes important that these 9924 are carried through to the generated code. */ 9925 stmt( IRStmt_MBE(Imbe_Fence) ); 9926 DIP("%sfence\n", gregOfRM(insn[2])==5 ? 
"l" : "m"); 9927 goto decode_success; 9928 } 9929 9930 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 9931 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) { 9932 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 ); 9933 goto decode_success; 9934 } 9935 9936 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 9937 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) { 9938 vassert(sz == 4); 9939 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 ); 9940 goto decode_success; 9941 } 9942 9943 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 9944 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) { 9945 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 ); 9946 goto decode_success; 9947 } 9948 9949 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 9950 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) { 9951 vassert(sz == 4); 9952 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 ); 9953 goto decode_success; 9954 } 9955 9956 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 9957 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 9958 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 9959 if (sz == 2 && insn[0] == 0x0F 9960 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 9961 HChar* wot = insn[1]==0x28 ? "apd" : 9962 insn[1]==0x10 ? "upd" : "dqa"; 9963 modrm = getIByte(delta+2); 9964 if (epartIsReg(modrm)) { 9965 putXMMReg( gregOfRM(modrm), 9966 getXMMReg( eregOfRM(modrm) )); 9967 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)), 9968 nameXMMReg(gregOfRM(modrm))); 9969 delta += 2+1; 9970 } else { 9971 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9972 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 9973 gen_SEGV_if_not_16_aligned( addr ); 9974 putXMMReg( gregOfRM(modrm), 9975 loadLE(Ity_V128, mkexpr(addr)) ); 9976 DIP("mov%s %s,%s\n", wot, dis_buf, 9977 nameXMMReg(gregOfRM(modrm))); 9978 delta += 2+alen; 9979 } 9980 goto decode_success; 9981 } 9982 9983 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 9984 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 9985 if (sz == 2 && insn[0] == 0x0F 9986 && (insn[1] == 0x29 || insn[1] == 0x11)) { 9987 HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 9988 modrm = getIByte(delta+2); 9989 if (epartIsReg(modrm)) { 9990 /* fall through; awaiting test case */ 9991 } else { 9992 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9993 if (insn[1] == 0x29/*movapd*/) 9994 gen_SEGV_if_not_16_aligned( addr ); 9995 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 9996 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)), 9997 dis_buf ); 9998 delta += 2+alen; 9999 goto decode_success; 10000 } 10001 } 10002 10003 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */ 10004 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) { 10005 modrm = getIByte(delta+2); 10006 if (epartIsReg(modrm)) { 10007 delta += 2+1; 10008 putXMMReg( 10009 gregOfRM(modrm), 10010 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) 10011 ); 10012 DIP("movd %s, %s\n", 10013 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm))); 10014 } else { 10015 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10016 delta += 2+alen; 10017 putXMMReg( 10018 gregOfRM(modrm), 10019 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 10020 ); 10021 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm))); 10022 } 10023 goto decode_success; 10024 } 10025 10026 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. 
*/ 10027 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) { 10028 modrm = getIByte(delta+2); 10029 if (epartIsReg(modrm)) { 10030 delta += 2+1; 10031 putIReg( 4, eregOfRM(modrm), 10032 getXMMRegLane32(gregOfRM(modrm), 0) ); 10033 DIP("movd %s, %s\n", 10034 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 10035 } else { 10036 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10037 delta += 2+alen; 10038 storeLE( mkexpr(addr), 10039 getXMMRegLane32(gregOfRM(modrm), 0) ); 10040 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10041 } 10042 goto decode_success; 10043 } 10044 10045 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 10046 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) { 10047 modrm = getIByte(delta+2); 10048 if (epartIsReg(modrm)) { 10049 delta += 2+1; 10050 putXMMReg( eregOfRM(modrm), 10051 getXMMReg(gregOfRM(modrm)) ); 10052 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10053 nameXMMReg(eregOfRM(modrm))); 10054 } else { 10055 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10056 delta += 2+alen; 10057 gen_SEGV_if_not_16_aligned( addr ); 10058 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10059 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10060 } 10061 goto decode_success; 10062 } 10063 10064 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 10065 /* Unfortunately can't simply use the MOVDQA case since the 10066 prefix lengths are different (66 vs F3) */ 10067 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) { 10068 vassert(sz == 4); 10069 modrm = getIByte(delta+3); 10070 if (epartIsReg(modrm)) { 10071 putXMMReg( gregOfRM(modrm), 10072 getXMMReg( eregOfRM(modrm) )); 10073 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10074 nameXMMReg(gregOfRM(modrm))); 10075 delta += 3+1; 10076 } else { 10077 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10078 putXMMReg( gregOfRM(modrm), 10079 loadLE(Ity_V128, mkexpr(addr)) ); 10080 DIP("movdqu %s,%s\n", dis_buf, 10081 nameXMMReg(gregOfRM(modrm))); 10082 delta += 3+alen; 10083 } 10084 goto decode_success; 10085 } 10086 10087 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 10088 /* Unfortunately can't simply use the MOVDQA case since the 10089 prefix lengths are different (66 vs F3) */ 10090 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) { 10091 vassert(sz == 4); 10092 modrm = getIByte(delta+3); 10093 if (epartIsReg(modrm)) { 10094 delta += 3+1; 10095 putXMMReg( eregOfRM(modrm), 10096 getXMMReg(gregOfRM(modrm)) ); 10097 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10098 nameXMMReg(eregOfRM(modrm))); 10099 } else { 10100 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 10101 delta += 3+alen; 10102 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10103 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10104 } 10105 goto decode_success; 10106 } 10107 10108 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 10109 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) { 10110 vassert(sz == 4); 10111 modrm = getIByte(delta+3); 10112 if (epartIsReg(modrm)) { 10113 do_MMX_preamble(); 10114 putMMXReg( gregOfRM(modrm), 10115 getXMMRegLane64( eregOfRM(modrm), 0 )); 10116 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10117 nameMMXReg(gregOfRM(modrm))); 10118 delta += 3+1; 10119 goto decode_success; 10120 } else { 10121 /* fall through, apparently no mem case for this insn */ 10122 } 10123 } 10124 10125 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. 
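   Only lane 1 (bits 127:64) of G is written; the lower lane is left
   unchanged.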
*/
/* This seems identical to MOVHPS.  This instruction encoding is
   completely crazy. */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
   modrm = getIByte(delta+2);
   if (epartIsReg(modrm)) {
      /* fall through; apparently reg-reg is not possible */
   } else {
      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;
      putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                       loadLE(Ity_I64, mkexpr(addr)) );
      DIP("movhpd %s,%s\n", dis_buf,
                            nameXMMReg( gregOfRM(modrm) ));
      goto decode_success;
   }
}

/* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
/* Again, this seems identical to MOVHPS. */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
   if (!epartIsReg(insn[2])) {
      delta += 2;
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      delta += alen;
      storeLE( mkexpr(addr),
               getXMMRegLane64( gregOfRM(insn[2]),
                                1/*upper lane*/ ) );
      DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                            dis_buf);
      goto decode_success;
   }
   /* else fall through */
}

/* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
/* Identical to MOVLPS ? */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
   modrm = getIByte(delta+2);
   if (epartIsReg(modrm)) {
      /* fall through; apparently reg-reg is not possible */
   } else {
      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;
      putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
                       loadLE(Ity_I64, mkexpr(addr)) );
      DIP("movlpd %s, %s\n",
          dis_buf, nameXMMReg( gregOfRM(modrm) ));
      goto decode_success;
   }
}

/* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
/* Identical to MOVLPS ?
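   Presumably so: like the MOVHPD cases above, both variants simply
   move 64 untyped bits, so the IR generated here is the same as for
   the PS form.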
*/ 10179 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) { 10180 if (!epartIsReg(insn[2])) { 10181 delta += 2; 10182 addr = disAMode ( &alen, sorb, delta, dis_buf ); 10183 delta += alen; 10184 storeLE( mkexpr(addr), 10185 getXMMRegLane64( gregOfRM(insn[2]), 10186 0/*lower lane*/ ) ); 10187 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 10188 dis_buf); 10189 goto decode_success; 10190 } 10191 /* else fall through */ 10192 } 10193 10194 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 10195 2 lowest bits of ireg(G) */ 10196 if (insn[0] == 0x0F && insn[1] == 0x50) { 10197 modrm = getIByte(delta+2); 10198 if (sz == 2 && epartIsReg(modrm)) { 10199 Int src; 10200 t0 = newTemp(Ity_I32); 10201 t1 = newTemp(Ity_I32); 10202 delta += 2+1; 10203 src = eregOfRM(modrm); 10204 assign( t0, binop( Iop_And32, 10205 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), 10206 mkU32(1) )); 10207 assign( t1, binop( Iop_And32, 10208 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), 10209 mkU32(2) )); 10210 putIReg(4, gregOfRM(modrm), 10211 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) 10212 ); 10213 DIP("movmskpd %s,%s\n", nameXMMReg(src), 10214 nameIReg(4, gregOfRM(modrm))); 10215 goto decode_success; 10216 } 10217 /* else fall through */ 10218 } 10219 10220 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 10221 if (insn[0] == 0x0F && insn[1] == 0xF7) { 10222 modrm = getIByte(delta+2); 10223 if (sz == 2 && epartIsReg(modrm)) { 10224 IRTemp regD = newTemp(Ity_V128); 10225 IRTemp mask = newTemp(Ity_V128); 10226 IRTemp olddata = newTemp(Ity_V128); 10227 IRTemp newdata = newTemp(Ity_V128); 10228 addr = newTemp(Ity_I32); 10229 10230 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 10231 assign( regD, getXMMReg( gregOfRM(modrm) )); 10232 10233 /* Unfortunately can't do the obvious thing with SarN8x16 10234 here since that can't be re-emitted as SSE2 code - no such 10235 insn. */ 10236 assign( 10237 mask, 10238 binop(Iop_64HLtoV128, 10239 binop(Iop_SarN8x8, 10240 getXMMRegLane64( eregOfRM(modrm), 1 ), 10241 mkU8(7) ), 10242 binop(Iop_SarN8x8, 10243 getXMMRegLane64( eregOfRM(modrm), 0 ), 10244 mkU8(7) ) )); 10245 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 10246 assign( newdata, 10247 binop(Iop_OrV128, 10248 binop(Iop_AndV128, 10249 mkexpr(regD), 10250 mkexpr(mask) ), 10251 binop(Iop_AndV128, 10252 mkexpr(olddata), 10253 unop(Iop_NotV128, mkexpr(mask)))) ); 10254 storeLE( mkexpr(addr), mkexpr(newdata) ); 10255 10256 delta += 2+1; 10257 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ), 10258 nameXMMReg( gregOfRM(modrm) ) ); 10259 goto decode_success; 10260 } 10261 /* else fall through */ 10262 } 10263 10264 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 10265 if (insn[0] == 0x0F && insn[1] == 0xE7) { 10266 modrm = getIByte(delta+2); 10267 if (sz == 2 && !epartIsReg(modrm)) { 10268 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10269 gen_SEGV_if_not_16_aligned( addr ); 10270 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10271 DIP("movntdq %s,%s\n", dis_buf, 10272 nameXMMReg(gregOfRM(modrm))); 10273 delta += 2+alen; 10274 goto decode_success; 10275 } 10276 /* else fall through */ 10277 } 10278 10279 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. 
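   The non-temporal cache hint only affects performance, never
   architected state, so it is safe to ignore it under simulation.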
*/ 10280 if (insn[0] == 0x0F && insn[1] == 0xC3) { 10281 vassert(sz == 4); 10282 modrm = getIByte(delta+2); 10283 if (!epartIsReg(modrm)) { 10284 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10285 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) ); 10286 DIP("movnti %s,%s\n", dis_buf, 10287 nameIReg(4, gregOfRM(modrm))); 10288 delta += 2+alen; 10289 goto decode_success; 10290 } 10291 /* else fall through */ 10292 } 10293 10294 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 10295 or lo half xmm). */ 10296 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) { 10297 modrm = getIByte(delta+2); 10298 if (epartIsReg(modrm)) { 10299 /* fall through, awaiting test case */ 10300 /* dst: lo half copied, hi half zeroed */ 10301 } else { 10302 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10303 storeLE( mkexpr(addr), 10304 getXMMRegLane64( gregOfRM(modrm), 0 )); 10305 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf ); 10306 delta += 2+alen; 10307 goto decode_success; 10308 } 10309 } 10310 10311 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 10312 hi half). */ 10313 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) { 10314 vassert(sz == 4); 10315 modrm = getIByte(delta+3); 10316 if (epartIsReg(modrm)) { 10317 do_MMX_preamble(); 10318 putXMMReg( gregOfRM(modrm), 10319 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) ); 10320 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)), 10321 nameXMMReg(gregOfRM(modrm))); 10322 delta += 3+1; 10323 goto decode_success; 10324 } else { 10325 /* fall through, apparently no mem case for this insn */ 10326 } 10327 } 10328 10329 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 10330 G (lo half xmm). Upper half of G is zeroed out. */ 10331 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 10332 G (lo half xmm). If E is mem, upper half of G is zeroed out. 10333 If E is reg, upper half of G is unchanged. */ 10334 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10) 10335 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) { 10336 vassert(sz == 4); 10337 modrm = getIByte(delta+3); 10338 if (epartIsReg(modrm)) { 10339 putXMMRegLane64( gregOfRM(modrm), 0, 10340 getXMMRegLane64( eregOfRM(modrm), 0 )); 10341 if (insn[0] == 0xF3/*MOVQ*/) { 10342 /* zero bits 127:64 */ 10343 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10344 } 10345 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10346 nameXMMReg(gregOfRM(modrm))); 10347 delta += 3+1; 10348 } else { 10349 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10350 /* zero bits 127:64 */ 10351 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10352 /* write bits 63:0 */ 10353 putXMMRegLane64( gregOfRM(modrm), 0, 10354 loadLE(Ity_I64, mkexpr(addr)) ); 10355 DIP("movsd %s,%s\n", dis_buf, 10356 nameXMMReg(gregOfRM(modrm))); 10357 delta += 3+alen; 10358 } 10359 goto decode_success; 10360 } 10361 10362 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 10363 or lo half xmm). 
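   Note the asymmetry with the load form above: in this direction a
   register destination keeps its upper 64 bits; nothing is zeroed.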
*/
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
   vassert(sz == 4);
   modrm = getIByte(delta+3);
   if (epartIsReg(modrm)) {
      putXMMRegLane64( eregOfRM(modrm), 0,
                       getXMMRegLane64( gregOfRM(modrm), 0 ));
      DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                           nameXMMReg(eregOfRM(modrm)));
      delta += 3+1;
   } else {
      addr = disAMode ( &alen, sorb, delta+3, dis_buf );
      storeLE( mkexpr(addr),
               getXMMRegLane64(gregOfRM(modrm), 0) );
      DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                           dis_buf);
      delta += 3+alen;
   }
   goto decode_success;
}

/* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
   delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
   goto decode_success;
}

/* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
   vassert(sz == 4);
   delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
   goto decode_success;
}

/* 66 0F 56 = ORPD -- G = G or E */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
   delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
   goto decode_success;
}

/* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
   Int    select;
   IRTemp sV = newTemp(Ity_V128);
   IRTemp dV = newTemp(Ity_V128);
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   modrm = insn[2];
   assign( dV, getXMMReg(gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      assign( sV, getXMMReg(eregOfRM(modrm)) );
      select = (Int)insn[3];
      delta += 2+2;
      DIP("shufpd $%d,%s,%s\n", select,
                                nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
   } else {
      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      select = (Int)insn[2+alen];
      delta += 3+alen;
      DIP("shufpd $%d,%s,%s\n", select,
                                dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
   }

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   putXMMReg(
      gregOfRM(modrm),
      binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
   );

#  undef SELD
#  undef SELS

   goto decode_success;
}

/* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
   delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
                                     "sqrtpd", Iop_Sqrt64Fx2 );
   goto decode_success;
}

/* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
   vassert(sz == 4);
   delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
                                      "sqrtsd", Iop_Sqrt64F0x2 );
   goto decode_success;
}

/* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
   delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
   goto decode_success;
}

/* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
   vassert(sz == 4);
   delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
   goto decode_success;
}

/* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
/* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
/* These just appear to be special cases of SHUFPD */
if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   IRTemp sV = newTemp(Ity_V128);
   IRTemp dV = newTemp(Ity_V128);
   Bool   hi = toBool(insn[1] == 0x15);

   modrm = insn[2];
   assign( dV, getXMMReg(gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      assign( sV, getXMMReg(eregOfRM(modrm)) );
      delta += 2+1;
      DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                               nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
   } else {
      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += 2+alen;
      DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                               dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
   }

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

   if (hi) {
      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
   } else {
      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   }

   goto decode_success;
}

/* 66 0F 57 = XORPD -- G = G xor E */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
   delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
   goto decode_success;
}

/* 66 0F 6B = PACKSSDW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "packssdw",
                              Iop_QNarrowBin32Sto16Sx8, True );
   goto decode_success;
}

/* 66 0F 63 = PACKSSWB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "packsswb",
                              Iop_QNarrowBin16Sto8Sx16, True );
   goto decode_success;
}

/* 66 0F 67 = PACKUSWB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "packuswb",
                              Iop_QNarrowBin16Sto8Ux16, True );
   goto decode_success;
}

/* 66 0F FC = PADDB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddb", Iop_Add8x16, False );
   goto decode_success;
}

/* 66 0F FE = PADDD */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddd", Iop_Add32x4, False );
   goto decode_success;
}

/* ***--- this is an MMX class insn introduced in SSE2 ---*** */
/* 0F D4 = PADDQ -- add 64x1 */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
   do_MMX_preamble();
   delta = dis_MMXop_regmem_to_reg (
             sorb, delta+2, insn[1], "paddq", False );
   goto decode_success;
}

/* 66 0F D4 = PADDQ */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddq", Iop_Add64x2, False );
   goto decode_success;
}

/* 66 0F FD = PADDW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddw", Iop_Add16x8, False );
   goto decode_success;
}

/* 66 0F EC = PADDSB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddsb", Iop_QAdd8Sx16, False );
   goto decode_success;
}

/* 66 0F ED = PADDSW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddsw", Iop_QAdd16Sx8, False );
   goto decode_success;
}

/* 66 0F DC = PADDUSB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddusb", Iop_QAdd8Ux16, False );
   goto decode_success;
}

/* 66 0F DD = PADDUSW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "paddusw", Iop_QAdd16Ux8, False );
   goto decode_success;
}

/* 66
0F DB = PAND */ 10622 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) { 10623 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 ); 10624 goto decode_success; 10625 } 10626 10627 /* 66 0F DF = PANDN */ 10628 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) { 10629 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 ); 10630 goto decode_success; 10631 } 10632 10633 /* 66 0F E0 = PAVGB */ 10634 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) { 10635 delta = dis_SSEint_E_to_G( sorb, delta+2, 10636 "pavgb", Iop_Avg8Ux16, False ); 10637 goto decode_success; 10638 } 10639 10640 /* 66 0F E3 = PAVGW */ 10641 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) { 10642 delta = dis_SSEint_E_to_G( sorb, delta+2, 10643 "pavgw", Iop_Avg16Ux8, False ); 10644 goto decode_success; 10645 } 10646 10647 /* 66 0F 74 = PCMPEQB */ 10648 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) { 10649 delta = dis_SSEint_E_to_G( sorb, delta+2, 10650 "pcmpeqb", Iop_CmpEQ8x16, False ); 10651 goto decode_success; 10652 } 10653 10654 /* 66 0F 76 = PCMPEQD */ 10655 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) { 10656 delta = dis_SSEint_E_to_G( sorb, delta+2, 10657 "pcmpeqd", Iop_CmpEQ32x4, False ); 10658 goto decode_success; 10659 } 10660 10661 /* 66 0F 75 = PCMPEQW */ 10662 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) { 10663 delta = dis_SSEint_E_to_G( sorb, delta+2, 10664 "pcmpeqw", Iop_CmpEQ16x8, False ); 10665 goto decode_success; 10666 } 10667 10668 /* 66 0F 64 = PCMPGTB */ 10669 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) { 10670 delta = dis_SSEint_E_to_G( sorb, delta+2, 10671 "pcmpgtb", Iop_CmpGT8Sx16, False ); 10672 goto decode_success; 10673 } 10674 10675 /* 66 0F 66 = PCMPGTD */ 10676 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) { 10677 delta = dis_SSEint_E_to_G( sorb, delta+2, 10678 "pcmpgtd", Iop_CmpGT32Sx4, False ); 10679 goto decode_success; 10680 } 10681 10682 /* 66 0F 65 = PCMPGTW */ 10683 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) { 10684 delta = dis_SSEint_E_to_G( sorb, delta+2, 10685 "pcmpgtw", Iop_CmpGT16Sx8, False ); 10686 goto decode_success; 10687 } 10688 10689 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 10690 zero-extend of it in ireg(G). */ 10691 if (insn[0] == 0x0F && insn[1] == 0xC5) { 10692 modrm = insn[2]; 10693 if (sz == 2 && epartIsReg(modrm)) { 10694 t5 = newTemp(Ity_V128); 10695 t4 = newTemp(Ity_I16); 10696 assign(t5, getXMMReg(eregOfRM(modrm))); 10697 breakup128to32s( t5, &t3, &t2, &t1, &t0 ); 10698 switch (insn[3] & 7) { 10699 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; 10700 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; 10701 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; 10702 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; 10703 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; 10704 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; 10705 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; 10706 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; 10707 default: vassert(0); /*NOTREACHED*/ 10708 } 10709 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4))); 10710 DIP("pextrw $%d,%s,%s\n", 10711 (Int)insn[3], nameXMMReg(eregOfRM(modrm)), 10712 nameIReg(4,gregOfRM(modrm))); 10713 delta += 4; 10714 goto decode_success; 10715 } 10716 /* else fall through */ 10717 } 10718 10719 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 10720 put it into the specified lane of xmm(G). 
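   The lane number is taken from the immediate byte which follows the
   modRM (and any address) bytes, and is masked to 0..7 below, an xmm
   register having eight 16-bit lanes.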
*/ 10721 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) { 10722 Int lane; 10723 t4 = newTemp(Ity_I16); 10724 modrm = insn[2]; 10725 10726 if (epartIsReg(modrm)) { 10727 assign(t4, getIReg(2, eregOfRM(modrm))); 10728 delta += 3+1; 10729 lane = insn[3+1-1]; 10730 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10731 nameIReg(2,eregOfRM(modrm)), 10732 nameXMMReg(gregOfRM(modrm))); 10733 } else { 10734 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10735 delta += 3+alen; 10736 lane = insn[3+alen-1]; 10737 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 10738 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10739 dis_buf, 10740 nameXMMReg(gregOfRM(modrm))); 10741 } 10742 10743 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) ); 10744 goto decode_success; 10745 } 10746 10747 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 10748 E(xmm or mem) to G(xmm) */ 10749 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) { 10750 IRTemp s1V = newTemp(Ity_V128); 10751 IRTemp s2V = newTemp(Ity_V128); 10752 IRTemp dV = newTemp(Ity_V128); 10753 IRTemp s1Hi = newTemp(Ity_I64); 10754 IRTemp s1Lo = newTemp(Ity_I64); 10755 IRTemp s2Hi = newTemp(Ity_I64); 10756 IRTemp s2Lo = newTemp(Ity_I64); 10757 IRTemp dHi = newTemp(Ity_I64); 10758 IRTemp dLo = newTemp(Ity_I64); 10759 modrm = insn[2]; 10760 if (epartIsReg(modrm)) { 10761 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 10762 delta += 2+1; 10763 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10764 nameXMMReg(gregOfRM(modrm))); 10765 } else { 10766 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10767 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 10768 delta += 2+alen; 10769 DIP("pmaddwd %s,%s\n", dis_buf, 10770 nameXMMReg(gregOfRM(modrm))); 10771 } 10772 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 10773 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 10774 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 10775 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 10776 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 10777 assign( dHi, mkIRExprCCall( 10778 Ity_I64, 0/*regparms*/, 10779 "x86g_calculate_mmx_pmaddwd", 10780 &x86g_calculate_mmx_pmaddwd, 10781 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 10782 )); 10783 assign( dLo, mkIRExprCCall( 10784 Ity_I64, 0/*regparms*/, 10785 "x86g_calculate_mmx_pmaddwd", 10786 &x86g_calculate_mmx_pmaddwd, 10787 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 10788 )); 10789 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 10790 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 10791 goto decode_success; 10792 } 10793 10794 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 10795 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) { 10796 delta = dis_SSEint_E_to_G( sorb, delta+2, 10797 "pmaxsw", Iop_Max16Sx8, False ); 10798 goto decode_success; 10799 } 10800 10801 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 10802 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) { 10803 delta = dis_SSEint_E_to_G( sorb, delta+2, 10804 "pmaxub", Iop_Max8Ux16, False ); 10805 goto decode_success; 10806 } 10807 10808 /* 66 0F EA = PMINSW -- 16x8 signed min */ 10809 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) { 10810 delta = dis_SSEint_E_to_G( sorb, delta+2, 10811 "pminsw", Iop_Min16Sx8, False ); 10812 goto decode_success; 10813 } 10814 10815 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 10816 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) { 10817 delta = dis_SSEint_E_to_G( sorb, delta+2, 10818 "pminub", Iop_Min8Ux16, False ); 10819 goto decode_success; 10820 } 10821 10822 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in 10823 
xmm(E), turn them into a byte, and put zero-extend of it in
   ireg(G).  Doing this directly is just too cumbersome; give up
   therefore and call a helper. */
/* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
   modrm = insn[2];
   if (epartIsReg(modrm)) {
      t0 = newTemp(Ity_I64);
      t1 = newTemp(Ity_I64);
      assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
      assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
      t5 = newTemp(Ity_I32);
      assign(t5, mkIRExprCCall(
                    Ity_I32, 0/*regparms*/,
                    "x86g_calculate_sse_pmovmskb",
                    &x86g_calculate_sse_pmovmskb,
                    mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
      putIReg(4, gregOfRM(modrm), mkexpr(t5));
      DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameIReg(4,gregOfRM(modrm)));
      delta += 3;
      goto decode_success;
   }
   /* else fall through */
}

/* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "pmulhuw", Iop_MulHi16Ux8, False );
   goto decode_success;
}

/* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "pmulhw", Iop_MulHi16Sx8, False );
   goto decode_success;
}

/* 66 0F D5 = PMULLW -- 16x8 multiply */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "pmullw", Iop_Mul16x8, False );
   goto decode_success;
}

/* ***--- this is an MMX class insn introduced in SSE2 ---*** */
/* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
   0 to form 64-bit result */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
   IRTemp sV = newTemp(Ity_I64);
   IRTemp dV = newTemp(Ity_I64);
   t1 = newTemp(Ity_I32);
   t0 = newTemp(Ity_I32);
   modrm = insn[2];

   do_MMX_preamble();
   assign( dV, getMMXReg(gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      assign( sV, getMMXReg(eregOfRM(modrm)) );
      delta += 2+1;
      DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                             nameMMXReg(gregOfRM(modrm)));
   } else {
      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
      delta += 2+alen;
      DIP("pmuludq %s,%s\n", dis_buf,
                             nameMMXReg(gregOfRM(modrm)));
   }

   assign( t0, unop(Iop_64to32, mkexpr(dV)) );
   assign( t1, unop(Iop_64to32, mkexpr(sV)) );
   putMMXReg( gregOfRM(modrm),
              binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
   goto decode_success;
}

/* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
   0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
   half */
/* This is a really poor translation -- could be improved if
   performance critical */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
   IRTemp sV, dV;
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   sV = newTemp(Ity_V128);
   dV = newTemp(Ity_V128);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   t1 = newTemp(Ity_I64);
   t0 = newTemp(Ity_I64);
   modrm = insn[2];
   assign( dV, getXMMReg(gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      assign( sV,
getXMMReg(eregOfRM(modrm)) ); 10921 delta += 2+1; 10922 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10923 nameXMMReg(gregOfRM(modrm))); 10924 } else { 10925 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10926 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10927 delta += 2+alen; 10928 DIP("pmuludq %s,%s\n", dis_buf, 10929 nameXMMReg(gregOfRM(modrm))); 10930 } 10931 10932 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 10933 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 10934 10935 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); 10936 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) ); 10937 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); 10938 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) ); 10939 goto decode_success; 10940 } 10941 10942 /* 66 0F EB = POR */ 10943 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) { 10944 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 ); 10945 goto decode_success; 10946 } 10947 10948 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 10949 from E(xmm or mem) to G(xmm) */ 10950 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) { 10951 IRTemp s1V = newTemp(Ity_V128); 10952 IRTemp s2V = newTemp(Ity_V128); 10953 IRTemp dV = newTemp(Ity_V128); 10954 IRTemp s1Hi = newTemp(Ity_I64); 10955 IRTemp s1Lo = newTemp(Ity_I64); 10956 IRTemp s2Hi = newTemp(Ity_I64); 10957 IRTemp s2Lo = newTemp(Ity_I64); 10958 IRTemp dHi = newTemp(Ity_I64); 10959 IRTemp dLo = newTemp(Ity_I64); 10960 modrm = insn[2]; 10961 if (epartIsReg(modrm)) { 10962 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 10963 delta += 2+1; 10964 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10965 nameXMMReg(gregOfRM(modrm))); 10966 } else { 10967 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10968 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 10969 delta += 2+alen; 10970 DIP("psadbw %s,%s\n", dis_buf, 10971 nameXMMReg(gregOfRM(modrm))); 10972 } 10973 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 10974 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 10975 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 10976 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 10977 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 10978 assign( dHi, mkIRExprCCall( 10979 Ity_I64, 0/*regparms*/, 10980 "x86g_calculate_mmx_psadbw", 10981 &x86g_calculate_mmx_psadbw, 10982 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 10983 )); 10984 assign( dLo, mkIRExprCCall( 10985 Ity_I64, 0/*regparms*/, 10986 "x86g_calculate_mmx_psadbw", 10987 &x86g_calculate_mmx_psadbw, 10988 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 10989 )); 10990 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 10991 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 10992 goto decode_success; 10993 } 10994 10995 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 10996 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) { 10997 Int order; 10998 IRTemp sV, dV, s3, s2, s1, s0; 10999 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11000 sV = newTemp(Ity_V128); 11001 dV = newTemp(Ity_V128); 11002 modrm = insn[2]; 11003 if (epartIsReg(modrm)) { 11004 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11005 order = (Int)insn[3]; 11006 delta += 2+2; 11007 DIP("pshufd $%d,%s,%s\n", order, 11008 nameXMMReg(eregOfRM(modrm)), 11009 nameXMMReg(gregOfRM(modrm))); 11010 } else { 11011 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11012 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11013 order = (Int)insn[2+alen]; 11014 delta += 3+alen; 11015 DIP("pshufd $%d,%s,%s\n", order, 11016 dis_buf, 11017 nameXMMReg(gregOfRM(modrm))); 
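/* Each 2-bit field of 'order' selects one of the four source lanes
   for the corresponding destination lane, via SEL below.  For
   example, order == 0x1B (binary 00 01 10 11) reverses the four
   lanes. */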
11018 } 11019 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11020 11021 # define SEL(n) \ 11022 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11023 assign(dV, 11024 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 11025 SEL((order>>2)&3), SEL((order>>0)&3) ) 11026 ); 11027 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11028 # undef SEL 11029 goto decode_success; 11030 } 11031 11032 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 11033 mem) to G(xmm), and copy lower half */ 11034 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) { 11035 Int order; 11036 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; 11037 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11038 sV = newTemp(Ity_V128); 11039 dV = newTemp(Ity_V128); 11040 sVhi = newTemp(Ity_I64); 11041 dVhi = newTemp(Ity_I64); 11042 modrm = insn[3]; 11043 if (epartIsReg(modrm)) { 11044 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11045 order = (Int)insn[4]; 11046 delta += 4+1; 11047 DIP("pshufhw $%d,%s,%s\n", order, 11048 nameXMMReg(eregOfRM(modrm)), 11049 nameXMMReg(gregOfRM(modrm))); 11050 } else { 11051 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11052 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11053 order = (Int)insn[3+alen]; 11054 delta += 4+alen; 11055 DIP("pshufhw $%d,%s,%s\n", order, 11056 dis_buf, 11057 nameXMMReg(gregOfRM(modrm))); 11058 } 11059 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); 11060 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); 11061 11062 # define SEL(n) \ 11063 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11064 assign(dVhi, 11065 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11066 SEL((order>>2)&3), SEL((order>>0)&3) ) 11067 ); 11068 assign(dV, binop( Iop_64HLtoV128, 11069 mkexpr(dVhi), 11070 unop(Iop_V128to64, mkexpr(sV))) ); 11071 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11072 # undef SEL 11073 goto decode_success; 11074 } 11075 11076 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 11077 mem) to G(xmm), and copy upper half */ 11078 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) { 11079 Int order; 11080 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; 11081 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11082 sV = newTemp(Ity_V128); 11083 dV = newTemp(Ity_V128); 11084 sVlo = newTemp(Ity_I64); 11085 dVlo = newTemp(Ity_I64); 11086 modrm = insn[3]; 11087 if (epartIsReg(modrm)) { 11088 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11089 order = (Int)insn[4]; 11090 delta += 4+1; 11091 DIP("pshuflw $%d,%s,%s\n", order, 11092 nameXMMReg(eregOfRM(modrm)), 11093 nameXMMReg(gregOfRM(modrm))); 11094 } else { 11095 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11096 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11097 order = (Int)insn[3+alen]; 11098 delta += 4+alen; 11099 DIP("pshuflw $%d,%s,%s\n", order, 11100 dis_buf, 11101 nameXMMReg(gregOfRM(modrm))); 11102 } 11103 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); 11104 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); 11105 11106 # define SEL(n) \ 11107 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 11108 assign(dVlo, 11109 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11110 SEL((order>>2)&3), SEL((order>>0)&3) ) 11111 ); 11112 assign(dV, binop( Iop_64HLtoV128, 11113 unop(Iop_V128HIto64, mkexpr(sV)), 11114 mkexpr(dVlo) ) ); 11115 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11116 # undef SEL 11117 goto decode_success; 11118 } 11119 11120 /* 66 0F 72 /6 ib = PSLLD by immediate */ 11121 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11122 && epartIsReg(insn[2]) 11123 && gregOfRM(insn[2]) == 6) { 11124 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 ); 11125 goto decode_success; 11126 } 11127 11128 /* 66 0F F2 = PSLLD by E */ 11129 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) { 11130 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 ); 11131 goto decode_success; 11132 } 11133 11134 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 11135 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11136 && epartIsReg(insn[2]) 11137 && gregOfRM(insn[2]) == 7) { 11138 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11139 Int imm = (Int)insn[3]; 11140 Int reg = eregOfRM(insn[2]); 11141 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 11142 vassert(imm >= 0 && imm <= 255); 11143 delta += 4; 11144 11145 sV = newTemp(Ity_V128); 11146 dV = newTemp(Ity_V128); 11147 hi64 = newTemp(Ity_I64); 11148 lo64 = newTemp(Ity_I64); 11149 hi64r = newTemp(Ity_I64); 11150 lo64r = newTemp(Ity_I64); 11151 11152 if (imm >= 16) { 11153 putXMMReg(reg, mkV128(0x0000)); 11154 goto decode_success; 11155 } 11156 11157 assign( sV, getXMMReg(reg) ); 11158 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11159 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11160 11161 if (imm == 0) { 11162 assign( lo64r, mkexpr(lo64) ); 11163 assign( hi64r, mkexpr(hi64) ); 11164 } 11165 else 11166 if (imm == 8) { 11167 assign( lo64r, mkU64(0) ); 11168 assign( hi64r, mkexpr(lo64) ); 11169 } 11170 else 11171 if (imm > 8) { 11172 assign( lo64r, mkU64(0) ); 11173 assign( hi64r, binop( Iop_Shl64, 11174 mkexpr(lo64), 11175 mkU8( 8*(imm-8) ) )); 11176 } else { 11177 assign( lo64r, binop( Iop_Shl64, 11178 mkexpr(lo64), 11179 mkU8(8 * imm) )); 11180 assign( hi64r, 11181 binop( Iop_Or64, 11182 binop(Iop_Shl64, mkexpr(hi64), 11183 mkU8(8 * imm)), 11184 binop(Iop_Shr64, mkexpr(lo64), 11185 mkU8(8 * (8 - imm)) ) 11186 ) 11187 ); 11188 } 11189 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11190 putXMMReg(reg, mkexpr(dV)); 11191 goto decode_success; 11192 } 11193 11194 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 11195 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11196 && epartIsReg(insn[2]) 11197 && gregOfRM(insn[2]) == 6) { 11198 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 ); 11199 goto decode_success; 11200 } 11201 11202 /* 66 0F F3 = PSLLQ by E */ 11203 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) { 11204 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 ); 11205 goto decode_success; 11206 } 11207 11208 /* 66 0F 71 /6 ib = PSLLW by immediate */ 11209 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11210 && epartIsReg(insn[2]) 11211 && gregOfRM(insn[2]) == 6) { 11212 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 ); 11213 goto decode_success; 11214 } 11215 11216 /* 66 0F F1 = PSLLW by E */ 11217 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) { 11218 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 ); 11219 goto decode_success; 11220 } 11221 11222 /* 66 0F 72 /4 ib = PSRAD by immediate */ 11223 if (sz == 2 && insn[0] == 0x0F && insn[1] == 
0x72 11224 && epartIsReg(insn[2]) 11225 && gregOfRM(insn[2]) == 4) { 11226 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 ); 11227 goto decode_success; 11228 } 11229 11230 /* 66 0F E2 = PSRAD by E */ 11231 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) { 11232 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 ); 11233 goto decode_success; 11234 } 11235 11236 /* 66 0F 71 /4 ib = PSRAW by immediate */ 11237 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11238 && epartIsReg(insn[2]) 11239 && gregOfRM(insn[2]) == 4) { 11240 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 ); 11241 goto decode_success; 11242 } 11243 11244 /* 66 0F E1 = PSRAW by E */ 11245 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) { 11246 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 ); 11247 goto decode_success; 11248 } 11249 11250 /* 66 0F 72 /2 ib = PSRLD by immediate */ 11251 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11252 && epartIsReg(insn[2]) 11253 && gregOfRM(insn[2]) == 2) { 11254 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 ); 11255 goto decode_success; 11256 } 11257 11258 /* 66 0F D2 = PSRLD by E */ 11259 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) { 11260 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 ); 11261 goto decode_success; 11262 } 11263 11264 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 11265 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11266 && epartIsReg(insn[2]) 11267 && gregOfRM(insn[2]) == 3) { 11268 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11269 Int imm = (Int)insn[3]; 11270 Int reg = eregOfRM(insn[2]); 11271 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 11272 vassert(imm >= 0 && imm <= 255); 11273 delta += 4; 11274 11275 sV = newTemp(Ity_V128); 11276 dV = newTemp(Ity_V128); 11277 hi64 = newTemp(Ity_I64); 11278 lo64 = newTemp(Ity_I64); 11279 hi64r = newTemp(Ity_I64); 11280 lo64r = newTemp(Ity_I64); 11281 11282 if (imm >= 16) { 11283 putXMMReg(reg, mkV128(0x0000)); 11284 goto decode_success; 11285 } 11286 11287 assign( sV, getXMMReg(reg) ); 11288 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11289 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11290 11291 if (imm == 0) { 11292 assign( lo64r, mkexpr(lo64) ); 11293 assign( hi64r, mkexpr(hi64) ); 11294 } 11295 else 11296 if (imm == 8) { 11297 assign( hi64r, mkU64(0) ); 11298 assign( lo64r, mkexpr(hi64) ); 11299 } 11300 else 11301 if (imm > 8) { 11302 assign( hi64r, mkU64(0) ); 11303 assign( lo64r, binop( Iop_Shr64, 11304 mkexpr(hi64), 11305 mkU8( 8*(imm-8) ) )); 11306 } else { 11307 assign( hi64r, binop( Iop_Shr64, 11308 mkexpr(hi64), 11309 mkU8(8 * imm) )); 11310 assign( lo64r, 11311 binop( Iop_Or64, 11312 binop(Iop_Shr64, mkexpr(lo64), 11313 mkU8(8 * imm)), 11314 binop(Iop_Shl64, mkexpr(hi64), 11315 mkU8(8 * (8 - imm)) ) 11316 ) 11317 ); 11318 } 11319 11320 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11321 putXMMReg(reg, mkexpr(dV)); 11322 goto decode_success; 11323 } 11324 11325 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 11326 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11327 && epartIsReg(insn[2]) 11328 && gregOfRM(insn[2]) == 2) { 11329 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 ); 11330 goto decode_success; 11331 } 11332 11333 /* 66 0F D3 = PSRLQ by E */ 11334 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) { 11335 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 ); 11336 goto decode_success; 11337 } 11338 11339 /* 66 0F 71 /2 ib = PSRLW by immediate */ 11340 if 
(sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
    && epartIsReg(insn[2])
    && gregOfRM(insn[2]) == 2) {
   delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
   goto decode_success;
}

/* 66 0F D1 = PSRLW by E */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
   delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
   goto decode_success;
}

/* 66 0F F8 = PSUBB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubb", Iop_Sub8x16, False );
   goto decode_success;
}

/* 66 0F FA = PSUBD */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubd", Iop_Sub32x4, False );
   goto decode_success;
}

/* ***--- this is an MMX class insn introduced in SSE2 ---*** */
/* 0F FB = PSUBQ -- sub 64x1 */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
   do_MMX_preamble();
   delta = dis_MMXop_regmem_to_reg (
             sorb, delta+2, insn[1], "psubq", False );
   goto decode_success;
}

/* 66 0F FB = PSUBQ */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubq", Iop_Sub64x2, False );
   goto decode_success;
}

/* 66 0F F9 = PSUBW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubw", Iop_Sub16x8, False );
   goto decode_success;
}

/* 66 0F E8 = PSUBSB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubsb", Iop_QSub8Sx16, False );
   goto decode_success;
}

/* 66 0F E9 = PSUBSW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubsw", Iop_QSub16Sx8, False );
   goto decode_success;
}

/* 66 0F D8 = PSUBUSB */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubusb", Iop_QSub8Ux16, False );
   goto decode_success;
}

/* 66 0F D9 = PSUBUSW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "psubusw", Iop_QSub16Ux8, False );
   goto decode_success;
}

/* 66 0F 68 = PUNPCKHBW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "punpckhbw",
                              Iop_InterleaveHI8x16, True );
   goto decode_success;
}

/* 66 0F 6A = PUNPCKHDQ */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "punpckhdq",
                              Iop_InterleaveHI32x4, True );
   goto decode_success;
}

/* 66 0F 6D = PUNPCKHQDQ */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "punpckhqdq",
                              Iop_InterleaveHI64x2, True );
   goto decode_success;
}

/* 66 0F 69 = PUNPCKHWD */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
   delta = dis_SSEint_E_to_G( sorb, delta+2,
                              "punpckhwd",
                              Iop_InterleaveHI16x8, True );
   goto decode_success;
}

/* 66 0F 60 = PUNPCKLBW */
if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
   delta = dis_SSEint_E_to_G(
sorb, delta+2, 11453 "punpcklbw", 11454 Iop_InterleaveLO8x16, True ); 11455 goto decode_success; 11456 } 11457 11458 /* 66 0F 62 = PUNPCKLDQ */ 11459 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) { 11460 delta = dis_SSEint_E_to_G( sorb, delta+2, 11461 "punpckldq", 11462 Iop_InterleaveLO32x4, True ); 11463 goto decode_success; 11464 } 11465 11466 /* 66 0F 6C = PUNPCKLQDQ */ 11467 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) { 11468 delta = dis_SSEint_E_to_G( sorb, delta+2, 11469 "punpcklqdq", 11470 Iop_InterleaveLO64x2, True ); 11471 goto decode_success; 11472 } 11473 11474 /* 66 0F 61 = PUNPCKLWD */ 11475 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) { 11476 delta = dis_SSEint_E_to_G( sorb, delta+2, 11477 "punpcklwd", 11478 Iop_InterleaveLO16x8, True ); 11479 goto decode_success; 11480 } 11481 11482 /* 66 0F EF = PXOR */ 11483 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) { 11484 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 ); 11485 goto decode_success; 11486 } 11487 11488 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */ 11489 //-- if (insn[0] == 0x0F && insn[1] == 0xAE 11490 //-- && (!epartIsReg(insn[2])) 11491 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) { 11492 //-- Bool store = gregOfRM(insn[2]) == 0; 11493 //-- vg_assert(sz == 4); 11494 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf ); 11495 //-- t1 = LOW24(pair); 11496 //-- eip += 2+HI8(pair); 11497 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512, 11498 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1], 11499 //-- Lit16, (UShort)insn[2], 11500 //-- TempReg, t1 ); 11501 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf ); 11502 //-- goto decode_success; 11503 //-- } 11504 11505 /* 0F AE /7 = CLFLUSH -- flush cache line */ 11506 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 11507 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 11508 11509 /* This is something of a hack. We need to know the size of the 11510 cache line containing addr. Since we don't (easily), assume 11511 256 on the basis that no real cache would have a line that 11512 big. It's safe to invalidate more stuff than we need, just 11513 inefficient. */ 11514 UInt lineszB = 256; 11515 11516 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11517 delta += 2+alen; 11518 11519 /* Round addr down to the start of the containing block. */ 11520 stmt( IRStmt_Put( 11521 OFFB_TISTART, 11522 binop( Iop_And32, 11523 mkexpr(addr), 11524 mkU32( ~(lineszB-1) ))) ); 11525 11526 stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) ); 11527 11528 irsb->jumpkind = Ijk_TInval; 11529 irsb->next = mkU32(guest_EIP_bbstart+delta); 11530 dres.whatNext = Dis_StopHere; 11531 11532 DIP("clflush %s\n", dis_buf); 11533 goto decode_success; 11534 } 11535 11536 /* ---------------------------------------------------- */ 11537 /* --- end of the SSE2 decoder. --- */ 11538 /* ---------------------------------------------------- */ 11539 11540 /* ---------------------------------------------------- */ 11541 /* --- start of the SSE3 decoder. --- */ 11542 /* ---------------------------------------------------- */ 11543 11544 /* Skip parts of the decoder which don't apply given the stated 11545 guest subarchitecture. */ 11546 /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */ 11547 /* In fact this is highly bogus; we accept SSE3 insns even on a 11548 SSE2-only guest since they turn into IR which can be re-emitted 11549 successfully on an SSE2 host. 
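      Hence the hwcaps check below demands only SSE2, not SSE3.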
*/ 11550 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) 11551 goto after_sse_decoders; /* no SSE3 capabilities */ 11552 11553 insn = (UChar*)&guest_code[delta]; 11554 11555 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 11556 duplicating some lanes (2:2:0:0). */ 11557 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 11558 duplicating some lanes (3:3:1:1). */ 11559 if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F 11560 && (insn[2] == 0x12 || insn[2] == 0x16)) { 11561 IRTemp s3, s2, s1, s0; 11562 IRTemp sV = newTemp(Ity_V128); 11563 Bool isH = insn[2] == 0x16; 11564 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11565 11566 modrm = insn[3]; 11567 if (epartIsReg(modrm)) { 11568 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11569 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11570 nameXMMReg(eregOfRM(modrm)), 11571 nameXMMReg(gregOfRM(modrm))); 11572 delta += 3+1; 11573 } else { 11574 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11575 gen_SEGV_if_not_16_aligned( addr ); 11576 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11577 DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l', 11578 dis_buf, 11579 nameXMMReg(gregOfRM(modrm))); 11580 delta += 3+alen; 11581 } 11582 11583 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11584 putXMMReg( gregOfRM(modrm), 11585 isH ? mk128from32s( s3, s3, s1, s1 ) 11586 : mk128from32s( s2, s2, s0, s0 ) ); 11587 goto decode_success; 11588 } 11589 11590 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 11591 duplicating some lanes (0:1:0:1). */ 11592 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) { 11593 IRTemp sV = newTemp(Ity_V128); 11594 IRTemp d0 = newTemp(Ity_I64); 11595 11596 modrm = insn[3]; 11597 if (epartIsReg(modrm)) { 11598 assign( sV, getXMMReg( eregOfRM(modrm)) ); 11599 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11600 nameXMMReg(gregOfRM(modrm))); 11601 delta += 3+1; 11602 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 11603 } else { 11604 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11605 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 11606 DIP("movddup %s,%s\n", dis_buf, 11607 nameXMMReg(gregOfRM(modrm))); 11608 delta += 3+alen; 11609 } 11610 11611 putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 11612 goto decode_success; 11613 } 11614 11615 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). 
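   Concretely, result lanes 3 and 1 come from the addition and lanes
   2 and 0 from the subtraction, as the final
   mk128from32s( a3, s2, a1, s0 ) in the code below shows.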
*/
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
      IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
      assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }

   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp a1   = newTemp(Ity_I64);
      IRTemp s0   = newTemp(Ity_I64);

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
      assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }

   /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
   /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
       && (insn[2] == 0x7C || insn[2] == 0x7D)) {
      IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      Bool   isAdd  = insn[2] == 0x7C;
      HChar* str    = isAdd ?
"add" : "sub"; 11697 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 11698 11699 modrm = insn[3]; 11700 if (epartIsReg(modrm)) { 11701 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11702 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11703 nameXMMReg(gregOfRM(modrm))); 11704 delta += 3+1; 11705 } else { 11706 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11707 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11708 DIP("h%sps %s,%s\n", str, dis_buf, 11709 nameXMMReg(gregOfRM(modrm))); 11710 delta += 3+alen; 11711 } 11712 11713 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11714 11715 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 11716 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 11717 11718 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 11719 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 11720 11721 putXMMReg( gregOfRM(modrm), 11722 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 11723 mkexpr(leftV), mkexpr(rightV) ) ); 11724 goto decode_success; 11725 } 11726 11727 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 11728 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 11729 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 11730 IRTemp e1 = newTemp(Ity_I64); 11731 IRTemp e0 = newTemp(Ity_I64); 11732 IRTemp g1 = newTemp(Ity_I64); 11733 IRTemp g0 = newTemp(Ity_I64); 11734 IRTemp eV = newTemp(Ity_V128); 11735 IRTemp gV = newTemp(Ity_V128); 11736 IRTemp leftV = newTemp(Ity_V128); 11737 IRTemp rightV = newTemp(Ity_V128); 11738 Bool isAdd = insn[1] == 0x7C; 11739 HChar* str = isAdd ? "add" : "sub"; 11740 11741 modrm = insn[2]; 11742 if (epartIsReg(modrm)) { 11743 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11744 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11745 nameXMMReg(gregOfRM(modrm))); 11746 delta += 2+1; 11747 } else { 11748 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11749 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11750 DIP("h%spd %s,%s\n", str, dis_buf, 11751 nameXMMReg(gregOfRM(modrm))); 11752 delta += 2+alen; 11753 } 11754 11755 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11756 11757 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 11758 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 11759 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 11760 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 11761 11762 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 11763 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 11764 11765 putXMMReg( gregOfRM(modrm), 11766 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 11767 mkexpr(leftV), mkexpr(rightV) ) ); 11768 goto decode_success; 11769 } 11770 11771 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 11772 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) { 11773 modrm = getIByte(delta+3); 11774 if (epartIsReg(modrm)) { 11775 goto decode_failure; 11776 } else { 11777 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11778 putXMMReg( gregOfRM(modrm), 11779 loadLE(Ity_V128, mkexpr(addr)) ); 11780 DIP("lddqu %s,%s\n", dis_buf, 11781 nameXMMReg(gregOfRM(modrm))); 11782 delta += 3+alen; 11783 } 11784 goto decode_success; 11785 } 11786 11787 /* ---------------------------------------------------- */ 11788 /* --- end of the SSE3 decoder. --- */ 11789 /* ---------------------------------------------------- */ 11790 11791 /* ---------------------------------------------------- */ 11792 /* --- start of the SSSE3 decoder. 
--- */ 11793 /* ---------------------------------------------------- */ 11794 11795 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 11796 Unsigned Bytes (MMX) */ 11797 if (sz == 4 11798 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 11799 IRTemp sV = newTemp(Ity_I64); 11800 IRTemp dV = newTemp(Ity_I64); 11801 IRTemp sVoddsSX = newTemp(Ity_I64); 11802 IRTemp sVevensSX = newTemp(Ity_I64); 11803 IRTemp dVoddsZX = newTemp(Ity_I64); 11804 IRTemp dVevensZX = newTemp(Ity_I64); 11805 11806 modrm = insn[3]; 11807 do_MMX_preamble(); 11808 assign( dV, getMMXReg(gregOfRM(modrm)) ); 11809 11810 if (epartIsReg(modrm)) { 11811 assign( sV, getMMXReg(eregOfRM(modrm)) ); 11812 delta += 3+1; 11813 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), 11814 nameMMXReg(gregOfRM(modrm))); 11815 } else { 11816 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11817 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 11818 delta += 3+alen; 11819 DIP("pmaddubsw %s,%s\n", dis_buf, 11820 nameMMXReg(gregOfRM(modrm))); 11821 } 11822 11823 /* compute dV unsigned x sV signed */ 11824 assign( sVoddsSX, 11825 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 11826 assign( sVevensSX, 11827 binop(Iop_SarN16x4, 11828 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 11829 mkU8(8)) ); 11830 assign( dVoddsZX, 11831 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 11832 assign( dVevensZX, 11833 binop(Iop_ShrN16x4, 11834 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 11835 mkU8(8)) ); 11836 11837 putMMXReg( 11838 gregOfRM(modrm), 11839 binop(Iop_QAdd16Sx4, 11840 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 11841 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 11842 ) 11843 ); 11844 goto decode_success; 11845 } 11846 11847 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 11848 Unsigned Bytes (XMM) */ 11849 if (sz == 2 11850 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { 11851 IRTemp sV = newTemp(Ity_V128); 11852 IRTemp dV = newTemp(Ity_V128); 11853 IRTemp sVoddsSX = newTemp(Ity_V128); 11854 IRTemp sVevensSX = newTemp(Ity_V128); 11855 IRTemp dVoddsZX = newTemp(Ity_V128); 11856 IRTemp dVevensZX = newTemp(Ity_V128); 11857 11858 modrm = insn[3]; 11859 assign( dV, getXMMReg(gregOfRM(modrm)) ); 11860 11861 if (epartIsReg(modrm)) { 11862 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11863 delta += 3+1; 11864 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 11865 nameXMMReg(gregOfRM(modrm))); 11866 } else { 11867 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11868 gen_SEGV_if_not_16_aligned( addr ); 11869 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11870 delta += 3+alen; 11871 DIP("pmaddubsw %s,%s\n", dis_buf, 11872 nameXMMReg(gregOfRM(modrm))); 11873 } 11874 11875 /* compute dV unsigned x sV signed */ 11876 assign( sVoddsSX, 11877 binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 11878 assign( sVevensSX, 11879 binop(Iop_SarN16x8, 11880 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 11881 mkU8(8)) ); 11882 assign( dVoddsZX, 11883 binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 11884 assign( dVevensZX, 11885 binop(Iop_ShrN16x8, 11886 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 11887 mkU8(8)) ); 11888 11889 putXMMReg( 11890 gregOfRM(modrm), 11891 binop(Iop_QAdd16Sx8, 11892 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 11893 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 11894 ) 11895 ); 11896 goto decode_success; 11897 } 11898 11899 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 11900 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 11901 mmx) and G 
to G (mmx). */ 11902 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 11903 mmx) and G to G (mmx). */ 11904 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 11905 to G (mmx). */ 11906 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 11907 to G (mmx). */ 11908 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 11909 to G (mmx). */ 11910 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 11911 to G (mmx). */ 11912 11913 if (sz == 4 11914 && insn[0] == 0x0F && insn[1] == 0x38 11915 && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 11916 || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { 11917 HChar* str = "???"; 11918 IROp opV64 = Iop_INVALID; 11919 IROp opCatO = Iop_CatOddLanes16x4; 11920 IROp opCatE = Iop_CatEvenLanes16x4; 11921 IRTemp sV = newTemp(Ity_I64); 11922 IRTemp dV = newTemp(Ity_I64); 11923 11924 modrm = insn[3]; 11925 11926 switch (insn[2]) { 11927 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 11928 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 11929 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 11930 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 11931 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 11932 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 11933 default: vassert(0); 11934 } 11935 if (insn[2] == 0x02 || insn[2] == 0x06) { 11936 opCatO = Iop_InterleaveHI32x2; 11937 opCatE = Iop_InterleaveLO32x2; 11938 } 11939 11940 do_MMX_preamble(); 11941 assign( dV, getMMXReg(gregOfRM(modrm)) ); 11942 11943 if (epartIsReg(modrm)) { 11944 assign( sV, getMMXReg(eregOfRM(modrm)) ); 11945 delta += 3+1; 11946 DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), 11947 nameMMXReg(gregOfRM(modrm))); 11948 } else { 11949 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11950 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 11951 delta += 3+alen; 11952 DIP("ph%s %s,%s\n", str, dis_buf, 11953 nameMMXReg(gregOfRM(modrm))); 11954 } 11955 11956 putMMXReg( 11957 gregOfRM(modrm), 11958 binop(opV64, 11959 binop(opCatE,mkexpr(sV),mkexpr(dV)), 11960 binop(opCatO,mkexpr(sV),mkexpr(dV)) 11961 ) 11962 ); 11963 goto decode_success; 11964 } 11965 11966 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 11967 xmm) and G to G (xmm). */ 11968 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 11969 xmm) and G to G (xmm). */ 11970 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 11971 G to G (xmm). */ 11972 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 11973 G to G (xmm). */ 11974 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 11975 G to G (xmm). */ 11976 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 11977 G to G (xmm). 
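      In each case the high 64 bits of the result are computed from E
      and the low 64 bits from G, matching the Intel definitions.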
*/

   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      HChar* str    = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_V128);
      IRTemp dV     = newTemp(Ity_V128);
      IRTemp sHi    = newTemp(Ity_I64);
      IRTemp sLo    = newTemp(Ity_I64);
      IRTemp dHi    = newTemp(Ity_I64);
      IRTemp dLo    = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends.
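         For example, for phaddw the high 64 bits of the result are
         ( s6+s7, s4+s5, s2+s3, s0+s1 ), where s7..s0 are the 16-bit
         lanes of E; that is just opV64 applied to the even and odd
         lanes of sHi:sLo.  The low half is the same computation on
         dHi:dLo.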
*/
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }

   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }

   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }

   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_I64);
      IRTemp dV      = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                     nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }

   /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
   /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp dV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      IRTemp dHi     = newTemp(Ity_I64);
      IRTemp dLo     = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                     nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }

   /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
   /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
   /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV      = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PABS_helper( mkexpr(sV), laneszB )
      );
      goto decode_success;
   }
/* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }

   /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_I64);
      IRTemp dV  = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      if (d32 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign(res,
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
                )));
      }
      else if (d32 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
      }
      else if (d32 >= 16 && d32 <= 255) {
         assign( res, mkU64(0) );
      }
      else
         vassert(0);

      putMMXReg( gregOfRM(modrm), mkexpr(res) );
      goto decode_success;
   }

   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);

      modrm = insn[3];
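      /* Conceptually, palignr forms the 256-bit value dV:sV and
         extracts the 16 bytes starting at byte offset imm8.  The
         case analysis below expresses that byte-extraction in terms
         of the two 64-bit halves of each operand, in the same style
         as the explicit shift/or combinations in the MMX case
         above. */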
assign( dV, getXMMReg(gregOfRM(modrm)) ); 12376 12377 if (epartIsReg(modrm)) { 12378 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12379 d32 = (UInt)insn[3+1]; 12380 delta += 3+1+1; 12381 DIP("palignr $%d,%s,%s\n", (Int)d32, 12382 nameXMMReg(eregOfRM(modrm)), 12383 nameXMMReg(gregOfRM(modrm))); 12384 } else { 12385 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12386 gen_SEGV_if_not_16_aligned( addr ); 12387 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12388 d32 = (UInt)insn[3+alen]; 12389 delta += 3+alen+1; 12390 DIP("palignr $%d,%s,%s\n", (Int)d32, 12391 dis_buf, 12392 nameXMMReg(gregOfRM(modrm))); 12393 } 12394 12395 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12396 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12397 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12398 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12399 12400 if (d32 == 0) { 12401 assign( rHi, mkexpr(sHi) ); 12402 assign( rLo, mkexpr(sLo) ); 12403 } 12404 else if (d32 >= 1 && d32 <= 7) { 12405 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) ); 12406 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) ); 12407 } 12408 else if (d32 == 8) { 12409 assign( rHi, mkexpr(dLo) ); 12410 assign( rLo, mkexpr(sHi) ); 12411 } 12412 else if (d32 >= 9 && d32 <= 15) { 12413 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) ); 12414 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) ); 12415 } 12416 else if (d32 == 16) { 12417 assign( rHi, mkexpr(dHi) ); 12418 assign( rLo, mkexpr(dLo) ); 12419 } 12420 else if (d32 >= 17 && d32 <= 23) { 12421 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) ); 12422 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) ); 12423 } 12424 else if (d32 == 24) { 12425 assign( rHi, mkU64(0) ); 12426 assign( rLo, mkexpr(dHi) ); 12427 } 12428 else if (d32 >= 25 && d32 <= 31) { 12429 assign( rHi, mkU64(0) ); 12430 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) ); 12431 } 12432 else if (d32 >= 32 && d32 <= 255) { 12433 assign( rHi, mkU64(0) ); 12434 assign( rLo, mkU64(0) ); 12435 } 12436 else 12437 vassert(0); 12438 12439 putXMMReg( 12440 gregOfRM(modrm), 12441 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12442 ); 12443 goto decode_success; 12444 } 12445 12446 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 12447 if (sz == 4 12448 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12449 IRTemp sV = newTemp(Ity_I64); 12450 IRTemp dV = newTemp(Ity_I64); 12451 12452 modrm = insn[3]; 12453 do_MMX_preamble(); 12454 assign( dV, getMMXReg(gregOfRM(modrm)) ); 12455 12456 if (epartIsReg(modrm)) { 12457 assign( sV, getMMXReg(eregOfRM(modrm)) ); 12458 delta += 3+1; 12459 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)), 12460 nameMMXReg(gregOfRM(modrm))); 12461 } else { 12462 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12463 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 12464 delta += 3+alen; 12465 DIP("pshufb %s,%s\n", dis_buf, 12466 nameMMXReg(gregOfRM(modrm))); 12467 } 12468 12469 putMMXReg( 12470 gregOfRM(modrm), 12471 binop( 12472 Iop_And64, 12473 /* permute the lanes */ 12474 binop( 12475 Iop_Perm8x8, 12476 mkexpr(dV), 12477 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 12478 ), 12479 /* mask off lanes which have (index & 0x80) == 0x80 */ 12480 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 12481 ) 12482 ); 12483 goto decode_success; 12484 } 12485 12486 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 12487 if (sz == 2 12488 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { 12489 IRTemp sV = 
newTemp(Ity_V128); 12490 IRTemp dV = newTemp(Ity_V128); 12491 IRTemp sHi = newTemp(Ity_I64); 12492 IRTemp sLo = newTemp(Ity_I64); 12493 IRTemp dHi = newTemp(Ity_I64); 12494 IRTemp dLo = newTemp(Ity_I64); 12495 IRTemp rHi = newTemp(Ity_I64); 12496 IRTemp rLo = newTemp(Ity_I64); 12497 IRTemp sevens = newTemp(Ity_I64); 12498 IRTemp mask0x80hi = newTemp(Ity_I64); 12499 IRTemp mask0x80lo = newTemp(Ity_I64); 12500 IRTemp maskBit3hi = newTemp(Ity_I64); 12501 IRTemp maskBit3lo = newTemp(Ity_I64); 12502 IRTemp sAnd7hi = newTemp(Ity_I64); 12503 IRTemp sAnd7lo = newTemp(Ity_I64); 12504 IRTemp permdHi = newTemp(Ity_I64); 12505 IRTemp permdLo = newTemp(Ity_I64); 12506 12507 modrm = insn[3]; 12508 assign( dV, getXMMReg(gregOfRM(modrm)) ); 12509 12510 if (epartIsReg(modrm)) { 12511 assign( sV, getXMMReg(eregOfRM(modrm)) ); 12512 delta += 3+1; 12513 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)), 12514 nameXMMReg(gregOfRM(modrm))); 12515 } else { 12516 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 12517 gen_SEGV_if_not_16_aligned( addr ); 12518 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12519 delta += 3+alen; 12520 DIP("pshufb %s,%s\n", dis_buf, 12521 nameXMMReg(gregOfRM(modrm))); 12522 } 12523 12524 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 12525 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 12526 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 12527 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 12528 12529 assign( sevens, mkU64(0x0707070707070707ULL) ); 12530 12531 /* 12532 mask0x80hi = Not(SarN8x8(sHi,7)) 12533 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 12534 sAnd7hi = And(sHi,sevens) 12535 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 12536 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 12537 rHi = And(permdHi,mask0x80hi) 12538 */ 12539 assign( 12540 mask0x80hi, 12541 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 12542 12543 assign( 12544 maskBit3hi, 12545 binop(Iop_SarN8x8, 12546 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 12547 mkU8(7))); 12548 12549 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 12550 12551 assign( 12552 permdHi, 12553 binop( 12554 Iop_Or64, 12555 binop(Iop_And64, 12556 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 12557 mkexpr(maskBit3hi)), 12558 binop(Iop_And64, 12559 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 12560 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 12561 12562 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 12563 12564 /* And the same for the lower half of the result. What fun. */ 12565 12566 assign( 12567 mask0x80lo, 12568 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 12569 12570 assign( 12571 maskBit3lo, 12572 binop(Iop_SarN8x8, 12573 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 12574 mkU8(7))); 12575 12576 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 12577 12578 assign( 12579 permdLo, 12580 binop( 12581 Iop_Or64, 12582 binop(Iop_And64, 12583 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 12584 mkexpr(maskBit3lo)), 12585 binop(Iop_And64, 12586 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 12587 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 12588 12589 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 12590 12591 putXMMReg( 12592 gregOfRM(modrm), 12593 binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) 12594 ); 12595 goto decode_success; 12596 } 12597 12598 /* ---------------------------------------------------- */ 12599 /* --- end of the SSSE3 decoder. 
--- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE4 decoder                    --- */
   /* ---------------------------------------------------- */

   /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
      (Partial implementation only -- only deal with cases where
      the rounding mode is specified directly by the immediate byte.)
      66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
      (Limitations ditto)
   */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A
       && (insn[2] == 0x0B || insn[2] == 0x0A)) {

      Bool   isD = insn[2] == 0x0B;
      IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
      IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
      Int    imm = 0;

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( src,
                 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
                     : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
         imm = insn[3+1];
         if (imm & ~3) goto decode_failure;
         delta += 3+1+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, nameXMMReg( eregOfRM(modrm) ),
                   nameXMMReg( gregOfRM(modrm) ) );
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
         imm = insn[3+alen];
         if (imm & ~3) goto decode_failure;
         delta += 3+alen+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
      }

      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
         that encoding is the same as the encoding for IRRoundingMode
         (0 = to nearest even, 1 = toward -inf, 2 = toward +inf,
         3 = toward zero, in both encodings), we can use that value
         directly in the IR as a rounding mode. */
      assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                        mkU32(imm & 3), mkexpr(src)) );

      if (isD)
         putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
      else
         putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );

      goto decode_success;
   }

   /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
      vassert(sz == 2 || sz == 4);
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign(src, getIReg(sz, eregOfRM(modrm)));
         delta += 3+1;
         DIP("lzcnt%c %s, %s\n", nameISize(sz),
             nameIReg(sz, eregOfRM(modrm)),
             nameIReg(sz, gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign(src, loadLE(ty, mkexpr(addr)));
         delta += 3+alen;
         DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIReg(sz, gregOfRM(modrm)));
      }

      IRTemp res = gen_LZCNT(ty, src);
      putIReg(sz, gregOfRM(modrm), mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
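      // Example: lzcntl applied to 0x00010000 gives 15, so Z=0 and
      // C=0; applied to zero it gives 32, with Z=0 and C=1.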
      IRTemp src32 = newTemp(Ity_I32);
      IRTemp res32 = newTemp(Ity_I32);
      assign(src32, widenUto32(mkexpr(src)));
      assign(res32, widenUto32(mkexpr(res)));

      IRTemp oszacp = newTemp(Ity_I32);
      assign(
         oszacp,
         binop(Iop_Or32,
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_Z)),
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_C))
         )
      );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSE4 decoder                      --- */
   /* ---------------------------------------------------- */

   after_sse_decoders:

   /* ---------------------------------------------------- */
   /* --- deal with misc 0x67 pfxs (addr size override) -- */
   /* ---------------------------------------------------- */

   /* 67 E3 = JCXZ (for JECXZ see below) */
   if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
      delta += 2;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
               Ijk_Boring,
               IRConst_U32(d32)
            ));
      DIP("jcxz 0x%x\n", d32);
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- start of the baseline insn decoder            -- */
   /* ---------------------------------------------------- */

   /* Get the primary opcode. */
   opc = getIByte(delta); delta++;

   /* We get here if the current insn isn't SSE, or this CPU doesn't
      support SSE. */

   switch (opc) {

   /* ------------------------ Control flow --------------- */

   case 0xC2: /* RET imm16 */
      d32 = getUDisp16(delta);
      delta += 2;
      dis_ret(d32);
      dres.whatNext = Dis_StopHere;
      DIP("ret %d\n", (Int)d32);
      break;
   case 0xC3: /* RET */
      dis_ret(0);
      dres.whatNext = Dis_StopHere;
      DIP("ret\n");
      break;

   case 0xCF: /* IRET */
      /* Note, this is an extremely kludgey and limited implementation
         of iret.  All it really does is:
            popl %EIP; popl %CS; popl %EFLAGS.
         %CS is set but ignored (as it is in (eg) popw %cs).
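         On entry %ESP points at the saved EIP, so the three words
         popped are EIP at 0(%ESP), CS at 4(%ESP) and EFLAGS at
         8(%ESP), after which %ESP is advanced by 12.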
*/ 12774 t1 = newTemp(Ity_I32); /* ESP */ 12775 t2 = newTemp(Ity_I32); /* new EIP */ 12776 t3 = newTemp(Ity_I32); /* new CS */ 12777 t4 = newTemp(Ity_I32); /* new EFLAGS */ 12778 assign(t1, getIReg(4,R_ESP)); 12779 assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) ))); 12780 assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) ))); 12781 assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) ))); 12782 /* Get stuff off stack */ 12783 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12))); 12784 /* set %CS (which is ignored anyway) */ 12785 putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) ); 12786 /* set %EFLAGS */ 12787 set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ ); 12788 /* goto new EIP value */ 12789 jmp_treg(Ijk_Ret,t2); 12790 dres.whatNext = Dis_StopHere; 12791 DIP("iret (very kludgey)\n"); 12792 break; 12793 12794 case 0xE8: /* CALL J4 */ 12795 d32 = getUDisp32(delta); delta += 4; 12796 d32 += (guest_EIP_bbstart+delta); 12797 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */ 12798 if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58 12799 && getIByte(delta) <= 0x5F) { 12800 /* Specially treat the position-independent-code idiom 12801 call X 12802 X: popl %reg 12803 as 12804 movl %eip, %reg. 12805 since this generates better code, but for no other reason. */ 12806 Int archReg = getIByte(delta) - 0x58; 12807 /* vex_printf("-- fPIC thingy\n"); */ 12808 putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta)); 12809 delta++; /* Step over the POP */ 12810 DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); 12811 } else { 12812 /* The normal sequence for a call. */ 12813 t1 = newTemp(Ity_I32); 12814 assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 12815 putIReg(4, R_ESP, mkexpr(t1)); 12816 storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta)); 12817 if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) { 12818 /* follow into the call target. */ 12819 dres.whatNext = Dis_ResteerU; 12820 dres.continueAt = (Addr64)(Addr32)d32; 12821 } else { 12822 jmp_lit(Ijk_Call,d32); 12823 dres.whatNext = Dis_StopHere; 12824 } 12825 DIP("call 0x%x\n",d32); 12826 } 12827 break; 12828 12829 //-- case 0xC8: /* ENTER */ 12830 //-- d32 = getUDisp16(eip); eip += 2; 12831 //-- abyte = getIByte(delta); delta++; 12832 //-- 12833 //-- vg_assert(sz == 4); 12834 //-- vg_assert(abyte == 0); 12835 //-- 12836 //-- t1 = newTemp(cb); t2 = newTemp(cb); 12837 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1); 12838 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); 12839 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 12840 //-- uLiteral(cb, sz); 12841 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 12842 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 12843 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); 12844 //-- if (d32) { 12845 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); 12846 //-- uLiteral(cb, d32); 12847 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); 12848 //-- } 12849 //-- DIP("enter 0x%x, 0x%x", d32, abyte); 12850 //-- break; 12851 12852 case 0xC9: /* LEAVE */ 12853 vassert(sz == 4); 12854 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 12855 assign(t1, getIReg(4,R_EBP)); 12856 /* First PUT ESP looks redundant, but need it because ESP must 12857 always be up-to-date for Memcheck to work... 
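         (Memcheck in particular watches writes to ESP in order to
         track the addressability of stack memory, so each
         intermediate ESP value must actually be written back.)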
*/ 12858 putIReg(4, R_ESP, mkexpr(t1)); 12859 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 12860 putIReg(4, R_EBP, mkexpr(t2)); 12861 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) ); 12862 DIP("leave\n"); 12863 break; 12864 12865 /* ---------------- Misc weird-ass insns --------------- */ 12866 12867 case 0x27: /* DAA */ 12868 case 0x2F: /* DAS */ 12869 case 0x37: /* AAA */ 12870 case 0x3F: /* AAS */ 12871 /* An ugly implementation for some ugly instructions. Oh 12872 well. */ 12873 if (sz != 4) goto decode_failure; 12874 t1 = newTemp(Ity_I32); 12875 t2 = newTemp(Ity_I32); 12876 /* Make up a 32-bit value (t1), with the old value of AX in the 12877 bottom 16 bits, and the old OSZACP bitmask in the upper 16 12878 bits. */ 12879 assign(t1, 12880 binop(Iop_16HLto32, 12881 unop(Iop_32to16, 12882 mk_x86g_calculate_eflags_all()), 12883 getIReg(2, R_EAX) 12884 )); 12885 /* Call the helper fn, to get a new AX and OSZACP value, and 12886 poke both back into the guest state. Also pass the helper 12887 the actual opcode so it knows which of the 4 instructions it 12888 is doing the computation for. */ 12889 vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F); 12890 assign(t2, 12891 mkIRExprCCall( 12892 Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas", 12893 &x86g_calculate_daa_das_aaa_aas, 12894 mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) ) 12895 )); 12896 putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) )); 12897 12898 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 12899 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 12900 stmt( IRStmt_Put( OFFB_CC_DEP1, 12901 binop(Iop_And32, 12902 binop(Iop_Shr32, mkexpr(t2), mkU8(16)), 12903 mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P 12904 | X86G_CC_MASK_A | X86G_CC_MASK_Z 12905 | X86G_CC_MASK_S| X86G_CC_MASK_O ) 12906 ) 12907 ) 12908 ); 12909 /* Set NDEP even though it isn't used. This makes redundant-PUT 12910 elimination of previous stores to this field work better. */ 12911 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 12912 switch (opc) { 12913 case 0x27: DIP("daa\n"); break; 12914 case 0x2F: DIP("das\n"); break; 12915 case 0x37: DIP("aaa\n"); break; 12916 case 0x3F: DIP("aas\n"); break; 12917 default: vassert(0); 12918 } 12919 break; 12920 12921 case 0xD4: /* AAM */ 12922 case 0xD5: /* AAD */ 12923 d32 = getIByte(delta); delta++; 12924 if (sz != 4 || d32 != 10) goto decode_failure; 12925 t1 = newTemp(Ity_I32); 12926 t2 = newTemp(Ity_I32); 12927 /* Make up a 32-bit value (t1), with the old value of AX in the 12928 bottom 16 bits, and the old OSZACP bitmask in the upper 16 12929 bits. */ 12930 assign(t1, 12931 binop(Iop_16HLto32, 12932 unop(Iop_32to16, 12933 mk_x86g_calculate_eflags_all()), 12934 getIReg(2, R_EAX) 12935 )); 12936 /* Call the helper fn, to get a new AX and OSZACP value, and 12937 poke both back into the guest state. Also pass the helper 12938 the actual opcode so it knows which of the 2 instructions it 12939 is doing the computation for. 
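         (For example, AAM with the standard base of 10 turns
         AL == 0x2F, i.e. 47, into AH == 4, AL == 7.)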
*/
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                &x86g_calculate_aad_aam,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
                      )
          );
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

      DIP(opc == 0xD4 ? "aam\n" : "aad\n");
      break;

   /* ------------------------ CWD/CDQ -------------------- */

   case 0x98: /* CBW/CWDE */
      if (sz == 4) {
         putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
         DIP("cwde\n");
      } else {
         vassert(sz == 2);
         putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;

   case 0x99: /* CWD/CDQ */
      ty = szToITy(sz);
      putIReg(sz, R_EDX,
              binop(mkSizedOp(ty,Iop_Sar8),
                    getIReg(sz, R_EAX),
                    mkU8(sz == 2 ? 15 : 31)) );
      DIP(sz == 2 ? "cwd\n" : "cdq\n");
      break;

   /* ------------------------ FPU ops -------------------- */

   case 0x9E: /* SAHF */
      codegen_SAHF();
      DIP("sahf\n");
      break;

   case 0x9F: /* LAHF */
      codegen_LAHF();
      DIP("lahf\n");
      break;

   case 0x9B: /* FWAIT */
      /* ignore? */
      DIP("fwait\n");
      break;

   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }

   /* ------------------------ INC & DEC ------------------ */

   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;

   case 0x48: /* DEC eAX */
   case 0x49: /* DEC eCX */
   case 0x4A: /* DEC eDX */
   case 0x4B: /* DEC eBX */
   case 0x4C: /* DEC eSP */
   case 0x4D: /* DEC eBP */
   case 0x4E: /* DEC eSI */
   case 0x4F: /* DEC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
                        getIReg(sz, (UInt)(opc - 0x48)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( False, t1, ty );
      putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
      DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
      break;

   /* ------------------------ INT ------------------------ */

   case 0xCC: /* INT 3 */
      jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
      dres.whatNext = Dis_StopHere;
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x40 .. $0x43 by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction).  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do. */
      if (d32 >= 0x40 && d32 <= 0x43) {
         jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x%x\n", (Int)d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls).  As part of this, note where we are, so we
         can back up the guest to this point if the syscall needs to
         be restarted. */
      if (d32 == 0x80) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x80\n");
         break;
      }
      if (d32 == 0x81) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x81\n");
         break;
      }
      if (d32 == 0x82) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x82\n");
         break;
      }

      /* none of the above */
      goto decode_failure;

   /* ------------------------ Jcond, byte offset --------- */

   case 0xEB: /* Jb (jump, byte offset) */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(Ijk_Boring,d32);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      vassert(sz == 4); /* JRS added 2004 July 11 */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(Ijk_Boring,d32);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp 0x%x\n", d32);
      break;

   case 0x70: /* JOb (jump overflow) */
   case 0x71: /* JNOb (jump no overflow) */
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity
   case 0x70:
   case 0x71:
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
    { Int    jmpDelta;
      HChar* comment  = "";
      jmpDelta = (Int)getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
      delta++;
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta < 0
          && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         /* Speculation: assume this backward branch is taken.  So we
            need to emit a side-exit to the insn following this one,
            on the negation of the condition, and continue at the
            branch target address (d32).  If we wind up back at the
            first instruction of the trace, just stop; it's better to
            let the IR loop unroller handle that case. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
                  Ijk_Boring,
                  IRConst_U32(guest_EIP_bbstart+delta) ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr64)(Addr32)d32;
         comment = "(assumed taken)";
      }
      else
      if (resteerCisOk
          && vex_control.guest_chase_cond
          && (Addr32)d32 != (Addr32)guest_EIP_bbstart
          && jmpDelta >= 0
          && resteerOkFn( callback_opaque,
                          (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
         /* Speculation: assume this forward branch is not taken.  So
            we need to emit a side-exit to d32 (the dest) and continue
            disassembling at the insn immediately following this
            one. */
         stmt( IRStmt_Exit(
                  mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
                  Ijk_Boring,
                  IRConst_U32(d32) ) );
         dres.whatNext   = Dis_ResteerC;
         dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
         comment = "(assumed not taken)";
      }
      else {
         /* Conservative default translation - end the block at this
            point. */
         jcc_01( (X86Condcode)(opc - 0x70),
                 (Addr32)(guest_EIP_bbstart+delta), d32);
         dres.whatNext = Dis_StopHere;
      }
      DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
      break;
    }
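
   /* A note on the (1 ^ (opc - 0x70)) negation trick above: x86
      condition codes come in complementary even/odd pairs, so flipping
      the bottom bit of the code negates the condition -- e.g. X86CondZ
      becomes X86CondNZ, X86CondB becomes X86CondNB, and so on. */
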
   case 0xE3: /* JECXZ (for JCXZ see above) */
      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
               Ijk_Boring,
               IRConst_U32(d32)
            ));
      DIP("jecxz 0x%x\n", d32);
      break;

   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* Again, the docs say this uses ECX/CX as a count depending on
         the address size override, not the operand one.  Since we
         don't handle address size overrides, I guess that means
         ECX. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      HChar*  xtra  = NULL;

      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));

      count = getIReg(4,R_ECX);
      cond = binop(Iop_CmpNE32, count, mkU32(0));
      switch (opc) {
         case 0xE2:
            xtra = "";
            break;
         case 0xE1:
            xtra = "e";
            zbit = mk_x86g_calculate_condition( X86CondZ );
            cond = mkAnd1(cond, zbit);
            break;
         case 0xE0:
            xtra = "ne";
            zbit = mk_x86g_calculate_condition( X86CondNZ );
            cond = mkAnd1(cond, zbit);
            break;
         default:
            vassert(0);
      }
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );

      DIP("loop%s 0x%x\n", xtra, d32);
      break;
    }

   /* ------------------------ IMUL ----------------------- */

   case 0x69: /* IMUL Iv, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
      break;
   case 0x6B: /* IMUL Ib, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
      break;

   /* ------------------------ MOV ------------------------ */

   case 0x88: /* MOV Gb,Eb */
      delta = dis_mov_G_E(sorb, 1, delta);
      break;

   case 0x89: /* MOV Gv,Ev */
      delta = dis_mov_G_E(sorb, sz, delta);
      break;

   case 0x8A: /* MOV Eb,Gb */
      delta = dis_mov_E_G(sorb, 1, delta);
      break;

   case 0x8B: /* MOV Ev,Gv */
      delta = dis_mov_E_G(sorb, sz, delta);
      break;

   case 0x8D: /* LEA M,Gv */
      if (sz != 4)
         goto decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we pass
         zero instead of sorb here. */
      addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
      delta += alen;
      putIReg(sz, gregOfRM(modrm), mkexpr(addr));
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
                            nameIReg(sz,gregOfRM(modrm)));
      break;

   case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
      delta = dis_mov_Sw_Ew(sorb, sz, delta);
      break;

   case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
      delta = dis_mov_Ew_Sw(sorb, delta);
      break;

   case 0xA0: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
      DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
                                d32, nameIReg(sz,R_EAX));
      break;
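
   /* Worked example for the moffs forms above: "mov 0x8049000,%eax"
      (opcode 0xA1) carries the absolute address 0x8049000 as a 32-bit
      displacement, so %EAX is loaded straight from that address --
      adjusted by handleSegOverride if a segment-override prefix is
      present. */
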
   case 0xA2: /* MOV AL,Ob */
      sz = 1;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
      DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
                                sorbTxt(sorb), d32);
      break;

   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      d32 = getIByte(delta); delta += 1;
      putIReg(1, opc-0xB0, mkU8(d32));
      DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
      break;

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      d32 = getUDisp(sz,delta); delta += sz;
      putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
      DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
      break;

   case 0xC6: /* MOV Ib,Eb */
      sz = 1;
      goto do_Mov_I_E;
   case 0xC7: /* MOV Iv,Ev */
      goto do_Mov_I_E;

   do_Mov_I_E:
      modrm = getIByte(delta);
      if (epartIsReg(modrm)) {
         delta++; /* mod/rm byte */
         d32 = getUDisp(sz,delta); delta += sz;
         putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
         DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
                                  nameIReg(sz,eregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         d32 = getUDisp(sz,delta); delta += sz;
         storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
         DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
      }
      break;

   /* ------------------------ opl imm, A ----------------- */

   case 0x04: /* ADD Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
      break;
   case 0x05: /* ADD Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
      break;

   case 0x0C: /* OR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
      break;
   case 0x0D: /* OR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
      break;

   case 0x14: /* ADC Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
      break;
   case 0x15: /* ADC Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
      break;

   case 0x1C: /* SBB Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
      break;
   case 0x1D: /* SBB Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
      break;

   case 0x24: /* AND Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
      break;
   case 0x25: /* AND Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
      break;

   case 0x2C: /* SUB Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
      break;
   case 0x2D: /* SUB Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
      break;
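
   /* A note on these dis_op_imm_A calls: the IROp passed (Iop_Add8,
      Iop_Or8, etc.) just names the operation family; the helper widens
      it to the actual operand size, so e.g. Iop_Add8 with sz==4 ends
      up as a 32-bit add. */
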
   case 0x34: /* XOR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
      break;
   case 0x35: /* XOR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
      break;

   case 0x3C: /* CMP Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
      break;
   case 0x3D: /* CMP Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
      break;

   case 0xA8: /* TEST Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
      break;
   case 0xA9: /* TEST Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
      break;

   /* ------------------------ opl Ev, Gv ----------------- */

   case 0x02: /* ADD Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x03: /* ADD Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x0A: /* OR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x0B: /* OR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x12: /* ADC Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x13: /* ADC Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x1A: /* SBB Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x1B: /* SBB Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x22: /* AND Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
      break;
   case 0x23: /* AND Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
      break;

   case 0x2A: /* SUB Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x2B: /* SUB Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x32: /* XOR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x33: /* XOR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x3A: /* CMP Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x3B: /* CMP Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
      break;

   case 0x84: /* TEST Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
      break;
   case 0x85: /* TEST Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
      break;
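
   /* Also worth noting: CMP and TEST pass False for the Bool following
      the IROp, which appears to be a keep-result flag -- only the
      condition codes are updated and the arithmetic result itself is
      discarded, as those two insns require. */
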
   /* ------------------------ opl Gv, Ev ----------------- */

   case 0x00: /* ADD Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x01: /* ADD Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x08: /* OR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x09: /* OR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x10: /* ADC Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x11: /* ADC Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x18: /* SBB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x19: /* SBB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x20: /* AND Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, 1, delta, "and" );
      break;
   case 0x21: /* AND Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, sz, delta, "and" );
      break;

   case 0x28: /* SUB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x29: /* SUB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x30: /* XOR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x31: /* XOR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x38: /* CMP Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x39: /* CMP Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, sz, delta, "cmp" );
      break;

   /* ------------------------ POP ------------------------ */

   case 0x58: /* POP eAX */
   case 0x59: /* POP eCX */
   case 0x5A: /* POP eDX */
   case 0x5B: /* POP eBX */
   case 0x5D: /* POP eBP */
   case 0x5E: /* POP eSI */
   case 0x5F: /* POP eDI */
   case 0x5C: /* POP eSP */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
      putIReg(sz, opc-0x58, mkexpr(t1));
      DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
      break;
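
   /* Note the ordering in the POP-reg case above: the value is loaded
      before %ESP is bumped, and the destination register is written
      last.  That makes "pop %esp" come out as per the Intel docs --
      the final %ESP is the value just loaded from memory, not the
      incremented stack pointer. */
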
   case 0x9D: /* POPF */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));

      /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
         value in t1. */
      set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
                             ((Addr32)guest_EIP_bbstart)+delta );

      DIP("popf%c\n", nameISize(sz));
      break;

   case 0x61: /* POPA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* t5 is the old %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, getIReg(4, R_ESP) );

      /* Reload all the registers, except %esp. */
      putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
      putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
      putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
      putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
      /* ignore saved %ESP */
      putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
      putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
      putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));

      /* and move %ESP back up */
      putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );

      DIP("popa%c\n", nameISize(sz));
      break;
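
   /* For reference, the 8-word frame used by POPA above (and built by
      PUSHA further down), as offsets from the lowest address:
         +0  EDI   +4  ESI   +8  EBP   +12 ESP (ignored on POPA)
         +16 EBX   +20 EDX   +24 ECX   +28 EAX */
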
   case 0x8F: /* POPL/POPW m32 */
    { Int   len;
      UChar rm = getIByte(delta);

      /* make sure this instruction really is a POP */
      if (epartIsReg(rm) || gregOfRM(rm) != 0)
         goto decode_failure;
      /* and has a valid size */
      if (sz != 4 && sz != 2)
         goto decode_failure;
      ty = szToITy(sz);

      t1 = newTemp(Ity_I32); /* stack address */
      t3 = newTemp(ty);      /* data */
      /* set t1 to ESP: t1 = ESP */
      assign( t1, getIReg(4, R_ESP) );
      /* load M[ESP] to virtual register t3: t3 = M[t1] */
      assign( t3, loadLE(ty, mkexpr(t1)) );

      /* increase ESP; must be done before the STORE.  Intel manual
         says: If the ESP register is used as a base register for
         addressing a destination operand in memory, the POP
         instruction computes the effective address of the operand
         after it increments the ESP register. */
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );

      /* resolve MODR/M */
      addr = disAMode ( &len, sorb, delta, dis_buf);
      storeLE( mkexpr(addr), mkexpr(t3) );

      DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);

      delta += len;
      break;
    }

   case 0x1F: /* POP %DS */
      dis_pop_segreg( R_DS, sz ); break;
   case 0x07: /* POP %ES */
      dis_pop_segreg( R_ES, sz ); break;
   case 0x17: /* POP %SS */
      dis_pop_segreg( R_SS, sz ); break;

   /* ------------------------ PUSH ----------------------- */

   case 0x50: /* PUSH eAX */
   case 0x51: /* PUSH eCX */
   case 0x52: /* PUSH eDX */
   case 0x53: /* PUSH eBX */
   case 0x55: /* PUSH eBP */
   case 0x56: /* PUSH eSI */
   case 0x57: /* PUSH eDI */
   case 0x54: /* PUSH eSP */
      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pushl %esp
         correctly pushes the old value. */
      vassert(sz == 2 || sz == 4);
      ty = sz==2 ? Ity_I16 : Ity_I32;
      t1 = newTemp(ty); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(sz, opc-0x50));
      assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
      putIReg(4, R_ESP, mkexpr(t2) );
      storeLE(mkexpr(t2),mkexpr(t1));
      DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
      break;

   case 0x68: /* PUSH Iv */
      d32 = getUDisp(sz,delta); delta += sz;
      goto do_push_I;
   case 0x6A: /* PUSH Ib, sign-extended to sz */
      d32 = getSDisp8(delta); delta += 1;
      goto do_push_I;
   do_push_I:
      ty = szToITy(sz);
      t1 = newTemp(Ity_I32); t2 = newTemp(ty);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );
      /* stop mkU16 asserting if d32 is a negative 16-bit number
         (bug #132813) */
      if (ty == Ity_I16)
         d32 &= 0xFFFF;
      storeLE( mkexpr(t1), mkU(ty,d32) );
      DIP("push%c $0x%x\n", nameISize(sz), d32);
      break;

   case 0x9C: /* PUSHF */ {
      vassert(sz == 2 || sz == 4);

      t1 = newTemp(Ity_I32);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );

      /* Calculate OSZACP, and patch in fixed fields as per
         Intel docs.
         - bit 1 is always 1
         - bit 9 is Interrupt Enable (should always be 1 in user mode?)
      */
      t2 = newTemp(Ity_I32);
      assign( t2, binop(Iop_Or32,
                        mk_x86g_calculate_eflags_all(),
                        mkU32( (1<<1)|(1<<9) ) ));

      /* Patch in the D flag.  This can simply be a copy of bit 10 of
         baseBlock[OFFB_DFLAG]. */
      t3 = newTemp(Ity_I32);
      assign( t3, binop(Iop_Or32,
                        mkexpr(t2),
                        binop(Iop_And32,
                              IRExpr_Get(OFFB_DFLAG,Ity_I32),
                              mkU32(1<<10)))
            );

      /* And patch in the ID flag. */
      t4 = newTemp(Ity_I32);
      assign( t4, binop(Iop_Or32,
                        mkexpr(t3),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
                                               mkU8(21)),
                              mkU32(1<<21)))
            );

      /* And patch in the AC flag. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Or32,
                        mkexpr(t4),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
                                               mkU8(18)),
                              mkU32(1<<18)))
            );

      /* if sz==2, the stored value needs to be narrowed. */
      if (sz == 2)
         storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
      else
         storeLE( mkexpr(t1), mkexpr(t5) );

      DIP("pushf%c\n", nameISize(sz));
      break;
   }
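
   /* The EFLAGS bit positions patched in above, for reference: bit 1
      is reserved and always reads as 1, bit 9 is IF, bit 10 is DF,
      bit 18 is AC and bit 21 is ID -- hence the constants (1<<1),
      (1<<9), (1<<10), (1<<18) and (1<<21). */
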
   case 0x60: /* PUSHA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pusha
         correctly pushes the old %esp value.  New value of %esp is
         pushed at start. */
      /* t0 is the %ESP value we're going to push. */
      t0 = newTemp(Ity_I32);
      assign( t0, getIReg(4, R_ESP) );

      /* t5 will be the new %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );

      /* Update guest state before prodding memory. */
      putIReg(4, R_ESP, mkexpr(t5));

      /* Dump all the registers. */
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );

      DIP("pusha%c\n", nameISize(sz));
      break;

   case 0x0E: /* PUSH %CS */
      dis_push_segreg( R_CS, sz ); break;
   case 0x1E: /* PUSH %DS */
      dis_push_segreg( R_DS, sz ); break;
   case 0x06: /* PUSH %ES */
      dis_push_segreg( R_ES, sz ); break;
   case 0x16: /* PUSH %SS */
      dis_push_segreg( R_SS, sz ); break;

   /* ------------------------ SCAS et al ----------------- */

   case 0xA4: /* MOVS, no REP prefix */
   case 0xA5:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
      break;

   case 0xA6: /* CMPSb, no REP prefix */
   case 0xA7:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
      break;

   case 0xAA: /* STOS, no REP prefix */
   case 0xAB:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
      break;

   case 0xAC: /* LODS, no REP prefix */
   case 0xAD:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
      break;

   case 0xAE: /* SCAS, no REP prefix */
   case 0xAF:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
      break;

   case 0xFC: /* CLD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
      DIP("cld\n");
      break;

   case 0xFD: /* STD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
      DIP("std\n");
      break;
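
   /* The D flag is kept in the guest state as a stride, +1 (CLD) or
      -1 (STD, stored as 0xFFFFFFFF), rather than as a single bit, so
      that string ops can scale it by the element size and add it
      straight onto %ESI/%EDI each iteration. */
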
   case 0xF8: /* CLC */
   case 0xF9: /* STC */
   case 0xF5: /* CMC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, mk_x86g_calculate_eflags_all() );
      switch (opc) {
         case 0xF8:
            assign( t1, binop(Iop_And32, mkexpr(t0),
                              mkU32(~X86G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t1, binop(Iop_Or32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("stc\n");
            break;
         case 0xF5:
            assign( t1, binop(Iop_Xor32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         default:
            vpanic("disInstr(x86)(clc/stc/cmc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      break;

   case 0xD6: /* SALC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, binop(Iop_And32,
                        mk_x86g_calculate_eflags_c(),
                        mkU32(1)) );
      assign( t1, binop(Iop_Sar32,
                        binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
                        mkU8(31)) );
      putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
      DIP("salc\n");
      break;
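
   /* X86G_CC_OP_COPY, used above, is the "no computation" flags thunk:
      it says CC_DEP1 already holds the literal %EFLAGS value, so later
      flag reads just copy bits out of it.  That is why clc/stc/cmc can
      simply AND/OR/XOR the carry bit into the materialised flags.  In
      the same vein, the SALC shl-31/sar-31 pair smears the carry bit
      across a word: CF=1 gives AL=0xFF, CF=0 gives AL=0x00. */
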
   /* REPNE prefix insn */
   case 0xF2: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      if (sorb != 0) goto decode_failure;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
      dres.whatNext = Dis_StopHere;

      switch (abyte) {
         /* According to the Intel manual, "repne movs" should never
            occur, but in practice it has happened, so allow for it
            here... */
         case 0xA4: sz = 1;   /* REPNE MOVS<sz> */
         case 0xA5:
            dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
                                    guest_EIP_bbstart+delta, "repne movs" );
            break;

         case 0xA6: sz = 1;   /* REPNE CMP<sz> */
         case 0xA7:
            dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
                                    guest_EIP_bbstart+delta, "repne cmps" );
            break;

         case 0xAA: sz = 1;   /* REPNE STOS<sz> */
         case 0xAB:
            dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
                                    guest_EIP_bbstart+delta, "repne stos" );
            break;

         case 0xAE: sz = 1;   /* REPNE SCAS<sz> */
         case 0xAF:
            dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
                                    guest_EIP_bbstart+delta, "repne scas" );
            break;

         default:
            goto decode_failure;
      }
      break;
   }

   /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
      for the rest, it means REP) */
   case 0xF3: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      if (sorb != 0) goto decode_failure;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
      dres.whatNext = Dis_StopHere;

      switch (abyte) {
         case 0xA4: sz = 1;   /* REP MOVS<sz> */
         case 0xA5:
            dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
                                        guest_EIP_bbstart+delta, "rep movs" );
            break;

         case 0xA6: sz = 1;   /* REPE CMP<sz> */
         case 0xA7:
            dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
                                   guest_EIP_bbstart+delta, "repe cmps" );
            break;

         case 0xAA: sz = 1;   /* REP STOS<sz> */
         case 0xAB:
            dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
                                        guest_EIP_bbstart+delta, "rep stos" );
            break;

         case 0xAC: sz = 1;   /* REP LODS<sz> */
         case 0xAD:
            dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
                                        guest_EIP_bbstart+delta, "rep lods" );
            break;

         case 0xAE: sz = 1;   /* REPE SCAS<sz> */
         case 0xAF:
            dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
                                   guest_EIP_bbstart+delta, "repe scas" );
            break;

         case 0x90: /* REP NOP (PAUSE) */
            /* a hint to the P4 re spin-wait loop */
            DIP("rep nop (P4 pause)\n");
            /* "observe" the hint.  The Vex client needs to be careful
               not to cause very long delays as a result, though. */
            jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
            dres.whatNext = Dis_StopHere;
            break;

         case 0xC3: /* REP RET -- same as normal ret? */
            dis_ret(0);
            dres.whatNext = Dis_StopHere;
            DIP("rep ret\n");
            break;

         default:
            goto decode_failure;
      }
      break;
   }
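
   /* Re the "rep ret" case above: compilers (notably gcc tuning for
      AMD K8-family parts) emit "rep ret" as a two-byte return to
      sidestep a branch-predictor penalty on single-byte rets.  The REP
      prefix is architecturally ignored there, so treating it as a
      plain ret is believed correct. */
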
   /* ------------------------ XCHG ----------------------- */

   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix; hence it must be translated with an IRCAS (at least, the
      memory variant). */
   case 0x86: /* XCHG Gb,Eb */
      sz = 1;
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getIByte(delta);
      ty = szToITy(sz);
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         assign(t1, getIReg(sz, eregOfRM(modrm)));
         assign(t2, getIReg(sz, gregOfRM(modrm)));
         putIReg(sz, gregOfRM(modrm), mkexpr(t1));
         putIReg(sz, eregOfRM(modrm), mkexpr(t2));
         delta++;
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
                            nameIReg(sz,eregOfRM(modrm)));
      } else {
         *expect_CAS = True;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         assign( t1, loadLE(ty,mkexpr(addr)) );
         assign( t2, getIReg(sz,gregOfRM(modrm)) );
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
         putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
         delta += alen;
         DIP("xchg%c %s, %s\n", nameISize(sz),
                                nameIReg(sz,gregOfRM(modrm)), dis_buf);
      }
      break;

   case 0x90: /* XCHG eAX,eAX */
      DIP("nop\n");
      break;
   case 0x91: /* XCHG eAX,eCX */
   case 0x92: /* XCHG eAX,eDX */
   case 0x93: /* XCHG eAX,eBX */
   case 0x94: /* XCHG eAX,eSP */
   case 0x95: /* XCHG eAX,eBP */
   case 0x96: /* XCHG eAX,eSI */
   case 0x97: /* XCHG eAX,eDI */
      codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
      break;

   /* ------------------------ XLAT ----------------------- */

   case 0xD7: /* XLAT */
      if (sz != 4) goto decode_failure; /* sz == 2 is architecturally
         allowed too (via 0x66), but is not handled here */
      putIReg(
         1,
         R_EAX/*AL*/,
         loadLE(Ity_I8,
                handleSegOverride(
                   sorb,
                   binop(Iop_Add32,
                         getIReg(4, R_EBX),
                         unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));

      DIP("xlat%c [ebx]\n", nameISize(sz));
      break;

   /* ------------------------ IN / OUT ----------------------- */

   case 0xE4: /* IN imm8, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xE5: /* IN imm8, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xEC: /* IN %DX, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xED: /* IN %DX, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;
   do_IN: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      t2 = newTemp(Ity_I32);
      d = unsafeIRDirty_1_N(
             t2,
             0/*regparms*/,
             "x86g_dirtyhelper_IN",
             &x86g_dirtyhelper_IN,
             mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
          );
      /* do the call, dumping the result in t2. */
      stmt( IRStmt_Dirty(d) );
      putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
      break;
   }

   case 0xE6: /* OUT AL, imm8 */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
      goto do_OUT;
   case 0xE7: /* OUT eAX, imm8 */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
      goto do_OUT;
   case 0xEE: /* OUT AL, %DX */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;
   case 0xEF: /* OUT eAX, %DX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;
   do_OUT: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      d = unsafeIRDirty_0_N(
             0/*regparms*/,
             "x86g_dirtyhelper_OUT",
             &x86g_dirtyhelper_OUT,
             mkIRExprVec_3( mkexpr(t1),
                            widenUto32( getIReg(sz, R_EAX) ),
                            mkU32(sz) )
          );
      stmt( IRStmt_Dirty(d) );
      break;
   }
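
   /* IN and OUT above go through dirty helpers because port I/O is a
      genuine side effect that pure IR cannot express; the generated
      call performs (or simulates) the access at run time, taking the
      port number and access width as arguments. */
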
   /* ------------------------ (Grp1 extensions) ---------- */

   case 0x82: /* Grp1 Ib,Eb too.  Apparently this is the same as
                 case 0x80, but only in 32-bit mode. */
      /* fallthru */
   case 0x80: /* Grp1 Ib,Eb */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      sz    = 1;
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x81: /* Grp1 Iv,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = sz;
      d32   = getUDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x83: /* Grp1 Ib,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   /* ------------------------ (Grp2 extensions) ---------- */

   case 0xC0: { /* Grp2 Ib,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xC1: { /* Grp2 Ib,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD0: { /* Grp2 1,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD1: { /* Grp2 1,Ev */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD2: { /* Grp2 CL,Eb */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD3: { /* Grp2 CL,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp3 extensions) ---------- */

   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
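
   /* Worked example for the Grp1 0x83 form: "add $-1, %eax" encodes
      as 83 C0 FF.  getSDisp8 sign-extends the FF byte, so d32 becomes
      0xFFFFFFFF before being handed to dis_Grp1. */
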
   /* ------------------------ (Grp4 extensions) ---------- */

   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp5 extensions) ---------- */

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ Escapes to 2-byte opcodes -- */

   case 0x0F: {
      opc = getIByte(delta); delta++;
      switch (opc) {

      /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0xBA: { /* Grp8 Ib,Ev */
         Bool decode_OK = False;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
                                am_sz, sz, d32, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */

      case 0xBC: /* BSF Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, True );
         break;
      case 0xBD: /* BSR Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, False );
         break;

      /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */

      case 0xC8: /* BSWAP %eax */
      case 0xC9:
      case 0xCA:
      case 0xCB:
      case 0xCC:
      case 0xCD:
      case 0xCE:
      case 0xCF: /* BSWAP %edi */
         /* AFAICS from the Intel docs, this only exists at size 4. */
         vassert(sz == 4);
         t1 = newTemp(Ity_I32);
         t2 = newTemp(Ity_I32);
         assign( t1, getIReg(4, opc-0xC8) );

         assign( t2,
            binop(Iop_Or32,
               binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
            binop(Iop_Or32,
               binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                mkU32(0x00FF0000)),
            binop(Iop_Or32,
               binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                mkU32(0x0000FF00)),
               binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                mkU32(0x000000FF) )
            )))
         );

         putIReg(4, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
         break;
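
      /* Worked example for the BSWAP expression above: with
         t1 = 0x11223344, (t1<<24) = 0x44000000,
         ((t1<<8)&0x00FF0000) = 0x00330000,
         ((t1>>8)&0x0000FF00) = 0x00002200 and (t1>>24) = 0x00000011,
         which OR together to 0x44332211. */
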
      /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */

      case 0xA3: /* BT Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
         break;
      case 0xB3: /* BTR Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
         break;
      case 0xAB: /* BTS Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
         break;
      case 0xBB: /* BTC Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
         break;

      /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x40:
      case 0x41:
      case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
      case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
      case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
      case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
      case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
      case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
      case 0x48: /* CMOVSb (cmov negative) */
      case 0x49: /* CMOVNSb (cmov not negative) */
      case 0x4A: /* CMOVP (cmov parity even) */
      case 0x4B: /* CMOVNP (cmov parity odd) */
      case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
      case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
      case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
      case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
         delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
         break;

      /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */

      case 0xB0: /* CMPXCHG Gb,Eb */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
         break;
      case 0xB1: /* CMPXCHG Gv,Ev */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
         break;
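
      /* A note for the CMPXCHG8B case below: IRExpr_Mux0X(cond8, e0, eX)
         evaluates to e0 when the 8-bit condition is zero and to eX
         otherwise, so it acts as the select between the failure and
         success outcomes of the DCAS. */
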
      case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
         IRTemp expdHi    = newTemp(Ity_I32);
         IRTemp expdLo    = newTemp(Ity_I32);
         IRTemp dataHi    = newTemp(Ity_I32);
         IRTemp dataLo    = newTemp(Ity_I32);
         IRTemp oldHi     = newTemp(Ity_I32);
         IRTemp oldLo     = newTemp(Ity_I32);
         IRTemp flags_old = newTemp(Ity_I32);
         IRTemp flags_new = newTemp(Ity_I32);
         IRTemp success   = newTemp(Ity_I1);

         /* Translate this using a DCAS, even if there is no LOCK
            prefix.  Life is too short to bother with generating two
            different translations for the with/without-LOCK-prefix
            cases. */
         *expect_CAS = True;

         /* Decode, and generate address. */
         if (sz != 4) goto decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 1) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;

         /* Get the expected and new values. */
         assign( expdHi, getIReg(4,R_EDX) );
         assign( expdLo, getIReg(4,R_EAX) );
         assign( dataHi, getIReg(4,R_ECX) );
         assign( dataLo, getIReg(4,R_EBX) );

         /* Do the DCAS */
         stmt( IRStmt_CAS(
                  mkIRCAS( oldHi, oldLo,
                           Iend_LE, mkexpr(addr),
                           mkexpr(expdHi), mkexpr(expdLo),
                           mkexpr(dataHi), mkexpr(dataLo)
               )));

         /* success when oldHi:oldLo == expdHi:expdLo */
         assign( success,
                 binop(Iop_CasCmpEQ32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
                             binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
                       ),
                       mkU32(0)
                 ));

         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            EDX:EAX the value seen in memory. */
         putIReg(4, R_EDX,
                    IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
                                  mkexpr(oldHi),
                                  mkexpr(expdHi)
                ));
         putIReg(4, R_EAX,
                    IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
                                  mkexpr(oldLo),
                                  mkexpr(expdLo)
                ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
         assign(
            flags_new,
            binop(Iop_Or32,
                  binop(Iop_And32, mkexpr(flags_old),
                                   mkU32(~X86G_CC_MASK_Z)),
                  binop(Iop_Shl32,
                        binop(Iop_And32,
                              unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
                        mkU8(X86G_CC_SHIFT_Z)) ));

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }
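
      /* The success test above relies on the identity
            ((a ^ b) | (c ^ d)) == 0  <=>  (a == b) && (c == d),
         which lets a single 32-bit comparison cover both halves of
         the 64-bit compare. */
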
      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax and ecx, wr ebx and edx
         */
         IRDirty* d     = NULL;
         HChar*   fName = NULL;
         void*    fAddr = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_0() );
         /* declare guest state effects */
         d->needsBBP = True;
         d->nFxState = 4;
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("cpuid\n");
         break;
      }

//--         if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
//--            goto decode_failure;
//--
//--         t1 = newTemp(cb);
//--         t2 = newTemp(cb);
//--         t3 = newTemp(cb);
//--         t4 = newTemp(cb);
//--         uInstr0(cb, CALLM_S, 0);
//--
//--         uInstr2(cb, GET,   4, ArchReg, R_EAX, TempReg, t1);
//--         uInstr1(cb, PUSH,  4, TempReg, t1);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t2);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t3);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t3);
//--
//--         uInstr2(cb, MOV,   4, Literal, 0, TempReg, t4);
//--         uLiteral(cb, 0);
//--         uInstr1(cb, PUSH,  4, TempReg, t4);
//--
//--         uInstr1(cb, CALLM, 0, Lit16,   VGOFF_(helper_CPUID));
//--         uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
//--
//--         uInstr1(cb, POP,   4, TempReg, t4);
//--         uInstr2(cb, PUT,   4, TempReg, t4, ArchReg, R_EDX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t3);
//--         uInstr2(cb, PUT,   4, TempReg, t3, ArchReg, R_ECX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t2);
//--         uInstr2(cb, PUT,   4, TempReg, t2, ArchReg, R_EBX);
//--
//--         uInstr1(cb, POP,   4, TempReg, t1);
//--         uInstr2(cb, PUT,   4, TempReg, t1, ArchReg, R_EAX);
//--
//--         uInstr0(cb, CALLM_E, 0);
//--         DIP("cpuid\n");
//--         break;
//--
      /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

      case 0xB6: /* MOVZXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
         break;

      case 0xB7: /* MOVZXw Ew,Gv */
         if (sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
         break;

      case 0xBE: /* MOVSXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
         break;

      case 0xBF: /* MOVSXw Ew,Gv */
         if (sz != 4 && /* accept movsww, sigh, see #250799 */ sz != 2)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
         break;

//--       /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
//--
//--       case 0xC3: /* MOVNTI Gv,Ev */
//--          vg_assert(sz == 4);
//--          modrm = getUChar(eip);
//--          vg_assert(!epartIsReg(modrm));
//--          t1 = newTemp(cb);
//--          uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
//--          pair = disAMode ( cb, sorb, eip, dis_buf );
//--          t2 = LOW24(pair);
//--          eip += HI8(pair);
//--          uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
//--          DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
//--          break;

      /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

      case 0xAF: /* IMUL Ev, Gv */
         delta = dis_mul_E_G ( sorb, sz, delta );
         break;

      /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x1F:
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         DIP("nop%c %s\n", nameISize(sz), dis_buf);
         break;
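
      /* The 0F 1F form handled above is Intel's recommended multi-byte
         NOP: it takes a full modrm-encoded operand, which is decoded
         here (so the instruction length comes out right) and then
         simply ignored. */
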
      /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
      case 0x80:
      case 0x81:
      case 0x82: /* JBb/JNAEb (jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JP (jump parity even) */
      case 0x8B: /* JNP/JPO (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
       { Int    jmpDelta;
         HChar* comment  = "";
         jmpDelta = (Int)getUDisp32(delta);
         d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
         delta += 4;
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta < 0
             && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
            /* Speculation: assume this backward branch is taken.  So
               we need to emit a side-exit to the insn following this
               one, on the negation of the condition, and continue at
               the branch target address (d32).  If we wind up back at
               the first instruction of the trace, just stop; it's
               better to let the IR loop unroller handle that case. */
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)
                                                 (1 ^ (opc - 0x80))),
                     Ijk_Boring,
                     IRConst_U32(guest_EIP_bbstart+delta) ) );
            dres.whatNext   = Dis_ResteerC;
            dres.continueAt = (Addr64)(Addr32)d32;
            comment = "(assumed taken)";
         }
         else
         if (resteerCisOk
             && vex_control.guest_chase_cond
             && (Addr32)d32 != (Addr32)guest_EIP_bbstart
             && jmpDelta >= 0
             && resteerOkFn( callback_opaque,
                             (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
            /* Speculation: assume this forward branch is not taken.
               So we need to emit a side-exit to d32 (the dest) and
               continue disassembling at the insn immediately
               following this one. */
            stmt( IRStmt_Exit(
                     mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
                     Ijk_Boring,
                     IRConst_U32(d32) ) );
            dres.whatNext   = Dis_ResteerC;
            dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
            comment = "(assumed not taken)";
         }
         else {
            /* Conservative default translation - end the block at
               this point. */
            jcc_01( (X86Condcode)(opc - 0x80),
                    (Addr32)(guest_EIP_bbstart+delta), d32);
            dres.whatNext = Dis_StopHere;
         }
         DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
         break;
       }

      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
      case 0x31: { /* RDTSC */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
         stmt( IRStmt_Dirty(d) );
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }
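
      /* Worked example for the RDTSC split above: if the helper
         returns val = 0x0000001234567890, then Iop_64HIto32 yields
         0x00000012 (into %EDX) and Iop_64to32 yields 0x34567890
         (into %EAX). */
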
      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;

      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90:
      case 0x91:
      case 0x92: /* set-Bb/set-NAEb (set below) */
      case 0x93: /* set-NBb/set-AEb (set not below) */
      case 0x94: /* set-Zb/set-Eb (set zero) */
      case 0x95: /* set-NZb/set-NEb (set not zero) */
      case 0x96: /* set-BEb/set-NAb (set below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set not below or equal) */
      case 0x98: /* set-Sb (set negative) */
      case 0x99: /* set-NSb (set not negative) */
      case 0x9A: /* set-P (set parity even) */
      case 0x9B: /* set-NP (set parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set less) */
      case 0x9D: /* set-GEb/set-NLb (set greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;

      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
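
      /* Worked example for SHLD: "shld $4, %ebx, %eax" shifts %eax
         left by 4 and fills the vacated low bits from the top of %ebx,
         i.e. %eax = (%eax << 4) | (%ebx >> 28).  SHRD is the mirror
         image, filling from the bottom of the source into the top of
         the destination. */
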

      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;

      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90: /* set-Ob (set if overflow) */
      case 0x91: /* set-NOb (set if no overflow) */
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;

      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;

      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         dres.whatNext = Dis_StopHere;
         DIP("sysenter\n");
         break;
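
      /* Sketch of the scheduler-side obligation (hypothetical
         pseudocode; the names are illustrative and not part of this
         module):

            on Ijk_Sys_sysenter:
               perform_sysenter(thread);
               thread->guest_EIP = continuation_addr;   -- MUST be set
               resume(thread);

         The 0 passed to jmp_lit above is only a placeholder, which is
         why a scheduler that fails to set guest_EIP sends the thread
         to address zero. */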

      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume sse idec has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }

      case 0x0E: /* FEMMS */
      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("{f}emms\n");
         break;

      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         modrm = getUChar(delta);
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                         0/*regparms*/,
                         "x86g_dirtyhelper_SxDT",
                         &x86g_dirtyhelper_SxDT,
                         mkIRExprVec_2( mkexpr(addr),
                                        mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
         stmt( IRStmt_Dirty(d) );
         break;
      }
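
      /* For reference: with a 32-bit operand size, SGDT/SIDT store a
         6-byte pseudo-descriptor -- a 16-bit table limit followed by
         a 32-bit linear base address -- which is why d->mSize is 6
         above:

            bytes 0..1 : limit
            bytes 2..5 : base
      */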

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
      } /* switch (opc) for the 2-byte opcodes */
      goto decode_success;
   } /* case 0x0F: of primary opcode */

   /* ------------------------ ??? ------------------------ */

   default:
   decode_failure:
   /* All decode failures end up here. */
   vex_printf("vex x86->IR: unhandled instruction bytes: "
              "0x%x 0x%x 0x%x 0x%x\n",
              (Int)getIByte(delta_start+0),
              (Int)getIByte(delta_start+1),
              (Int)getIByte(delta_start+2),
              (Int)getIByte(delta_start+3) );

   /* Tell the dispatcher that this insn cannot be decoded, and so
      has not been executed, and (currently) is the next to be
      executed.  EIP should be up-to-date, since it is made so at the
      start of each insn, but nevertheless be paranoid and update it
      again right now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
   dres.whatNext = Dis_StopHere;
   dres.len      = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesise a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;

   } /* switch (opc) for the main (primary) opcode switch. */

  decode_success:
   /* All decode successes end up here. */
   DIP("\n");
   dres.len = delta - delta_start;
   return dres;
}

#undef DIP
#undef DIS


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_X86 ( IRSB*        irsb_IN,
                         Bool         put_IP,
                         Bool         (*resteerOkFn) ( void*, Addr64 ),
                         Bool         resteerCisOk,
                         void*        callback_opaque,
                         UChar*       guest_code_IN,
                         Long         delta,
                         Addr64       guest_IP,
                         VexArch      guest_arch,
                         VexArchInfo* archinfo,
                         VexAbiInfo*  abiinfo,
                         Bool         host_bigendian_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchX86);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_is_bigendian    = host_bigendian_IN;
   guest_EIP_curr_instr = (Addr32)guest_IP;
   guest_EIP_bbstart    = (Addr32)toUInt(guest_IP - delta);

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
                             resteerCisOk,
                             callback_opaque,
                             delta, archinfo, abiinfo );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* See comment at the top of disInstr_X86_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence
      of IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* Inconsistency detected.  Re-disassemble the instruction so
         as to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
                                resteerCisOk,
                                callback_opaque,
                                delta, archinfo, abiinfo );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/