/*--------------------------------------------------------------------*/
/*--- begin                                       guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates x86 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 32-bit value is being written.

   FUCOMI(P): what happens to the A and S flags?  Currently they are
   forced to zero.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     even when it isn't.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do so.

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   The state of %eflags.AC (alignment check, bit 18) is recorded by
   the simulation (viz, if you set it with popf then a pushf produces
   the value you set it to), but it is otherwise ignored.  In
   particular, setting it to 1 does NOT cause alignment checking to
   happen.  Programs that set it to 1 and then rely on the resulting
   SIGBUSs to inform them of misaligned accesses will not work.

   Implementation of sysenter is necessarily partial.  sysenter is a
   kind of system call entry.  When doing a sysenter, the return
   address is not known -- that is something that is beyond Vex's
   knowledge.  So the generated IR forces a return to the scheduler,
   which can do what it likes to simulate the sysenter, but it MUST
   set this thread's guest_EIP field with the continuation address
   before resuming execution.
   If that doesn't happen, the thread will jump to address zero, which
   is probably fatal.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).

   The delta values are 32-bit ints, not 64-bit ints.  That means this
   module may not work right if run on a 64-bit host.  That should be
   fixed properly, really -- if anyone ever wants to use Vex to
   translate x86 code for execution on a 64-bit host.

   casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong; it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   else entirely.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance-critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   XXXX: Nov 2009: handling of SWP on ARM suffers from the same
   problem.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* Performance holes:

   - fcom ; fstsw %ax ; sahf
     sahf does not update the O flag (sigh) and so O needs to be
     computed.  This is done expensively; it would be better to have a
     calculate_eflags_o helper.

   - emwarns; some FP codes can generate huge numbers of these if the
     fpucw is changed in an inner loop.  It would be better for the
     guest state to have an emwarn-enable reg which can be set zero or
     nonzero.  If it is zero, emwarns are not flagged, and instead
     control just flows all the way through bbs as usual.
*/

/* "Special" instructions.

   This instruction decoder can decode three special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
   C1C713 (in the standard interpretation, that means: roll $3, %edi;
   roll $13, %edi; roll $29, %edi; roll $19, %edi).  Following that,
   one of the following 3 is allowed (standard interpretation in
   parentheses):

      87DB (xchgl %ebx,%ebx)   %EDX = client_request ( %EAX )
      87C9 (xchgl %ecx,%ecx)   %EAX = guest_NRADDR
      87D2 (xchgl %edx,%edx)   call-noredir *%EAX

   Any other bytes following the 12-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
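
/* For example (purely illustrative), a client request laid down by
   client code would appear in the insn stream as the byte sequence

      C1 C7 03  C1 C7 0D  C1 C7 1D  C1 C7 13    (the 12-byte preamble)
      87 DB                                     (xchgl %ebx,%ebx)

   which the decoder recognises and turns into IR performing
   %EDX = client_request ( %EAX ). */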
/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_x86.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_x86_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_X86, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* We need to know this to do sub-register accesses correctly. */
static Bool host_is_bigendian;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr32 guest_EIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr32 guest_EIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

#define OFFB_EMWARN    offsetof(VexGuestX86State,guest_EMWARN)

#define OFFB_TISTART   offsetof(VexGuestX86State,guest_TISTART)
#define OFFB_TILEN     offsetof(VexGuestX86State,guest_TILEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- x86 insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* This is the Intel register encoding -- integer regs. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Various simple conversions */

static UInt extend_s_8to32( UInt x )
{
   return (UInt)((((Int)x) << 24) >> 24);
}

static UInt extend_s_16to32 ( UInt x )
{
   return (UInt)((((Int)x) << 16) >> 16);
}

/* Fetch a byte from the guest insn stream. */
static UChar getIByte ( Int delta )
{
   return guest_code[delta];
}

/* Extract the reg field from a modRM byte. */
static Int gregOfRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* ... and extract the register number ... */
static Int eregOfRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}
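
/* Worked example (illustrative): the modRM byte 0xD1 is 11 010 001
   in binary, so epartIsReg(0xD1) is True, gregOfRM(0xD1) is 2 (%edx)
   and eregOfRM(0xD1) is 1 (%ecx). */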

/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static UChar getUChar ( Int delta )
{
   UChar v = guest_code[delta+0];
   return toUChar(v);
}

static UInt getUDisp16 ( Int delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

static UInt getUDisp32 ( Int delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

static UInt getUDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getUDisp16(delta);
      case 1: return (UInt)getUChar(delta);
      default: vpanic("getUDisp(x86)");
   }
   return 0; /*notreached*/
}


/* Get a byte value out of the insn stream and sign-extend to 32
   bits. */
static UInt getSDisp8 ( Int delta )
{
   return extend_s_8to32( (UInt) (guest_code[delta]) );
}

static UInt getSDisp16 ( Int delta0 )
{
   UChar* eip = (UChar*)(&guest_code[delta0]);
   UInt d = *eip++;
   d |= ((*eip++) << 8);
   return extend_s_16to32(d);
}

static UInt getSDisp ( Int size, Int delta )
{
   switch (size) {
      case 4: return getUDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(x86)");
   }
   return 0; /*notreached*/
}
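
/* Example (illustrative): if the insn stream holds the bytes 78 56
   34 12 at 'delta', getUDisp32 assembles them little-endianly into
   0x12345678; similarly, getSDisp8 on a lone 0x80 byte yields
   0xFFFFFF80. */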


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Create a 1/2/4 byte read of an x86 integer register.  For 16/8 bit
   register references, we need to take the host endianness into
   account.  Supplied value is 0 .. 7 and in the Intel instruction
   encoding. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      default: vpanic("szToITy(x86)");
   }
}

/* On a little-endian host, less significant bits of the guest
   registers are at lower addresses.  Therefore, a reference to the
   low half of a register has the same guest state offset as a
   reference to the full register.
*/
static Int integerGuestRegOffset ( Int sz, UInt archreg )
{
   vassert(archreg < 8);

   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);

   if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
      switch (archreg) {
         case R_EAX: return OFFB_EAX;
         case R_EBX: return OFFB_EBX;
         case R_ECX: return OFFB_ECX;
         case R_EDX: return OFFB_EDX;
         case R_ESI: return OFFB_ESI;
         case R_EDI: return OFFB_EDI;
         case R_ESP: return OFFB_ESP;
         case R_EBP: return OFFB_EBP;
         default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
      }
   }

   vassert(archreg >= 4 && archreg < 8 && sz == 1);
   switch (archreg-4) {
      case R_EAX: return 1+ OFFB_EAX;
      case R_EBX: return 1+ OFFB_EBX;
      case R_ECX: return 1+ OFFB_ECX;
      case R_EDX: return 1+ OFFB_EDX;
      default: vpanic("integerGuestRegOffset(x86,le)(1h)");
   }

   /* NOTREACHED */
   vpanic("integerGuestRegOffset(x86,le)");
}
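
/* Example (illustrative): integerGuestRegOffset(4, R_EAX) and
   integerGuestRegOffset(1, R_AL) both give OFFB_EAX, whereas
   integerGuestRegOffset(1, R_AH) gives 1 + OFFB_EAX -- the next
   byte up in the little-endian guest_EAX field. */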

static Int segmentGuestRegOffset ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return OFFB_ES;
      case R_CS: return OFFB_CS;
      case R_SS: return OFFB_SS;
      case R_DS: return OFFB_DS;
      case R_FS: return OFFB_FS;
      case R_GS: return OFFB_GS;
      default: vpanic("segmentGuestRegOffset(x86)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0: return OFFB_XMM0;
      case 1: return OFFB_XMM1;
      case 2: return OFFB_XMM2;
      case 3: return OFFB_XMM3;
      case 4: return OFFB_XMM4;
      case 5: return OFFB_XMM5;
      case 6: return OFFB_XMM6;
      case 7: return OFFB_XMM7;
      default: vpanic("xmmGuestRegOffset");
   }
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static IRExpr* getIReg ( Int sz, UInt archreg )
{
   vassert(sz == 1 || sz == 2 || sz == 4);
   vassert(archreg < 8);
   return IRExpr_Get( integerGuestRegOffset(sz,archreg),
                      szToITy(sz) );
}

/* Ditto, but write to a reg instead. */
static void putIReg ( Int sz, UInt archreg, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   switch (sz) {
      case 1: vassert(ty == Ity_I8); break;
      case 2: vassert(ty == Ity_I16); break;
      case 4: vassert(ty == Ity_I32); break;
      default: vpanic("putIReg(x86)");
   }
   vassert(archreg < 8);
   stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
}

static IRExpr* getSReg ( UInt sreg )
{
   return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
}

static void putSReg ( UInt sreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
}

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, UInt i )
{
   if (ty == Ity_I8)  return mkU8(i);
   if (ty == Ity_I16) return mkU16(i);
   if (ty == Ity_I32) return mkU32(i);
   /* If this panics, it usually means you passed a size (1,2,4)
      value as the IRType, rather than a real IRType. */
   vpanic("mkU(x86)");
}

static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   Int adj;
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8);
   adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   return adj + op8;
}
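
/* Example (illustrative): mkSizedOp(Ity_I32, Iop_Add8) yields
   Iop_Add32, since the 8/16/32-bit variants of each operation are
   adjacent in the IROp enumeration and 'adj' is 0, 1 or 2
   respectively. */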

static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
{
   if (szSmall == 1 && szBig == 4) {
      return signd ? Iop_8Sto32 : Iop_8Uto32;
   }
   if (szSmall == 1 && szBig == 2) {
      return signd ? Iop_8Sto16 : Iop_8Uto16;
   }
   if (szSmall == 2 && szBig == 4) {
      return signd ? Iop_16Sto32 : Iop_16Uto32;
   }
   vpanic("mkWidenOp(x86,guest)");
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_32to1,
               binop(Iop_And32,
                     unop(Iop_1Uto32,x),
                     unop(Iop_1Uto32,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr32 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U32( restart_point )
         ));
}


/*------------------------------------------------------------*/
/*--- Helpers for %eflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_Bit. */
static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU32(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "x86g_calculate_condition", &x86g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_32to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I32. */
static IRExpr* mk_x86g_calculate_eflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           3/*regparm*/,
           "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
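
/* Illustrative sketch of the thunk scheme: after (say) "addl
   %ebx,%eax" the translation merely records

      CC_OP   = X86G_CC_OP_ADDL
      CC_DEP1 = the first argument
      CC_DEP2 = the second argument

   (see the setFlags_* functions below) and no flag is actually
   computed; a subsequent conditional branch materialises just the
   flag it needs via mk_x86g_calculate_condition. */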


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenUto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Uto32,e);
      case Ity_I8:  return unop(Iop_8Uto32,e);
      default: vpanic("widenUto32");
   }
}

/* S-widen 8/16/32 bit int expr to 32. */
static IRExpr* widenSto32 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I32: return e;
      case Ity_I16: return unop(Iop_16Sto32,e);
      case Ity_I8:  return unop(Iop_8Sto32,e);
      default: vpanic("widenSto32");
   }
}

/* Narrow 8/16/32 bit int expr to 8/16/32.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(x86)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* For shift operations, we put in the result and the undershifted
   result.  If, however, the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op32,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);

   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op32) {
      case Iop_Shr32:
      case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
      case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
      default:        ppIROp(op32);
                      vpanic("setFlags_DEP1_DEP2_shift(x86)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I32),
                                   mkU32(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
                                   widenUto32(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
                                   widenUto32(mkexpr(resUS)))) );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;

   ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
         break;
      default:
         vpanic("setFlags_MUL(x86)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the Intel encoding. */

static HChar* name_X86Condcode ( X86Condcode cond )
{
   switch (cond) {
      case X86CondO:      return "o";
      case X86CondNO:     return "no";
      case X86CondB:      return "b";
      case X86CondNB:     return "nb";
      case X86CondZ:      return "z";
      case X86CondNZ:     return "nz";
      case X86CondBE:     return "be";
      case X86CondNBE:    return "nbe";
      case X86CondS:      return "s";
      case X86CondNS:     return "ns";
      case X86CondP:      return "p";
      case X86CondNP:     return "np";
      case X86CondL:      return "l";
      case X86CondNL:     return "nl";
      case X86CondLE:     return "le";
      case X86CondNLE:    return "nle";
      case X86CondAlways: return "ALWAYS";
      default: vpanic("name_X86Condcode");
   }
}

static
X86Condcode positiveIse_X86Condcode ( X86Condcode  cond,
                                      Bool*        needInvert )
{
   vassert(cond >= X86CondO && cond <= X86CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}
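
/* Example (illustrative): X86CondNZ is odd, so it positivises to
   X86CondZ with *needInvert set to True; X86CondZ itself is returned
   unchanged with *needInvert set to False. */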


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   plus  = mkSizedOp(ty, Iop_Add8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_ADCL
                   : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc,  binop(Iop_And32,
                        mk_x86g_calculate_eflags_c(),
                        mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
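
/* Usage sketch (illustrative): for a register-register ADC with no
   memory store, a caller does

      helper_ADC( size, dst1, dst0, src,
                  IRTemp_INVALID, IRTemp_INVALID, 0 );
      putIReg( size, gregOfRM(rm), mkexpr(dst1) );

   exactly as dis_op2_E_G below does. */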


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I32);
   IRTemp oldcn = newTemp(ty);
   IROp   minus = mkSizedOp(ty, Iop_Sub8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
   vassert(sz == 1 || sz == 2 || sz == 4);
   thunkOp = sz==4 ? X86G_CC_OP_SBBL
                   : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And32,
                       mk_x86g_calculate_eflags_c(),
                       mkU32(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

static HChar* nameGrp1 ( Int opc_aux )
{
   static HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
   return grp1_names[opc_aux];
}

static HChar* nameGrp2 ( Int opc_aux )
{
   static HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
   return grp2_names[opc_aux];
}

static HChar* nameGrp4 ( Int opc_aux )
{
   static HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
   return grp4_names[opc_aux];
}

static HChar* nameGrp5 ( Int opc_aux )
{
   static HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
   return grp5_names[opc_aux];
}

static HChar* nameGrp8 ( Int opc_aux )
{
   static HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
   return grp8_names[opc_aux];
}

static HChar* nameIReg ( Int size, Int reg )
{
   static HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx",
         "%esp", "%ebp", "%esi", "%edi" };
   static HChar* ireg16_names[8]
     = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
   static HChar* ireg8_names[8]
     = { "%al", "%cl", "%dl", "%bl",
         "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
   if (reg < 0 || reg > 7) goto bad;
   switch (size) {
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: return ireg8_names[reg];
   }
  bad:
   vpanic("nameIReg(X86)");
   return NULL; /*notreached*/
}

static HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(x86)");
   }
}

static HChar* nameMMXReg ( Int mmxreg )
{
   static HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
   return mmx_names[mmxreg];
}

static HChar* nameXMMReg ( Int xmmreg )
{
   static HChar* xmm_names[8]
     = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
         "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
   if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
   return xmm_names[xmmreg];
}

static HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(x86,guest)");
   }
}
vpanic("nameMMXGran(x86,guest)"); 1322 } 1323 } 1324 1325 static HChar nameISize ( Int size ) 1326 { 1327 switch (size) { 1328 case 4: return 'l'; 1329 case 2: return 'w'; 1330 case 1: return 'b'; 1331 default: vpanic("nameISize(x86)"); 1332 } 1333 } 1334 1335 1336 /*------------------------------------------------------------*/ 1337 /*--- JMP helpers ---*/ 1338 /*------------------------------------------------------------*/ 1339 1340 static void jmp_lit( IRJumpKind kind, Addr32 d32 ) 1341 { 1342 irsb->next = mkU32(d32); 1343 irsb->jumpkind = kind; 1344 } 1345 1346 static void jmp_treg( IRJumpKind kind, IRTemp t ) 1347 { 1348 irsb->next = mkexpr(t); 1349 irsb->jumpkind = kind; 1350 } 1351 1352 static 1353 void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true ) 1354 { 1355 Bool invert; 1356 X86Condcode condPos; 1357 condPos = positiveIse_X86Condcode ( cond, &invert ); 1358 if (invert) { 1359 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1360 Ijk_Boring, 1361 IRConst_U32(d32_false) ) ); 1362 irsb->next = mkU32(d32_true); 1363 irsb->jumpkind = Ijk_Boring; 1364 } else { 1365 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos), 1366 Ijk_Boring, 1367 IRConst_U32(d32_true) ) ); 1368 irsb->next = mkU32(d32_false); 1369 irsb->jumpkind = Ijk_Boring; 1370 } 1371 } 1372 1373 1374 /*------------------------------------------------------------*/ 1375 /*--- Disassembling addressing modes ---*/ 1376 /*------------------------------------------------------------*/ 1377 1378 static 1379 HChar* sorbTxt ( UChar sorb ) 1380 { 1381 switch (sorb) { 1382 case 0: return ""; /* no override */ 1383 case 0x3E: return "%ds"; 1384 case 0x26: return "%es:"; 1385 case 0x64: return "%fs:"; 1386 case 0x65: return "%gs:"; 1387 default: vpanic("sorbTxt(x86,guest)"); 1388 } 1389 } 1390 1391 1392 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 1393 linear address by adding any required segment override as indicated 1394 by sorb. */ 1395 static 1396 IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual ) 1397 { 1398 Int sreg; 1399 IRType hWordTy; 1400 IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 1401 1402 if (sorb == 0) 1403 /* the common case - no override */ 1404 return virtual; 1405 1406 switch (sorb) { 1407 case 0x3E: sreg = R_DS; break; 1408 case 0x26: sreg = R_ES; break; 1409 case 0x64: sreg = R_FS; break; 1410 case 0x65: sreg = R_GS; break; 1411 default: vpanic("handleSegOverride(x86,guest)"); 1412 } 1413 1414 hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 1415 1416 seg_selector = newTemp(Ity_I32); 1417 ldt_ptr = newTemp(hWordTy); 1418 gdt_ptr = newTemp(hWordTy); 1419 r64 = newTemp(Ity_I64); 1420 1421 assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 1422 assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 1423 assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 1424 1425 /* 1426 Call this to do the translation and limit checks: 1427 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 1428 UInt seg_selector, UInt virtual_addr ) 1429 */ 1430 assign( 1431 r64, 1432 mkIRExprCCall( 1433 Ity_I64, 1434 0/*regparms*/, 1435 "x86g_use_seg_selector", 1436 &x86g_use_seg_selector, 1437 mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 1438 mkexpr(seg_selector), virtual) 1439 ) 1440 ); 1441 1442 /* If the high 32 of the result are non-zero, there was a 1443 failure in address translation. In which case, make a 1444 quick exit. 


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static
HChar* sorbTxt ( UChar sorb )
{
   switch (sorb) {
      case 0:    return ""; /* no override */
      case 0x3E: return "%ds:";
      case 0x26: return "%es:";
      case 0x64: return "%fs:";
      case 0x65: return "%gs:";
      default: vpanic("sorbTxt(x86,guest)");
   }
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb. */
static
IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
{
   Int    sreg;
   IRType hWordTy;
   IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;

   if (sorb == 0)
      /* the common case - no override */
      return virtual;

   switch (sorb) {
      case 0x3E: sreg = R_DS; break;
      case 0x26: sreg = R_ES; break;
      case 0x64: sreg = R_FS; break;
      case 0x65: sreg = R_GS; break;
      default: vpanic("handleSegOverride(x86,guest)");
   }

   hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;

   seg_selector = newTemp(Ity_I32);
   ldt_ptr      = newTemp(hWordTy);
   gdt_ptr      = newTemp(hWordTy);
   r64          = newTemp(Ity_I64);

   assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));

   /*
   Call this to do the translation and limit checks:
   ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                                 UInt seg_selector, UInt virtual_addr )
   */
   assign(
      r64,
      mkIRExprCCall(
         Ity_I64,
         0/*regparms*/,
         "x86g_use_seg_selector",
         &x86g_use_seg_selector,
         mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
                        mkexpr(seg_selector), virtual)
      )
   );

   /* If the high 32 of the result are non-zero, there was a
      failure in address translation.  In which case, make a
      quick exit.
   */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
         Ijk_MapFail,
         IRConst_U32( guest_EIP_curr_instr )
      )
   );

   /* otherwise, here's the translated result. */
   return unop(Iop_64to32, mkexpr(r64));
}
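
/* Example (illustrative): an access such as "movl %gs:(%eax),%ebx"
   reaches here with sorb == 0x65, so the %eax value is passed
   through x86g_use_seg_selector together with the %gs selector and
   the LDT/GDT pointers, and a failed translation side-exits with
   Ijk_MapFail. */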


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned.  Note that this fn should not be
   called if the R/M part of the address denotes a register instead of
   memory.  If the front-end tracing flag is set, text of the
   addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned. */

static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
{
   IRTemp tmp = newTemp(Ity_I32);
   assign( tmp, addr32 );
   return tmp;
}

static
IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
{
   UChar mod_reg_rm = getIByte(delta);
   delta++;

   buf[0] = (UChar)0;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                   /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                         /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                   /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp).
         --> GET %reg, t
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = mod_reg_rm;
           DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb, getIReg(4,rm)));
         }

      /* d8(%eax) ... d8(%edi), not including d8(%esp)
         --> GET %reg, t ; ADDL d8, t
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getSDisp8(delta);
           DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 2;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* d32(%eax) ... d32(%edi), not including d32(%esp)
         --> GET %reg, t ; ADDL d32, t
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           UInt  d  = getUDisp32(delta);
           DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleSegOverride(sorb,
                     binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
         }

      /* a register, %eax .. %edi.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(x86): not an addr!");

      /* a 32-bit literal address
         --> MOV d32, tmp
      */
      case 0x05:
         { UInt d = getUDisp32(delta);
           *len = 5;
           DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
           return disAMode_copy2tmp(
                     handleSegOverride(sorb, mkU32(d)));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %esp cannot act as an index value.
               If index_r indicates %esp, zero is used for the index.
            -- when mod is zero and base indicates EBP, base is instead
               a 32-bit literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %ESP && %base == %EBP
                 = d32 following SIB byte
               | %index == %ESP && %base != %EBP
                 = %base
               | %index != %ESP && %base == %EBP
                 = d32 following SIB byte + (%index << scale)
               | %index != %ESP && %base != %EBP
                 = %base + (%index << scale)

            What happens to the souls of CPU architects who dream up
            such horrendous schemes, do you suppose?
         */
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         delta++;

         if (index_r != R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 2;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        getIReg(4,base_r),
                        binop(Iop_Shl32, getIReg(4,index_r),
                              mkU8(scale)))));
         }

         if (index_r != R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
                      nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
                        mkU32(d))));
         }

         if (index_r == R_ESP && base_r != R_EBP) {
            DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, getIReg(4,base_r)));
         }

         if (index_r == R_ESP && base_r == R_EBP) {
            UInt d = getUDisp32(delta);
            DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb, mkU32(d)));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d8 + %base
            | %index != %ESP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getSDisp8(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 3;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d32 + %base
            | %index != %ESP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getIByte(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         UInt  d       = getUDisp32(delta+1);

         if (index_r == R_ESP) {
            DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
                      (Int)d, nameIReg(4,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleSegOverride(sorb,
                      binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
         } else {
            DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
                      nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleSegOverride(sorb,
                  binop(Iop_Add32,
                        binop(Iop_Add32,
                              getIReg(4,base_r),
                              binop(Iop_Shl32,
                                    getIReg(4,index_r), mkU8(scale))),
                        mkU32(d))));
         }
         /*NOTREACHED*/
         vassert(0);
      }

      default:
         vpanic("disAMode(x86)");
         return 0; /*notreached*/
   }
}
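
/* Worked example (illustrative): for the amode bytes 04 8B -- modRM
   0x04 followed by SIB 0x8B, i.e. scale=2, index=%ecx, base=%ebx --
   disAMode computes %ebx + (%ecx << 2), prints it as (%ebx,%ecx,4),
   and sets *len to 2. */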


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  */

static UInt lengthAMode ( Int delta )
{
   UChar mod_reg_rm = getIByte(delta); delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;               /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                     /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;               /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* d8(%eax) ... d8(%edi), not including d8(%esp). */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* d32(%eax) ... d32(%edi), not including d32(%esp). */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* a register, %eax .. %edi.  (Not an addr, but still handled.) */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* a 32-bit literal address. */
      case 0x05: return 5;

      /* SIB, no displacement. */
      case 0x04: {
         UChar sib    = getIByte(delta);
         UChar base_r = toUChar(sib & 7);
         if (base_r == R_EBP) return 6; else return 2;
      }
      /* SIB, with 8-bit displacement. */
      case 0x0C: return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14: return 6;

      default:
         vpanic("lengthAMode");
         return 0; /*notreached*/
   }
}

/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a pointer to the modRM byte, the actual operation, and
   the data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E)  -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
UInt dis_op2_E_G ( UChar       sorb,
                   Bool        addSubCarry,
                   IROp        op8,
                   Bool        keep,
                   Int         size,
                   Int         delta0,
                   HChar*      t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, gregOfRM(rm), mkU(ty,0));
      }
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  getIReg(size,eregOfRM(rm)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,eregOfRM(rm)),
                          nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign( dst0, getIReg(size,gregOfRM(rm)) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, gregOfRM(rm), mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, gregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          dis_buf,nameIReg(size,gregOfRM(rm)));
      return len+delta0;
   }
}
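
/* Usage sketch (illustrative; the exact call sites live in the
   top-level decoder): an "add Ev,Gv" style opcode would be handled
   along the lines of

      delta = dis_op2_E_G( sorb, False, Iop_Add8, True, sz, delta,
                           "add" );

   with the 8-bit op auto-sized up to the real operand size inside. */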
Returns the address advanced completely over this 1895 instruction. 1896 1897 G(src) is reg. 1898 E(dst) is reg-or-mem 1899 1900 If E is reg, --> GET %E, tmp 1901 OP %G, tmp 1902 PUT tmp, %E 1903 1904 If E is mem, --> (getAddr E) -> tmpa 1905 LD (tmpa), tmpv 1906 OP %G, tmpv 1907 ST tmpv, (tmpa) 1908 */ 1909 static 1910 UInt dis_op2_G_E ( UChar sorb, 1911 Bool locked, 1912 Bool addSubCarry, 1913 IROp op8, 1914 Bool keep, 1915 Int size, 1916 Int delta0, 1917 HChar* t_x86opc ) 1918 { 1919 HChar dis_buf[50]; 1920 Int len; 1921 IRType ty = szToITy(size); 1922 IRTemp dst1 = newTemp(ty); 1923 IRTemp src = newTemp(ty); 1924 IRTemp dst0 = newTemp(ty); 1925 UChar rm = getIByte(delta0); 1926 IRTemp addr = IRTemp_INVALID; 1927 1928 /* addSubCarry == True indicates the intended operation is 1929 add-with-carry or subtract-with-borrow. */ 1930 if (addSubCarry) { 1931 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 1932 vassert(keep); 1933 } 1934 1935 if (epartIsReg(rm)) { 1936 /* Specially handle XOR reg,reg, because that doesn't really 1937 depend on reg, and doing the obvious thing potentially 1938 generates a spurious value check failure due to the bogus 1939 dependency. Ditto SBB reg,reg.*/ 1940 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 1941 && gregOfRM(rm) == eregOfRM(rm)) { 1942 putIReg(size, eregOfRM(rm), mkU(ty,0)); 1943 } 1944 assign(dst0, getIReg(size,eregOfRM(rm))); 1945 assign(src, getIReg(size,gregOfRM(rm))); 1946 1947 if (addSubCarry && op8 == Iop_Add8) { 1948 helper_ADC( size, dst1, dst0, src, 1949 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1950 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1951 } else 1952 if (addSubCarry && op8 == Iop_Sub8) { 1953 helper_SBB( size, dst1, dst0, src, 1954 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 1955 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1956 } else { 1957 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 1958 if (isAddSub(op8)) 1959 setFlags_DEP1_DEP2(op8, dst0, src, ty); 1960 else 1961 setFlags_DEP1(op8, dst1, ty); 1962 if (keep) 1963 putIReg(size, eregOfRM(rm), mkexpr(dst1)); 1964 } 1965 1966 DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 1967 nameIReg(size,gregOfRM(rm)), 1968 nameIReg(size,eregOfRM(rm))); 1969 return 1+delta0; 1970 } 1971 1972 /* E refers to memory */ 1973 { 1974 addr = disAMode ( &len, sorb, delta0, dis_buf); 1975 assign(dst0, loadLE(ty,mkexpr(addr))); 1976 assign(src, getIReg(size,gregOfRM(rm))); 1977 1978 if (addSubCarry && op8 == Iop_Add8) { 1979 if (locked) { 1980 /* cas-style store */ 1981 helper_ADC( size, dst1, dst0, src, 1982 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 1983 } else { 1984 /* normal store */ 1985 helper_ADC( size, dst1, dst0, src, 1986 /*store*/addr, IRTemp_INVALID, 0 ); 1987 } 1988 } else 1989 if (addSubCarry && op8 == Iop_Sub8) { 1990 if (locked) { 1991 /* cas-style store */ 1992 helper_SBB( size, dst1, dst0, src, 1993 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 1994 } else { 1995 /* normal store */ 1996 helper_SBB( size, dst1, dst0, src, 1997 /*store*/addr, IRTemp_INVALID, 0 ); 1998 } 1999 } else { 2000 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2001 if (keep) { 2002 if (locked) { 2003 if (0) vex_printf("locked case\n" ); 2004 casLE( mkexpr(addr), 2005 mkexpr(dst0)/*expval*/, 2006 mkexpr(dst1)/*newval*/, guest_EIP_curr_instr ); 2007 } else { 2008 if (0) vex_printf("nonlocked case\n"); 2009 storeLE(mkexpr(addr), mkexpr(dst1)); 2010 } 2011 } 2012 if (isAddSub(op8)) 2013 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2014 else 
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
                          nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}


/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmpv
                       PUT tmpv, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpb
                       PUT tmpb, %G
*/
static
UInt dis_mov_E_G ( UChar       sorb,
                   Int         size,
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,eregOfRM(rm)),
                           nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           dis_buf,nameIReg(size,gregOfRM(rm)));
      return delta0+len;
   }
}


/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G, tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
UInt dis_mov_G_E ( UChar       sorb,
                   Int         size,
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,gregOfRM(rm)),
                           nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
      storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}


/* op $immediate, AL/AX/EAX.
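   Covers the accumulator-immediate forms; illustratively, 04 ib is
   addb $imm8,%al and 05 id is addl $imm32,%eax, and with
   carrying==True the same code serves the ADC/SBB variants.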
*/ 2114 static 2115 UInt dis_op_imm_A ( Int size, 2116 Bool carrying, 2117 IROp op8, 2118 Bool keep, 2119 Int delta, 2120 HChar* t_x86opc ) 2121 { 2122 IRType ty = szToITy(size); 2123 IRTemp dst0 = newTemp(ty); 2124 IRTemp src = newTemp(ty); 2125 IRTemp dst1 = newTemp(ty); 2126 UInt lit = getUDisp(size,delta); 2127 assign(dst0, getIReg(size,R_EAX)); 2128 assign(src, mkU(ty,lit)); 2129 2130 if (isAddSub(op8) && !carrying) { 2131 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2132 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2133 } 2134 else 2135 if (isLogic(op8)) { 2136 vassert(!carrying); 2137 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2138 setFlags_DEP1(op8, dst1, ty); 2139 } 2140 else 2141 if (op8 == Iop_Add8 && carrying) { 2142 helper_ADC( size, dst1, dst0, src, 2143 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2144 } 2145 else 2146 if (op8 == Iop_Sub8 && carrying) { 2147 helper_SBB( size, dst1, dst0, src, 2148 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2149 } 2150 else 2151 vpanic("dis_op_imm_A(x86,guest)"); 2152 2153 if (keep) 2154 putIReg(size, R_EAX, mkexpr(dst1)); 2155 2156 DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), 2157 lit, nameIReg(size,R_EAX)); 2158 return delta+size; 2159 } 2160 2161 2162 /* Sign- and Zero-extending moves. */ 2163 static 2164 UInt dis_movx_E_G ( UChar sorb, 2165 Int delta, Int szs, Int szd, Bool sign_extend ) 2166 { 2167 UChar rm = getIByte(delta); 2168 if (epartIsReg(rm)) { 2169 if (szd == szs) { 2170 // mutant case. See #250799 2171 putIReg(szd, gregOfRM(rm), 2172 getIReg(szs,eregOfRM(rm))); 2173 } else { 2174 // normal case 2175 putIReg(szd, gregOfRM(rm), 2176 unop(mkWidenOp(szs,szd,sign_extend), 2177 getIReg(szs,eregOfRM(rm)))); 2178 } 2179 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2180 nameISize(szs), nameISize(szd), 2181 nameIReg(szs,eregOfRM(rm)), 2182 nameIReg(szd,gregOfRM(rm))); 2183 return 1+delta; 2184 } 2185 2186 /* E refers to memory */ 2187 { 2188 Int len; 2189 HChar dis_buf[50]; 2190 IRTemp addr = disAMode ( &len, sorb, delta, dis_buf ); 2191 if (szd == szs) { 2192 // mutant case. See #250799 2193 putIReg(szd, gregOfRM(rm), 2194 loadLE(szToITy(szs),mkexpr(addr))); 2195 } else { 2196 // normal case 2197 putIReg(szd, gregOfRM(rm), 2198 unop(mkWidenOp(szs,szd,sign_extend), 2199 loadLE(szToITy(szs),mkexpr(addr)))); 2200 } 2201 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 2202 nameISize(szs), nameISize(szd), 2203 dis_buf, nameIReg(szd,gregOfRM(rm))); 2204 return len+delta; 2205 } 2206 } 2207 2208 2209 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / 2210 16 / 8 bit quantity in the given IRTemp. */ 2211 static 2212 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 2213 { 2214 IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32; 2215 IRTemp src64 = newTemp(Ity_I64); 2216 IRTemp dst64 = newTemp(Ity_I64); 2217 switch (sz) { 2218 case 4: 2219 assign( src64, binop(Iop_32HLto64, 2220 getIReg(4,R_EDX), getIReg(4,R_EAX)) ); 2221 assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) ); 2222 putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) ); 2223 putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) ); 2224 break; 2225 case 2: { 2226 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2227 IROp widen1632 = signed_divide ? 
Iop_16Sto32 : Iop_16Uto32; 2228 assign( src64, unop(widen3264, 2229 binop(Iop_16HLto32, 2230 getIReg(2,R_EDX), getIReg(2,R_EAX))) ); 2231 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 2232 putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 2233 putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 2234 break; 2235 } 2236 case 1: { 2237 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 2238 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 2239 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 2240 assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) ); 2241 assign( dst64, 2242 binop(op, mkexpr(src64), 2243 unop(widen1632, unop(widen816, mkexpr(t)))) ); 2244 putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16, 2245 unop(Iop_64to32,mkexpr(dst64)))) ); 2246 putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16, 2247 unop(Iop_64HIto32,mkexpr(dst64)))) ); 2248 break; 2249 } 2250 default: vpanic("codegen_div(x86)"); 2251 } 2252 } 2253 2254 2255 static 2256 UInt dis_Grp1 ( UChar sorb, Bool locked, 2257 Int delta, UChar modrm, 2258 Int am_sz, Int d_sz, Int sz, UInt d32 ) 2259 { 2260 Int len; 2261 HChar dis_buf[50]; 2262 IRType ty = szToITy(sz); 2263 IRTemp dst1 = newTemp(ty); 2264 IRTemp src = newTemp(ty); 2265 IRTemp dst0 = newTemp(ty); 2266 IRTemp addr = IRTemp_INVALID; 2267 IROp op8 = Iop_INVALID; 2268 UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF); 2269 2270 switch (gregOfRM(modrm)) { 2271 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 2272 case 2: break; // ADC 2273 case 3: break; // SBB 2274 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 2275 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 2276 /*NOTREACHED*/ 2277 default: vpanic("dis_Grp1: unhandled case"); 2278 } 2279 2280 if (epartIsReg(modrm)) { 2281 vassert(am_sz == 1); 2282 2283 assign(dst0, getIReg(sz,eregOfRM(modrm))); 2284 assign(src, mkU(ty,d32 & mask)); 2285 2286 if (gregOfRM(modrm) == 2 /* ADC */) { 2287 helper_ADC( sz, dst1, dst0, src, 2288 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2289 } else 2290 if (gregOfRM(modrm) == 3 /* SBB */) { 2291 helper_SBB( sz, dst1, dst0, src, 2292 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2293 } else { 2294 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2295 if (isAddSub(op8)) 2296 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2297 else 2298 setFlags_DEP1(op8, dst1, ty); 2299 } 2300 2301 if (gregOfRM(modrm) < 7) 2302 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2303 2304 delta += (am_sz + d_sz); 2305 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, 2306 nameIReg(sz,eregOfRM(modrm))); 2307 } else { 2308 addr = disAMode ( &len, sorb, delta, dis_buf); 2309 2310 assign(dst0, loadLE(ty,mkexpr(addr))); 2311 assign(src, mkU(ty,d32 & mask)); 2312 2313 if (gregOfRM(modrm) == 2 /* ADC */) { 2314 if (locked) { 2315 /* cas-style store */ 2316 helper_ADC( sz, dst1, dst0, src, 2317 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2318 } else { 2319 /* normal store */ 2320 helper_ADC( sz, dst1, dst0, src, 2321 /*store*/addr, IRTemp_INVALID, 0 ); 2322 } 2323 } else 2324 if (gregOfRM(modrm) == 3 /* SBB */) { 2325 if (locked) { 2326 /* cas-style store */ 2327 helper_SBB( sz, dst1, dst0, src, 2328 /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr ); 2329 } else { 2330 /* normal store */ 2331 helper_SBB( sz, dst1, dst0, src, 2332 /*store*/addr, IRTemp_INVALID, 0 ); 2333 } 2334 } else { 2335 assign(dst1, 
binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 2336 if (gregOfRM(modrm) < 7) { 2337 if (locked) { 2338 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 2339 mkexpr(dst1)/*newVal*/, 2340 guest_EIP_curr_instr ); 2341 } else { 2342 storeLE(mkexpr(addr), mkexpr(dst1)); 2343 } 2344 } 2345 if (isAddSub(op8)) 2346 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2347 else 2348 setFlags_DEP1(op8, dst1, ty); 2349 } 2350 2351 delta += (len+d_sz); 2352 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), 2353 d32, dis_buf); 2354 } 2355 return delta; 2356 } 2357 2358 2359 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 2360 expression. */ 2361 2362 static 2363 UInt dis_Grp2 ( UChar sorb, 2364 Int delta, UChar modrm, 2365 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 2366 HChar* shift_expr_txt, Bool* decode_OK ) 2367 { 2368 /* delta on entry points at the modrm byte. */ 2369 HChar dis_buf[50]; 2370 Int len; 2371 Bool isShift, isRotate, isRotateC; 2372 IRType ty = szToITy(sz); 2373 IRTemp dst0 = newTemp(ty); 2374 IRTemp dst1 = newTemp(ty); 2375 IRTemp addr = IRTemp_INVALID; 2376 2377 *decode_OK = True; 2378 2379 vassert(sz == 1 || sz == 2 || sz == 4); 2380 2381 /* Put value to shift/rotate in dst0. */ 2382 if (epartIsReg(modrm)) { 2383 assign(dst0, getIReg(sz, eregOfRM(modrm))); 2384 delta += (am_sz + d_sz); 2385 } else { 2386 addr = disAMode ( &len, sorb, delta, dis_buf); 2387 assign(dst0, loadLE(ty,mkexpr(addr))); 2388 delta += len + d_sz; 2389 } 2390 2391 isShift = False; 2392 switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; } 2393 2394 isRotate = False; 2395 switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; } 2396 2397 isRotateC = False; 2398 switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; } 2399 2400 if (gregOfRM(modrm) == 6) { 2401 *decode_OK = False; 2402 return delta; 2403 } 2404 2405 if (!isShift && !isRotate && !isRotateC) { 2406 /*NOTREACHED*/ 2407 vpanic("dis_Grp2(Reg): unhandled case(x86)"); 2408 } 2409 2410 if (isRotateC) { 2411 /* call a helper; these insns are so ridiculous they do not 2412 deserve better */ 2413 Bool left = toBool(gregOfRM(modrm) == 2); 2414 IRTemp r64 = newTemp(Ity_I64); 2415 IRExpr** args 2416 = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */ 2417 widenUto32(shift_expr), /* rotate amount */ 2418 widenUto32(mk_x86g_calculate_eflags_all()), 2419 mkU32(sz) ); 2420 assign( r64, mkIRExprCCall( 2421 Ity_I64, 2422 0/*regparm*/, 2423 left ? "x86g_calculate_RCL" : "x86g_calculate_RCR", 2424 left ? &x86g_calculate_RCL : &x86g_calculate_RCR, 2425 args 2426 ) 2427 ); 2428 /* new eflags in hi half r64; new value in lo half r64 */ 2429 assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) ); 2430 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2431 stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) )); 2432 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2433 /* Set NDEP even though it isn't used. This makes redundant-PUT 2434 elimination of previous stores to this field work better. 
*/ 2435 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2436 } 2437 2438 if (isShift) { 2439 2440 IRTemp pre32 = newTemp(Ity_I32); 2441 IRTemp res32 = newTemp(Ity_I32); 2442 IRTemp res32ss = newTemp(Ity_I32); 2443 IRTemp shift_amt = newTemp(Ity_I8); 2444 IROp op32; 2445 2446 switch (gregOfRM(modrm)) { 2447 case 4: op32 = Iop_Shl32; break; 2448 case 5: op32 = Iop_Shr32; break; 2449 case 7: op32 = Iop_Sar32; break; 2450 /*NOTREACHED*/ 2451 default: vpanic("dis_Grp2:shift"); break; 2452 } 2453 2454 /* Widen the value to be shifted to 32 bits, do the shift, and 2455 narrow back down. This seems surprisingly long-winded, but 2456 unfortunately the Intel semantics requires that 8/16-bit 2457 shifts give defined results for shift values all the way up 2458 to 31, and this seems the simplest way to do it. It has the 2459 advantage that the only IR level shifts generated are of 32 2460 bit values, and the shift amount is guaranteed to be in the 2461 range 0 .. 31, thereby observing the IR semantics requiring 2462 all shift values to be in the range 0 .. 2^word_size-1. */ 2463 2464 /* shift_amt = shift_expr & 31, regardless of operation size */ 2465 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) ); 2466 2467 /* suitably widen the value to be shifted to 32 bits. */ 2468 assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0)) 2469 : widenUto32(mkexpr(dst0)) ); 2470 2471 /* res32 = pre32 `shift` shift_amt */ 2472 assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) ); 2473 2474 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */ 2475 assign( res32ss, 2476 binop(op32, 2477 mkexpr(pre32), 2478 binop(Iop_And8, 2479 binop(Iop_Sub8, 2480 mkexpr(shift_amt), mkU8(1)), 2481 mkU8(31))) ); 2482 2483 /* Build the flags thunk. */ 2484 setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt); 2485 2486 /* Narrow the result back down. */ 2487 assign( dst1, narrowTo(ty, mkexpr(res32)) ); 2488 2489 } /* if (isShift) */ 2490 2491 else 2492 if (isRotate) { 2493 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2); 2494 Bool left = toBool(gregOfRM(modrm) == 0); 2495 IRTemp rot_amt = newTemp(Ity_I8); 2496 IRTemp rot_amt32 = newTemp(Ity_I8); 2497 IRTemp oldFlags = newTemp(Ity_I32); 2498 2499 /* rot_amt = shift_expr & mask */ 2500 /* By masking the rotate amount thusly, the IR-level Shl/Shr 2501 expressions never shift beyond the word size and thus remain 2502 well defined. */ 2503 assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31))); 2504 2505 if (ty == Ity_I32) 2506 assign(rot_amt, mkexpr(rot_amt32)); 2507 else 2508 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1))); 2509 2510 if (left) { 2511 2512 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 2513 assign(dst1, 2514 binop( mkSizedOp(ty,Iop_Or8), 2515 binop( mkSizedOp(ty,Iop_Shl8), 2516 mkexpr(dst0), 2517 mkexpr(rot_amt) 2518 ), 2519 binop( mkSizedOp(ty,Iop_Shr8), 2520 mkexpr(dst0), 2521 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2522 ) 2523 ) 2524 ); 2525 ccOp += X86G_CC_OP_ROLB; 2526 2527 } else { /* right */ 2528 2529 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 2530 assign(dst1, 2531 binop( mkSizedOp(ty,Iop_Or8), 2532 binop( mkSizedOp(ty,Iop_Shr8), 2533 mkexpr(dst0), 2534 mkexpr(rot_amt) 2535 ), 2536 binop( mkSizedOp(ty,Iop_Shl8), 2537 mkexpr(dst0), 2538 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 2539 ) 2540 ) 2541 ); 2542 ccOp += X86G_CC_OP_RORB; 2543 2544 } 2545 2546 /* dst1 now holds the rotated value. Build flag thunk. We 2547 need the resulting value for this, and the previous flags. 
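      (For instance -- assuming the B/W/L ordering of the ROL/ROR
      thunk ops, which the 'ccOp +=' scheme above relies on -- a
      "rolw $4,%bx" ends up with ccOp == X86G_CC_OP_ROLW, CC_DEP1
      holding the rotated value widened to 32 bits, and CC_NDEP the
      pre-rotate eflags.)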
2548 Except don't set it if the rotate count is zero. */ 2549 2550 assign(oldFlags, mk_x86g_calculate_eflags_all()); 2551 2552 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 2553 stmt( IRStmt_Put( OFFB_CC_OP, 2554 IRExpr_Mux0X( mkexpr(rot_amt32), 2555 IRExpr_Get(OFFB_CC_OP,Ity_I32), 2556 mkU32(ccOp))) ); 2557 stmt( IRStmt_Put( OFFB_CC_DEP1, 2558 IRExpr_Mux0X( mkexpr(rot_amt32), 2559 IRExpr_Get(OFFB_CC_DEP1,Ity_I32), 2560 widenUto32(mkexpr(dst1)))) ); 2561 stmt( IRStmt_Put( OFFB_CC_DEP2, 2562 IRExpr_Mux0X( mkexpr(rot_amt32), 2563 IRExpr_Get(OFFB_CC_DEP2,Ity_I32), 2564 mkU32(0))) ); 2565 stmt( IRStmt_Put( OFFB_CC_NDEP, 2566 IRExpr_Mux0X( mkexpr(rot_amt32), 2567 IRExpr_Get(OFFB_CC_NDEP,Ity_I32), 2568 mkexpr(oldFlags))) ); 2569 } /* if (isRotate) */ 2570 2571 /* Save result, and finish up. */ 2572 if (epartIsReg(modrm)) { 2573 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2574 if (vex_traceflags & VEX_TRACE_FE) { 2575 vex_printf("%s%c ", 2576 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2577 if (shift_expr_txt) 2578 vex_printf("%s", shift_expr_txt); 2579 else 2580 ppIRExpr(shift_expr); 2581 vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm))); 2582 } 2583 } else { 2584 storeLE(mkexpr(addr), mkexpr(dst1)); 2585 if (vex_traceflags & VEX_TRACE_FE) { 2586 vex_printf("%s%c ", 2587 nameGrp2(gregOfRM(modrm)), nameISize(sz) ); 2588 if (shift_expr_txt) 2589 vex_printf("%s", shift_expr_txt); 2590 else 2591 ppIRExpr(shift_expr); 2592 vex_printf(", %s\n", dis_buf); 2593 } 2594 } 2595 return delta; 2596 } 2597 2598 2599 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 2600 static 2601 UInt dis_Grp8_Imm ( UChar sorb, 2602 Bool locked, 2603 Int delta, UChar modrm, 2604 Int am_sz, Int sz, UInt src_val, 2605 Bool* decode_OK ) 2606 { 2607 /* src_val denotes a d8. 2608 And delta on entry points at the modrm byte. */ 2609 2610 IRType ty = szToITy(sz); 2611 IRTemp t2 = newTemp(Ity_I32); 2612 IRTemp t2m = newTemp(Ity_I32); 2613 IRTemp t_addr = IRTemp_INVALID; 2614 HChar dis_buf[50]; 2615 UInt mask; 2616 2617 /* we're optimists :-) */ 2618 *decode_OK = True; 2619 2620 /* Limit src_val -- the bit offset -- to something within a word. 2621 The Intel docs say that literal offsets larger than a word are 2622 masked in this way. */ 2623 switch (sz) { 2624 case 2: src_val &= 15; break; 2625 case 4: src_val &= 31; break; 2626 default: *decode_OK = False; return delta; 2627 } 2628 2629 /* Invent a mask suitable for the operation. */ 2630 switch (gregOfRM(modrm)) { 2631 case 4: /* BT */ mask = 0; break; 2632 case 5: /* BTS */ mask = 1 << src_val; break; 2633 case 6: /* BTR */ mask = ~(1 << src_val); break; 2634 case 7: /* BTC */ mask = 1 << src_val; break; 2635 /* If this needs to be extended, probably simplest to make a 2636 new function to handle the other cases (0 .. 3). The 2637 Intel docs do however not indicate any use for 0 .. 3, so 2638 we don't expect this to happen. */ 2639 default: *decode_OK = False; return delta; 2640 } 2641 2642 /* Fetch the value to be tested and modified into t2, which is 2643 32-bits wide regardless of sz. 
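      Illustrative example: for "btsl $3,(%eax)" (0F BA, subopcode /5)
      t2 gets the 32-bit word at (%eax), mask is 1<<3, and t2m below
      becomes t2 with bit 3 set; the old bit 3 ends up in CF.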
*/ 2644 if (epartIsReg(modrm)) { 2645 vassert(am_sz == 1); 2646 assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) ); 2647 delta += (am_sz + 1); 2648 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2649 src_val, nameIReg(sz,eregOfRM(modrm))); 2650 } else { 2651 Int len; 2652 t_addr = disAMode ( &len, sorb, delta, dis_buf); 2653 delta += (len+1); 2654 assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) ); 2655 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz), 2656 src_val, dis_buf); 2657 } 2658 2659 /* Compute the new value into t2m, if non-BT. */ 2660 switch (gregOfRM(modrm)) { 2661 case 4: /* BT */ 2662 break; 2663 case 5: /* BTS */ 2664 assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) ); 2665 break; 2666 case 6: /* BTR */ 2667 assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) ); 2668 break; 2669 case 7: /* BTC */ 2670 assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) ); 2671 break; 2672 default: 2673 /*NOTREACHED*/ /*the previous switch guards this*/ 2674 vassert(0); 2675 } 2676 2677 /* Write the result back, if non-BT. If the CAS fails then we 2678 side-exit from the trace at this point, and so the flag state is 2679 not affected. This is of course as required. */ 2680 if (gregOfRM(modrm) != 4 /* BT */) { 2681 if (epartIsReg(modrm)) { 2682 putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m))); 2683 } else { 2684 if (locked) { 2685 casLE( mkexpr(t_addr), 2686 narrowTo(ty, mkexpr(t2))/*expd*/, 2687 narrowTo(ty, mkexpr(t2m))/*new*/, 2688 guest_EIP_curr_instr ); 2689 } else { 2690 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 2691 } 2692 } 2693 } 2694 2695 /* Copy relevant bit from t2 into the carry flag. */ 2696 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 2697 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 2698 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 2699 stmt( IRStmt_Put( 2700 OFFB_CC_DEP1, 2701 binop(Iop_And32, 2702 binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)), 2703 mkU32(1)) 2704 )); 2705 /* Set NDEP even though it isn't used. This makes redundant-PUT 2706 elimination of previous stores to this field work better. */ 2707 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 2708 2709 return delta; 2710 } 2711 2712 2713 /* Signed/unsigned widening multiply. Generate IR to multiply the 2714 value in EAX/AX/AL by the given IRTemp, and park the result in 2715 EDX:EAX/DX:AX/AX. 2716 */ 2717 static void codegen_mulL_A_D ( Int sz, Bool syned, 2718 IRTemp tmp, HChar* tmp_txt ) 2719 { 2720 IRType ty = szToITy(sz); 2721 IRTemp t1 = newTemp(ty); 2722 2723 assign( t1, getIReg(sz, R_EAX) ); 2724 2725 switch (ty) { 2726 case Ity_I32: { 2727 IRTemp res64 = newTemp(Ity_I64); 2728 IRTemp resHi = newTemp(Ity_I32); 2729 IRTemp resLo = newTemp(Ity_I32); 2730 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 2731 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2732 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 2733 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2734 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 2735 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 2736 putIReg(4, R_EDX, mkexpr(resHi)); 2737 putIReg(4, R_EAX, mkexpr(resLo)); 2738 break; 2739 } 2740 case Ity_I16: { 2741 IRTemp res32 = newTemp(Ity_I32); 2742 IRTemp resHi = newTemp(Ity_I16); 2743 IRTemp resLo = newTemp(Ity_I16); 2744 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 2745 UInt tBaseOp = syned ? 
X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2746 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 2747 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2748 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 2749 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 2750 putIReg(2, R_EDX, mkexpr(resHi)); 2751 putIReg(2, R_EAX, mkexpr(resLo)); 2752 break; 2753 } 2754 case Ity_I8: { 2755 IRTemp res16 = newTemp(Ity_I16); 2756 IRTemp resHi = newTemp(Ity_I8); 2757 IRTemp resLo = newTemp(Ity_I8); 2758 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 2759 UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB; 2760 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 2761 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 2762 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 2763 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 2764 putIReg(2, R_EAX, mkexpr(res16)); 2765 break; 2766 } 2767 default: 2768 vpanic("codegen_mulL_A_D(x86)"); 2769 } 2770 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 2771 } 2772 2773 2774 /* Group 3 extended opcodes. */ 2775 static 2776 UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK ) 2777 { 2778 UInt d32; 2779 UChar modrm; 2780 HChar dis_buf[50]; 2781 Int len; 2782 IRTemp addr; 2783 IRType ty = szToITy(sz); 2784 IRTemp t1 = newTemp(ty); 2785 IRTemp dst1, src, dst0; 2786 2787 *decode_OK = True; /* may change this later */ 2788 2789 modrm = getIByte(delta); 2790 2791 if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) { 2792 /* LOCK prefix only allowed with not and neg subopcodes */ 2793 *decode_OK = False; 2794 return delta; 2795 } 2796 2797 if (epartIsReg(modrm)) { 2798 switch (gregOfRM(modrm)) { 2799 case 0: { /* TEST */ 2800 delta++; d32 = getUDisp(sz, delta); delta += sz; 2801 dst1 = newTemp(ty); 2802 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2803 getIReg(sz,eregOfRM(modrm)), 2804 mkU(ty,d32))); 2805 setFlags_DEP1( Iop_And8, dst1, ty ); 2806 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, 2807 nameIReg(sz, eregOfRM(modrm))); 2808 break; 2809 } 2810 case 1: /* UNDEFINED */ 2811 /* The Intel docs imply this insn is undefined and binutils 2812 agrees. Unfortunately Core 2 will run it (with who 2813 knows what result?) sandpile.org reckons it's an alias 2814 for case 0. We play safe. 
*/ 2815 *decode_OK = False; 2816 break; 2817 case 2: /* NOT */ 2818 delta++; 2819 putIReg(sz, eregOfRM(modrm), 2820 unop(mkSizedOp(ty,Iop_Not8), 2821 getIReg(sz, eregOfRM(modrm)))); 2822 DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2823 break; 2824 case 3: /* NEG */ 2825 delta++; 2826 dst0 = newTemp(ty); 2827 src = newTemp(ty); 2828 dst1 = newTemp(ty); 2829 assign(dst0, mkU(ty,0)); 2830 assign(src, getIReg(sz,eregOfRM(modrm))); 2831 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src))); 2832 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2833 putIReg(sz, eregOfRM(modrm), mkexpr(dst1)); 2834 DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2835 break; 2836 case 4: /* MUL (unsigned widening) */ 2837 delta++; 2838 src = newTemp(ty); 2839 assign(src, getIReg(sz,eregOfRM(modrm))); 2840 codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) ); 2841 break; 2842 case 5: /* IMUL (signed widening) */ 2843 delta++; 2844 src = newTemp(ty); 2845 assign(src, getIReg(sz,eregOfRM(modrm))); 2846 codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) ); 2847 break; 2848 case 6: /* DIV */ 2849 delta++; 2850 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2851 codegen_div ( sz, t1, False ); 2852 DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2853 break; 2854 case 7: /* IDIV */ 2855 delta++; 2856 assign( t1, getIReg(sz, eregOfRM(modrm)) ); 2857 codegen_div ( sz, t1, True ); 2858 DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 2859 break; 2860 default: 2861 /* This can't happen - gregOfRM should return 0 .. 7 only */ 2862 vpanic("Grp3(x86)"); 2863 } 2864 } else { 2865 addr = disAMode ( &len, sorb, delta, dis_buf ); 2866 t1 = newTemp(ty); 2867 delta += len; 2868 assign(t1, loadLE(ty,mkexpr(addr))); 2869 switch (gregOfRM(modrm)) { 2870 case 0: { /* TEST */ 2871 d32 = getUDisp(sz, delta); delta += sz; 2872 dst1 = newTemp(ty); 2873 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 2874 mkexpr(t1), mkU(ty,d32))); 2875 setFlags_DEP1( Iop_And8, dst1, ty ); 2876 DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 2877 break; 2878 } 2879 case 1: /* UNDEFINED */ 2880 /* See comment above on R case */ 2881 *decode_OK = False; 2882 break; 2883 case 2: /* NOT */ 2884 dst1 = newTemp(ty); 2885 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 2886 if (locked) { 2887 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2888 guest_EIP_curr_instr ); 2889 } else { 2890 storeLE( mkexpr(addr), mkexpr(dst1) ); 2891 } 2892 DIP("not%c %s\n", nameISize(sz), dis_buf); 2893 break; 2894 case 3: /* NEG */ 2895 dst0 = newTemp(ty); 2896 src = newTemp(ty); 2897 dst1 = newTemp(ty); 2898 assign(dst0, mkU(ty,0)); 2899 assign(src, mkexpr(t1)); 2900 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), 2901 mkexpr(dst0), mkexpr(src))); 2902 if (locked) { 2903 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 2904 guest_EIP_curr_instr ); 2905 } else { 2906 storeLE( mkexpr(addr), mkexpr(dst1) ); 2907 } 2908 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 2909 DIP("neg%c %s\n", nameISize(sz), dis_buf); 2910 break; 2911 case 4: /* MUL */ 2912 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 2913 break; 2914 case 5: /* IMUL */ 2915 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 2916 break; 2917 case 6: /* DIV */ 2918 codegen_div ( sz, t1, False ); 2919 DIP("div%c %s\n", nameISize(sz), dis_buf); 2920 break; 2921 case 7: /* IDIV */ 2922 codegen_div ( sz, t1, True ); 2923 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 2924 break; 2925 default: 2926 /* This 
can't happen - gregOfRM should return 0 .. 7 only */ 2927 vpanic("Grp3(x86)"); 2928 } 2929 } 2930 return delta; 2931 } 2932 2933 2934 /* Group 4 extended opcodes. */ 2935 static 2936 UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK ) 2937 { 2938 Int alen; 2939 UChar modrm; 2940 HChar dis_buf[50]; 2941 IRType ty = Ity_I8; 2942 IRTemp t1 = newTemp(ty); 2943 IRTemp t2 = newTemp(ty); 2944 2945 *decode_OK = True; 2946 2947 modrm = getIByte(delta); 2948 2949 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 2950 /* LOCK prefix only allowed with inc and dec subopcodes */ 2951 *decode_OK = False; 2952 return delta; 2953 } 2954 2955 if (epartIsReg(modrm)) { 2956 assign(t1, getIReg(1, eregOfRM(modrm))); 2957 switch (gregOfRM(modrm)) { 2958 case 0: /* INC */ 2959 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2960 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2961 setFlags_INC_DEC( True, t2, ty ); 2962 break; 2963 case 1: /* DEC */ 2964 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2965 putIReg(1, eregOfRM(modrm), mkexpr(t2)); 2966 setFlags_INC_DEC( False, t2, ty ); 2967 break; 2968 default: 2969 *decode_OK = False; 2970 return delta; 2971 } 2972 delta++; 2973 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), 2974 nameIReg(1, eregOfRM(modrm))); 2975 } else { 2976 IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf ); 2977 assign( t1, loadLE(ty, mkexpr(addr)) ); 2978 switch (gregOfRM(modrm)) { 2979 case 0: /* INC */ 2980 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 2981 if (locked) { 2982 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 2983 guest_EIP_curr_instr ); 2984 } else { 2985 storeLE( mkexpr(addr), mkexpr(t2) ); 2986 } 2987 setFlags_INC_DEC( True, t2, ty ); 2988 break; 2989 case 1: /* DEC */ 2990 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 2991 if (locked) { 2992 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 2993 guest_EIP_curr_instr ); 2994 } else { 2995 storeLE( mkexpr(addr), mkexpr(t2) ); 2996 } 2997 setFlags_INC_DEC( False, t2, ty ); 2998 break; 2999 default: 3000 *decode_OK = False; 3001 return delta; 3002 } 3003 delta += alen; 3004 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); 3005 } 3006 return delta; 3007 } 3008 3009 3010 /* Group 5 extended opcodes. 
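   These are the FF /reg forms: /0=inc, /1=dec, /2=call Ev, /4=jmp Ev,
   /6=push Ev; the far-transfer forms /3 and /5 are not handled here
   and fail the decode.  E.g. "call *%eax" (FF D0) pushes the return
   EIP and then jumps to the value held in %eax.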
*/ 3011 static 3012 UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, 3013 DisResult* dres, Bool* decode_OK ) 3014 { 3015 Int len; 3016 UChar modrm; 3017 HChar dis_buf[50]; 3018 IRTemp addr = IRTemp_INVALID; 3019 IRType ty = szToITy(sz); 3020 IRTemp t1 = newTemp(ty); 3021 IRTemp t2 = IRTemp_INVALID; 3022 3023 *decode_OK = True; 3024 3025 modrm = getIByte(delta); 3026 3027 if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) { 3028 /* LOCK prefix only allowed with inc and dec subopcodes */ 3029 *decode_OK = False; 3030 return delta; 3031 } 3032 3033 if (epartIsReg(modrm)) { 3034 assign(t1, getIReg(sz,eregOfRM(modrm))); 3035 switch (gregOfRM(modrm)) { 3036 case 0: /* INC */ 3037 vassert(sz == 2 || sz == 4); 3038 t2 = newTemp(ty); 3039 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3040 mkexpr(t1), mkU(ty,1))); 3041 setFlags_INC_DEC( True, t2, ty ); 3042 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3043 break; 3044 case 1: /* DEC */ 3045 vassert(sz == 2 || sz == 4); 3046 t2 = newTemp(ty); 3047 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3048 mkexpr(t1), mkU(ty,1))); 3049 setFlags_INC_DEC( False, t2, ty ); 3050 putIReg(sz,eregOfRM(modrm),mkexpr(t2)); 3051 break; 3052 case 2: /* call Ev */ 3053 vassert(sz == 4); 3054 t2 = newTemp(Ity_I32); 3055 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3056 putIReg(4, R_ESP, mkexpr(t2)); 3057 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1)); 3058 jmp_treg(Ijk_Call,t1); 3059 dres->whatNext = Dis_StopHere; 3060 break; 3061 case 4: /* jmp Ev */ 3062 vassert(sz == 4); 3063 jmp_treg(Ijk_Boring,t1); 3064 dres->whatNext = Dis_StopHere; 3065 break; 3066 case 6: /* PUSH Ev */ 3067 vassert(sz == 4 || sz == 2); 3068 t2 = newTemp(Ity_I32); 3069 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3070 putIReg(4, R_ESP, mkexpr(t2) ); 3071 storeLE( mkexpr(t2), mkexpr(t1) ); 3072 break; 3073 default: 3074 *decode_OK = False; 3075 return delta; 3076 } 3077 delta++; 3078 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)), 3079 nameISize(sz), nameIReg(sz, eregOfRM(modrm))); 3080 } else { 3081 addr = disAMode ( &len, sorb, delta, dis_buf ); 3082 assign(t1, loadLE(ty,mkexpr(addr))); 3083 switch (gregOfRM(modrm)) { 3084 case 0: /* INC */ 3085 t2 = newTemp(ty); 3086 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 3087 mkexpr(t1), mkU(ty,1))); 3088 if (locked) { 3089 casLE( mkexpr(addr), 3090 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3091 } else { 3092 storeLE(mkexpr(addr),mkexpr(t2)); 3093 } 3094 setFlags_INC_DEC( True, t2, ty ); 3095 break; 3096 case 1: /* DEC */ 3097 t2 = newTemp(ty); 3098 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 3099 mkexpr(t1), mkU(ty,1))); 3100 if (locked) { 3101 casLE( mkexpr(addr), 3102 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 3103 } else { 3104 storeLE(mkexpr(addr),mkexpr(t2)); 3105 } 3106 setFlags_INC_DEC( False, t2, ty ); 3107 break; 3108 case 2: /* call Ev */ 3109 vassert(sz == 4); 3110 t2 = newTemp(Ity_I32); 3111 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 3112 putIReg(4, R_ESP, mkexpr(t2)); 3113 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len)); 3114 jmp_treg(Ijk_Call,t1); 3115 dres->whatNext = Dis_StopHere; 3116 break; 3117 case 4: /* JMP Ev */ 3118 vassert(sz == 4); 3119 jmp_treg(Ijk_Boring,t1); 3120 dres->whatNext = Dis_StopHere; 3121 break; 3122 case 6: /* PUSH Ev */ 3123 vassert(sz == 4 || sz == 2); 3124 t2 = newTemp(Ity_I32); 3125 assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 3126 putIReg(4, R_ESP, mkexpr(t2) ); 3127 storeLE( mkexpr(t2), mkexpr(t1) ); 3128 break; 3129 default: 3130 
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), dis_buf);
   }
   return delta;
}


/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops */
static
void dis_string_op_increment(Int sz, IRTemp t_inc)
{
   if (sz == 4 || sz == 2) {
      assign( t_inc,
              binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
                               mkU8(sz/2) ) );
   } else {
      assign( t_inc,
              IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
   }
}

static
void dis_string_op( void (*dis_OP)( Int, IRTemp ),
                    Int sz, HChar* name, UChar sorb )
{
   IRTemp t_inc = newTemp(Ity_I32);
   vassert(sorb == 0); /* hmm.  so what was the point of passing it in? */
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc );
   DIP("%s%c\n", name, nameISize(sz));
}

static
void dis_MOVS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I32);   /* EDI */
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( td, getIReg(4, R_EDI) );
   assign( ts, getIReg(4, R_ESI) );

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_LODS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( ts, getIReg(4, R_ESI) );

   putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );

   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_STOS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* EAX */
   IRTemp td = newTemp(Ity_I32);   /* EDI */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}

static
void dis_CMPS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);       /* (EDI) */
   IRTemp tsv = newTemp(ty);       /* (ESI) */
   IRTemp td  = newTemp(Ity_I32);  /*  EDI  */
   IRTemp ts  = newTemp(Ity_I32);  /*  ESI  */

   assign( td,  getIReg(4, R_EDI) );
   assign( ts,  getIReg(4, R_ESI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   assign( tsv, loadLE(ty,mkexpr(ts)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static
void dis_SCAS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);       /*  EAX  */
   IRTemp td  = newTemp(Ity_I32);  /*  EDI  */
   IRTemp tdv = newTemp(ty);       /* (EDI) */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}


/* Wrap the appropriate string op inside a REP/REPE/REPNE.
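   For example (sketch), "rep movsd" (F3 A5) is rendered, per
   iteration, as: side-exit to the next insn if ECX==0; ECX -= 1; do
   one MOVS step; then jump back to the start of the insn -- or, for
   REPE/REPNE, loop back only while the condition still holds, else
   fall through to the next insn.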
3254 We assume the insn is the last one in the basic block, and so emit a jump 3255 to the next insn, rather than just falling through. */ 3256 static 3257 void dis_REP_op ( X86Condcode cond, 3258 void (*dis_OP)(Int, IRTemp), 3259 Int sz, Addr32 eip, Addr32 eip_next, HChar* name ) 3260 { 3261 IRTemp t_inc = newTemp(Ity_I32); 3262 IRTemp tc = newTemp(Ity_I32); /* ECX */ 3263 3264 assign( tc, getIReg(4,R_ECX) ); 3265 3266 stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)), 3267 Ijk_Boring, 3268 IRConst_U32(eip_next) ) ); 3269 3270 putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 3271 3272 dis_string_op_increment(sz, t_inc); 3273 dis_OP (sz, t_inc); 3274 3275 if (cond == X86CondAlways) { 3276 jmp_lit(Ijk_Boring,eip); 3277 } else { 3278 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond), 3279 Ijk_Boring, 3280 IRConst_U32(eip) ) ); 3281 jmp_lit(Ijk_Boring,eip_next); 3282 } 3283 DIP("%s%c\n", name, nameISize(sz)); 3284 } 3285 3286 3287 /*------------------------------------------------------------*/ 3288 /*--- Arithmetic, etc. ---*/ 3289 /*------------------------------------------------------------*/ 3290 3291 /* IMUL E, G. Supplied eip points to the modR/M byte. */ 3292 static 3293 UInt dis_mul_E_G ( UChar sorb, 3294 Int size, 3295 Int delta0 ) 3296 { 3297 Int alen; 3298 HChar dis_buf[50]; 3299 UChar rm = getIByte(delta0); 3300 IRType ty = szToITy(size); 3301 IRTemp te = newTemp(ty); 3302 IRTemp tg = newTemp(ty); 3303 IRTemp resLo = newTemp(ty); 3304 3305 assign( tg, getIReg(size, gregOfRM(rm)) ); 3306 if (epartIsReg(rm)) { 3307 assign( te, getIReg(size, eregOfRM(rm)) ); 3308 } else { 3309 IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf ); 3310 assign( te, loadLE(ty,mkexpr(addr)) ); 3311 } 3312 3313 setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB ); 3314 3315 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 3316 3317 putIReg(size, gregOfRM(rm), mkexpr(resLo) ); 3318 3319 if (epartIsReg(rm)) { 3320 DIP("imul%c %s, %s\n", nameISize(size), 3321 nameIReg(size,eregOfRM(rm)), 3322 nameIReg(size,gregOfRM(rm))); 3323 return 1+delta0; 3324 } else { 3325 DIP("imul%c %s, %s\n", nameISize(size), 3326 dis_buf, nameIReg(size,gregOfRM(rm))); 3327 return alen+delta0; 3328 } 3329 } 3330 3331 3332 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ 3333 static 3334 UInt dis_imul_I_E_G ( UChar sorb, 3335 Int size, 3336 Int delta, 3337 Int litsize ) 3338 { 3339 Int d32, alen; 3340 HChar dis_buf[50]; 3341 UChar rm = getIByte(delta); 3342 IRType ty = szToITy(size); 3343 IRTemp te = newTemp(ty); 3344 IRTemp tl = newTemp(ty); 3345 IRTemp resLo = newTemp(ty); 3346 3347 vassert(size == 1 || size == 2 || size == 4); 3348 3349 if (epartIsReg(rm)) { 3350 assign(te, getIReg(size, eregOfRM(rm))); 3351 delta++; 3352 } else { 3353 IRTemp addr = disAMode( &alen, sorb, delta, dis_buf ); 3354 assign(te, loadLE(ty, mkexpr(addr))); 3355 delta += alen; 3356 } 3357 d32 = getSDisp(litsize,delta); 3358 delta += litsize; 3359 3360 if (size == 1) d32 &= 0xFF; 3361 if (size == 2) d32 &= 0xFFFF; 3362 3363 assign(tl, mkU(ty,d32)); 3364 3365 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 3366 3367 setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB ); 3368 3369 putIReg(size, gregOfRM(rm), mkexpr(resLo)); 3370 3371 DIP("imul %d, %s, %s\n", d32, 3372 ( epartIsReg(rm) ? 
nameIReg(size,eregOfRM(rm)) : dis_buf ), 3373 nameIReg(size,gregOfRM(rm)) ); 3374 return delta; 3375 } 3376 3377 3378 /* Generate an IR sequence to do a count-leading-zeroes operation on 3379 the supplied IRTemp, and return a new IRTemp holding the result. 3380 'ty' may be Ity_I16 or Ity_I32 only. In the case where the 3381 argument is zero, return the number of bits in the word (the 3382 natural semantics). */ 3383 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 3384 { 3385 vassert(ty == Ity_I32 || ty == Ity_I16); 3386 3387 IRTemp src32 = newTemp(Ity_I32); 3388 assign(src32, widenUto32( mkexpr(src) )); 3389 3390 IRTemp src32x = newTemp(Ity_I32); 3391 assign(src32x, 3392 binop(Iop_Shl32, mkexpr(src32), 3393 mkU8(32 - 8 * sizeofIRType(ty)))); 3394 3395 // Clz32 has undefined semantics when its input is zero, so 3396 // special-case around that. 3397 IRTemp res32 = newTemp(Ity_I32); 3398 assign(res32, 3399 IRExpr_Mux0X( 3400 unop(Iop_1Uto8, 3401 binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0))), 3402 unop(Iop_Clz32, mkexpr(src32x)), 3403 mkU32(8 * sizeofIRType(ty)) 3404 )); 3405 3406 IRTemp res = newTemp(ty); 3407 assign(res, narrowTo(ty, mkexpr(res32))); 3408 return res; 3409 } 3410 3411 3412 /*------------------------------------------------------------*/ 3413 /*--- ---*/ 3414 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 3415 /*--- ---*/ 3416 /*------------------------------------------------------------*/ 3417 3418 /* --- Helper functions for dealing with the register stack. --- */ 3419 3420 /* --- Set the emulation-warning pseudo-register. --- */ 3421 3422 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 3423 { 3424 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3425 stmt( IRStmt_Put( OFFB_EMWARN, e ) ); 3426 } 3427 3428 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 3429 3430 static IRExpr* mkQNaN64 ( void ) 3431 { 3432 /* QNaN is 0 2047 1 0(51times) 3433 == 0b 11111111111b 1 0(51times) 3434 == 0x7FF8 0000 0000 0000 3435 */ 3436 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 3437 } 3438 3439 /* --------- Get/put the top-of-stack pointer. --------- */ 3440 3441 static IRExpr* get_ftop ( void ) 3442 { 3443 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 3444 } 3445 3446 static void put_ftop ( IRExpr* e ) 3447 { 3448 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 3449 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 3450 } 3451 3452 /* --------- Get/put the C3210 bits. --------- */ 3453 3454 static IRExpr* get_C3210 ( void ) 3455 { 3456 return IRExpr_Get( OFFB_FC3210, Ity_I32 ); 3457 } 3458 3459 static void put_C3210 ( IRExpr* e ) 3460 { 3461 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 3462 } 3463 3464 /* --------- Get/put the FPU rounding mode. --------- */ 3465 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 3466 { 3467 return IRExpr_Get( OFFB_FPROUND, Ity_I32 ); 3468 } 3469 3470 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 3471 { 3472 stmt( IRStmt_Put( OFFB_FPROUND, e ) ); 3473 } 3474 3475 3476 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 3477 /* Produces a value in 0 .. 3, which is encoded as per the type 3478 IRRoundingMode. Since the guest_FPROUND value is also encoded as 3479 per IRRoundingMode, we merely need to get it and mask it for 3480 safety. 
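   (For reference: that shared encoding is 00 = to nearest, 01 =
   toward -infinity, 10 = toward +infinity, 11 = toward zero; the x87
   RC field and IRRoundingMode agree on it, which is what makes the
   simple mask below sufficient.)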
3481 */ 3482 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 3483 { 3484 return binop( Iop_And32, get_fpround(), mkU32(3) ); 3485 } 3486 3487 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 3488 { 3489 return mkU32(Irrm_NEAREST); 3490 } 3491 3492 3493 /* --------- Get/set FP register tag bytes. --------- */ 3494 3495 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 3496 3497 static void put_ST_TAG ( Int i, IRExpr* value ) 3498 { 3499 IRRegArray* descr; 3500 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 3501 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3502 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 3503 } 3504 3505 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 3506 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 3507 3508 static IRExpr* get_ST_TAG ( Int i ) 3509 { 3510 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 3511 return IRExpr_GetI( descr, get_ftop(), i ); 3512 } 3513 3514 3515 /* --------- Get/set FP registers. --------- */ 3516 3517 /* Given i, and some expression e, emit 'ST(i) = e' and set the 3518 register's tag to indicate the register is full. The previous 3519 state of the register is not checked. */ 3520 3521 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 3522 { 3523 IRRegArray* descr; 3524 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 3525 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3526 stmt( IRStmt_PutI( descr, get_ftop(), i, value ) ); 3527 /* Mark the register as in-use. */ 3528 put_ST_TAG(i, mkU8(1)); 3529 } 3530 3531 /* Given i, and some expression e, emit 3532 ST(i) = is_full(i) ? NaN : e 3533 and set the tag accordingly. 3534 */ 3535 3536 static void put_ST ( Int i, IRExpr* value ) 3537 { 3538 put_ST_UNCHECKED( i, 3539 IRExpr_Mux0X( get_ST_TAG(i), 3540 /* 0 means empty */ 3541 value, 3542 /* non-0 means full */ 3543 mkQNaN64() 3544 ) 3545 ); 3546 } 3547 3548 3549 /* Given i, generate an expression yielding 'ST(i)'. */ 3550 3551 static IRExpr* get_ST_UNCHECKED ( Int i ) 3552 { 3553 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 3554 return IRExpr_GetI( descr, get_ftop(), i ); 3555 } 3556 3557 3558 /* Given i, generate an expression yielding 3559 is_full(i) ? ST(i) : NaN 3560 */ 3561 3562 static IRExpr* get_ST ( Int i ) 3563 { 3564 return 3565 IRExpr_Mux0X( get_ST_TAG(i), 3566 /* 0 means empty */ 3567 mkQNaN64(), 3568 /* non-0 means full */ 3569 get_ST_UNCHECKED(i)); 3570 } 3571 3572 3573 /* Adjust FTOP downwards by one register. */ 3574 3575 static void fp_push ( void ) 3576 { 3577 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 3578 } 3579 3580 /* Adjust FTOP upwards by one register, and mark the vacated register 3581 as empty. */ 3582 3583 static void fp_pop ( void ) 3584 { 3585 put_ST_TAG(0, mkU8(0)); 3586 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 3587 } 3588 3589 /* Clear the C2 bit of the FPU status register, for 3590 sin/cos/tan/sincos. 
*/ 3591 3592 static void clear_C2 ( void ) 3593 { 3594 put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); 3595 } 3596 3597 /* Invent a plausible-looking FPU status word value: 3598 ((ftop & 7) << 11) | (c3210 & 0x4700) 3599 */ 3600 static IRExpr* get_FPU_sw ( void ) 3601 { 3602 return 3603 unop(Iop_32to16, 3604 binop(Iop_Or32, 3605 binop(Iop_Shl32, 3606 binop(Iop_And32, get_ftop(), mkU32(7)), 3607 mkU8(11)), 3608 binop(Iop_And32, get_C3210(), mkU32(0x4700)) 3609 )); 3610 } 3611 3612 3613 /* ------------------------------------------------------- */ 3614 /* Given all that stack-mangling junk, we can now go ahead 3615 and describe FP instructions. 3616 */ 3617 3618 /* ST(0) = ST(0) `op` mem64/32(addr) 3619 Need to check ST(0)'s tag on read, but not on write. 3620 */ 3621 static 3622 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3623 IROp op, Bool dbl ) 3624 { 3625 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3626 if (dbl) { 3627 put_ST_UNCHECKED(0, 3628 triop( op, 3629 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3630 get_ST(0), 3631 loadLE(Ity_F64,mkexpr(addr)) 3632 )); 3633 } else { 3634 put_ST_UNCHECKED(0, 3635 triop( op, 3636 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3637 get_ST(0), 3638 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 3639 )); 3640 } 3641 } 3642 3643 3644 /* ST(0) = mem64/32(addr) `op` ST(0) 3645 Need to check ST(0)'s tag on read, but not on write. 3646 */ 3647 static 3648 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf, 3649 IROp op, Bool dbl ) 3650 { 3651 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 3652 if (dbl) { 3653 put_ST_UNCHECKED(0, 3654 triop( op, 3655 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3656 loadLE(Ity_F64,mkexpr(addr)), 3657 get_ST(0) 3658 )); 3659 } else { 3660 put_ST_UNCHECKED(0, 3661 triop( op, 3662 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3663 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 3664 get_ST(0) 3665 )); 3666 } 3667 } 3668 3669 3670 /* ST(dst) = ST(dst) `op` ST(src). 3671 Check dst and src tags when reading but not on write. 3672 */ 3673 static 3674 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3675 Bool pop_after ) 3676 { 3677 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3678 (Int)st_src, (Int)st_dst ); 3679 put_ST_UNCHECKED( 3680 st_dst, 3681 triop( op, 3682 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3683 get_ST(st_dst), 3684 get_ST(st_src) ) 3685 ); 3686 if (pop_after) 3687 fp_pop(); 3688 } 3689 3690 /* ST(dst) = ST(src) `op` ST(dst). 3691 Check dst and src tags when reading but not on write. 3692 */ 3693 static 3694 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 3695 Bool pop_after ) 3696 { 3697 DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"", 3698 (Int)st_src, (Int)st_dst ); 3699 put_ST_UNCHECKED( 3700 st_dst, 3701 triop( op, 3702 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 3703 get_ST(st_src), 3704 get_ST(st_dst) ) 3705 ); 3706 if (pop_after) 3707 fp_pop(); 3708 } 3709 3710 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 3711 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 3712 { 3713 DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i ); 3714 /* This is a bit of a hack (and isn't really right). It sets 3715 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 3716 documentation implies A and S are unchanged. 
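   The And32 with 0x45 below works because Iop_CmpF64 yields 0x00
   (GT), 0x01 (LT), 0x40 (EQ) or 0x45 (unordered) -- that is, values
   already laid out in the eflags C(0x01)/P(0x04)/Z(0x40) positions.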
3717 */ 3718 /* It's also fishy in that it is used both for COMIP and 3719 UCOMIP, and they aren't the same (although similar). */ 3720 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 3721 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 3722 stmt( IRStmt_Put( OFFB_CC_DEP1, 3723 binop( Iop_And32, 3724 binop(Iop_CmpF64, get_ST(0), get_ST(i)), 3725 mkU32(0x45) 3726 ))); 3727 /* Set NDEP even though it isn't used. This makes redundant-PUT 3728 elimination of previous stores to this field work better. */ 3729 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 3730 if (pop_after) 3731 fp_pop(); 3732 } 3733 3734 3735 static 3736 UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) 3737 { 3738 Int len; 3739 UInt r_src, r_dst; 3740 HChar dis_buf[50]; 3741 IRTemp t1, t2; 3742 3743 /* On entry, delta points at the second byte of the insn (the modrm 3744 byte).*/ 3745 UChar first_opcode = getIByte(delta-1); 3746 UChar modrm = getIByte(delta+0); 3747 3748 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 3749 3750 if (first_opcode == 0xD8) { 3751 if (modrm < 0xC0) { 3752 3753 /* bits 5,4,3 are an opcode extension, and the modRM also 3754 specifies an address. */ 3755 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3756 delta += len; 3757 3758 switch (gregOfRM(modrm)) { 3759 3760 case 0: /* FADD single-real */ 3761 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 3762 break; 3763 3764 case 1: /* FMUL single-real */ 3765 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 3766 break; 3767 3768 case 2: /* FCOM single-real */ 3769 DIP("fcoms %s\n", dis_buf); 3770 /* This forces C1 to zero, which isn't right. */ 3771 put_C3210( 3772 binop( Iop_And32, 3773 binop(Iop_Shl32, 3774 binop(Iop_CmpF64, 3775 get_ST(0), 3776 unop(Iop_F32toF64, 3777 loadLE(Ity_F32,mkexpr(addr)))), 3778 mkU8(8)), 3779 mkU32(0x4500) 3780 )); 3781 break; 3782 3783 case 3: /* FCOMP single-real */ 3784 DIP("fcomps %s\n", dis_buf); 3785 /* This forces C1 to zero, which isn't right. */ 3786 put_C3210( 3787 binop( Iop_And32, 3788 binop(Iop_Shl32, 3789 binop(Iop_CmpF64, 3790 get_ST(0), 3791 unop(Iop_F32toF64, 3792 loadLE(Ity_F32,mkexpr(addr)))), 3793 mkU8(8)), 3794 mkU32(0x4500) 3795 )); 3796 fp_pop(); 3797 break; 3798 3799 case 4: /* FSUB single-real */ 3800 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 3801 break; 3802 3803 case 5: /* FSUBR single-real */ 3804 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 3805 break; 3806 3807 case 6: /* FDIV single-real */ 3808 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 3809 break; 3810 3811 case 7: /* FDIVR single-real */ 3812 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 3813 break; 3814 3815 default: 3816 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 3817 vex_printf("first_opcode == 0xD8\n"); 3818 goto decode_fail; 3819 } 3820 } else { 3821 delta++; 3822 switch (modrm) { 3823 3824 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 3825 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 3826 break; 3827 3828 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 3829 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 3830 break; 3831 3832 /* Dunno if this is right */ 3833 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 3834 r_dst = (UInt)modrm - 0xD0; 3835 DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst); 3836 /* This forces C1 to zero, which isn't right. 
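            The Shl32 by 8 moves the Iop_CmpF64 result (0x00, 0x01,
            0x40 or 0x45) up into the C3/C2/C0 positions of the FPU
            status word, and masking with 0x4500 keeps exactly
            C3(0x4000), C2(0x0400) and C0(0x0100), zeroing C1.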
*/ 3837 put_C3210( 3838 binop( Iop_And32, 3839 binop(Iop_Shl32, 3840 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3841 mkU8(8)), 3842 mkU32(0x4500) 3843 )); 3844 break; 3845 3846 /* Dunno if this is right */ 3847 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 3848 r_dst = (UInt)modrm - 0xD8; 3849 DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst); 3850 /* This forces C1 to zero, which isn't right. */ 3851 put_C3210( 3852 binop( Iop_And32, 3853 binop(Iop_Shl32, 3854 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 3855 mkU8(8)), 3856 mkU32(0x4500) 3857 )); 3858 fp_pop(); 3859 break; 3860 3861 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 3862 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 3863 break; 3864 3865 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 3866 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 3867 break; 3868 3869 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 3870 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 3871 break; 3872 3873 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 3874 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 3875 break; 3876 3877 default: 3878 goto decode_fail; 3879 } 3880 } 3881 } 3882 3883 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 3884 else 3885 if (first_opcode == 0xD9) { 3886 if (modrm < 0xC0) { 3887 3888 /* bits 5,4,3 are an opcode extension, and the modRM also 3889 specifies an address. */ 3890 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 3891 delta += len; 3892 3893 switch (gregOfRM(modrm)) { 3894 3895 case 0: /* FLD single-real */ 3896 DIP("flds %s\n", dis_buf); 3897 fp_push(); 3898 put_ST(0, unop(Iop_F32toF64, 3899 loadLE(Ity_F32, mkexpr(addr)))); 3900 break; 3901 3902 case 2: /* FST single-real */ 3903 DIP("fsts %s\n", dis_buf); 3904 storeLE(mkexpr(addr), 3905 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3906 break; 3907 3908 case 3: /* FSTP single-real */ 3909 DIP("fstps %s\n", dis_buf); 3910 storeLE(mkexpr(addr), 3911 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 3912 fp_pop(); 3913 break; 3914 3915 case 4: { /* FLDENV m28 */ 3916 /* Uses dirty helper: 3917 VexEmWarn x86g_do_FLDENV ( VexGuestX86State*, HWord ) */ 3918 IRTemp ew = newTemp(Ity_I32); 3919 IRDirty* d = unsafeIRDirty_0_N ( 3920 0/*regparms*/, 3921 "x86g_dirtyhelper_FLDENV", 3922 &x86g_dirtyhelper_FLDENV, 3923 mkIRExprVec_1( mkexpr(addr) ) 3924 ); 3925 d->needsBBP = True; 3926 d->tmp = ew; 3927 /* declare we're reading memory */ 3928 d->mFx = Ifx_Read; 3929 d->mAddr = mkexpr(addr); 3930 d->mSize = 28; 3931 3932 /* declare we're writing guest state */ 3933 d->nFxState = 4; 3934 3935 d->fxState[0].fx = Ifx_Write; 3936 d->fxState[0].offset = OFFB_FTOP; 3937 d->fxState[0].size = sizeof(UInt); 3938 3939 d->fxState[1].fx = Ifx_Write; 3940 d->fxState[1].offset = OFFB_FPTAGS; 3941 d->fxState[1].size = 8 * sizeof(UChar); 3942 3943 d->fxState[2].fx = Ifx_Write; 3944 d->fxState[2].offset = OFFB_FPROUND; 3945 d->fxState[2].size = sizeof(UInt); 3946 3947 d->fxState[3].fx = Ifx_Write; 3948 d->fxState[3].offset = OFFB_FC3210; 3949 d->fxState[3].size = sizeof(UInt); 3950 3951 stmt( IRStmt_Dirty(d) ); 3952 3953 /* ew contains any emulation warning we may need to 3954 issue. If needed, side-exit to the next insn, 3955 reporting the warning, so that Valgrind's dispatcher 3956 sees the warning. 
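               The shape of the generated IR, for reference: after the
               dirty call, ew is PUT to the guest emwarn field and a
               conditional side-exit "if (ew != 0), exit with Ijk_EmWarn
               to the next insn" is emitted; in the common ew == 0 case
               the exit is not taken and execution falls straight
               through.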
*/
3957             put_emwarn( mkexpr(ew) );
3958             stmt( 
3959                IRStmt_Exit(
3960                   binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
3961                   Ijk_EmWarn,
3962                   IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
3963                )
3964             );
3965 
3966             DIP("fldenv %s\n", dis_buf);
3967             break;
3968          }
3969 
3970          case 5: { /* FLDCW */
3971             /* The only thing we observe in the control word is the
3972                rounding mode. Therefore, pass the 16-bit value
3973                (x87 native-format control word) to a clean helper,
3974                getting back a 64-bit value, the lower half of which
3975                is the FPROUND value to store, and the upper half of
3976                which is the emulation-warning token which may be
3977                generated.
3978             */
3979             /* ULong x86g_check_fldcw ( UInt ); */
3980             IRTemp t64 = newTemp(Ity_I64);
3981             IRTemp ew = newTemp(Ity_I32);
3982             DIP("fldcw %s\n", dis_buf);
3983             assign( t64, mkIRExprCCall(
3984                             Ity_I64, 0/*regparms*/, 
3985                             "x86g_check_fldcw",
3986                             &x86g_check_fldcw, 
3987                             mkIRExprVec_1( 
3988                                unop( Iop_16Uto32, 
3989                                      loadLE(Ity_I16, mkexpr(addr)))
3990                             )
3991                          )
3992                   );
3993 
3994             put_fpround( unop(Iop_64to32, mkexpr(t64)) );
3995             assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
3996             put_emwarn( mkexpr(ew) );
3997             /* Finally, if an emulation warning was reported,
3998                side-exit to the next insn, reporting the warning,
3999                so that Valgrind's dispatcher sees the warning. */
4000             stmt( 
4001                IRStmt_Exit(
4002                   binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4003                   Ijk_EmWarn,
4004                   IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
4005                )
4006             );
4007             break;
4008          }
4009 
4010          case 6: { /* FNSTENV m28 */
4011             /* Uses dirty helper: 
4012                   void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4013             IRDirty* d = unsafeIRDirty_0_N ( 
4014                             0/*regparms*/, 
4015                             "x86g_dirtyhelper_FSTENV", 
4016                             &x86g_dirtyhelper_FSTENV,
4017                             mkIRExprVec_1( mkexpr(addr) )
4018                          );
4019             d->needsBBP = True;
4020             /* declare we're writing memory */
4021             d->mFx   = Ifx_Write;
4022             d->mAddr = mkexpr(addr);
4023             d->mSize = 28;
4024 
4025             /* declare we're reading guest state */
4026             d->nFxState = 4;
4027 
4028             d->fxState[0].fx     = Ifx_Read;
4029             d->fxState[0].offset = OFFB_FTOP;
4030             d->fxState[0].size   = sizeof(UInt);
4031 
4032             d->fxState[1].fx     = Ifx_Read;
4033             d->fxState[1].offset = OFFB_FPTAGS;
4034             d->fxState[1].size   = 8 * sizeof(UChar);
4035 
4036             d->fxState[2].fx     = Ifx_Read;
4037             d->fxState[2].offset = OFFB_FPROUND;
4038             d->fxState[2].size   = sizeof(UInt);
4039 
4040             d->fxState[3].fx     = Ifx_Read;
4041             d->fxState[3].offset = OFFB_FC3210;
4042             d->fxState[3].size   = sizeof(UInt);
4043 
4044             stmt( IRStmt_Dirty(d) );
4045 
4046             DIP("fnstenv %s\n", dis_buf);
4047             break;
4048          }
4049 
4050          case 7: /* FNSTCW */
4051             /* Fake up a native x87 FPU control word. The only
4052                thing it depends on is FPROUND[1:0], so call a clean
4053                helper to cook it up. */
4054             /* UInt x86g_create_fpucw ( UInt fpround ) */
4055             DIP("fnstcw %s\n", dis_buf);
4056             storeLE(
4057                mkexpr(addr), 
4058                unop( Iop_32to16, 
4059                      mkIRExprCCall(
4060                         Ity_I32, 0/*regp*/,
4061                         "x86g_create_fpucw", &x86g_create_fpucw, 
4062                         mkIRExprVec_1( get_fpround() ) 
4063                      ) 
4064                ) 
4065             );
4066             break;
4067 
4068          default:
4069             vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4070             vex_printf("first_opcode == 0xD9\n");
4071             goto decode_fail;
4072       }
4073 
4074    } else {
4075       delta++;
4076       switch (modrm) {
4077 
4078          case 0xC0 ... 0xC7: /* FLD %st(?) */
4079             r_src = (UInt)modrm - 0xC0;
4080             DIP("fld %%st(%d)\n", (Int)r_src);
4081             t1 = newTemp(Ity_F64);
4082             assign(t1, get_ST(r_src));
4083             fp_push();
4084             put_ST(0, mkexpr(t1));
4085             break;
4086 
4087          case 0xC8 ... 0xCF: /* FXCH %st(?)

*/ 4088 r_src = (UInt)modrm - 0xC8; 4089 DIP("fxch %%st(%d)\n", (Int)r_src); 4090 t1 = newTemp(Ity_F64); 4091 t2 = newTemp(Ity_F64); 4092 assign(t1, get_ST(0)); 4093 assign(t2, get_ST(r_src)); 4094 put_ST_UNCHECKED(0, mkexpr(t2)); 4095 put_ST_UNCHECKED(r_src, mkexpr(t1)); 4096 break; 4097 4098 case 0xE0: /* FCHS */ 4099 DIP("fchs\n"); 4100 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 4101 break; 4102 4103 case 0xE1: /* FABS */ 4104 DIP("fabs\n"); 4105 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 4106 break; 4107 4108 case 0xE4: /* FTST */ 4109 DIP("ftst\n"); 4110 /* This forces C1 to zero, which isn't right. */ 4111 /* Well, in fact the Intel docs say (bizarrely): "C1 is 4112 set to 0 if stack underflow occurred; otherwise, set 4113 to 0" which is pretty nonsensical. I guess it's a 4114 typo. */ 4115 put_C3210( 4116 binop( Iop_And32, 4117 binop(Iop_Shl32, 4118 binop(Iop_CmpF64, 4119 get_ST(0), 4120 IRExpr_Const(IRConst_F64i(0x0ULL))), 4121 mkU8(8)), 4122 mkU32(0x4500) 4123 )); 4124 break; 4125 4126 case 0xE5: { /* FXAM */ 4127 /* This is an interesting one. It examines %st(0), 4128 regardless of whether the tag says it's empty or not. 4129 Here, just pass both the tag (in our format) and the 4130 value (as a double, actually a ULong) to a helper 4131 function. */ 4132 IRExpr** args 4133 = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)), 4134 unop(Iop_ReinterpF64asI64, 4135 get_ST_UNCHECKED(0)) ); 4136 put_C3210(mkIRExprCCall( 4137 Ity_I32, 4138 0/*regparm*/, 4139 "x86g_calculate_FXAM", &x86g_calculate_FXAM, 4140 args 4141 )); 4142 DIP("fxam\n"); 4143 break; 4144 } 4145 4146 case 0xE8: /* FLD1 */ 4147 DIP("fld1\n"); 4148 fp_push(); 4149 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 4150 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 4151 break; 4152 4153 case 0xE9: /* FLDL2T */ 4154 DIP("fldl2t\n"); 4155 fp_push(); 4156 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 4157 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 4158 break; 4159 4160 case 0xEA: /* FLDL2E */ 4161 DIP("fldl2e\n"); 4162 fp_push(); 4163 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 4164 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 4165 break; 4166 4167 case 0xEB: /* FLDPI */ 4168 DIP("fldpi\n"); 4169 fp_push(); 4170 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 4171 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 4172 break; 4173 4174 case 0xEC: /* FLDLG2 */ 4175 DIP("fldlg2\n"); 4176 fp_push(); 4177 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 4178 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 4179 break; 4180 4181 case 0xED: /* FLDLN2 */ 4182 DIP("fldln2\n"); 4183 fp_push(); 4184 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 4185 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 4186 break; 4187 4188 case 0xEE: /* FLDZ */ 4189 DIP("fldz\n"); 4190 fp_push(); 4191 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 4192 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 4193 break; 4194 4195 case 0xF0: /* F2XM1 */ 4196 DIP("f2xm1\n"); 4197 put_ST_UNCHECKED(0, 4198 binop(Iop_2xm1F64, 4199 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4200 get_ST(0))); 4201 break; 4202 4203 case 0xF1: /* FYL2X */ 4204 DIP("fyl2x\n"); 4205 put_ST_UNCHECKED(1, 4206 triop(Iop_Yl2xF64, 4207 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4208 get_ST(1), 4209 get_ST(0))); 4210 fp_pop(); 4211 break; 4212 4213 case 0xF2: /* FPTAN */ 
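                /* Net effect, sketched in x87 terms: ST(0) becomes
                   tan(ST(0)) and a constant 1.0 is then pushed, so an
                   in-range argument x leaves the stack as
                   {..., tan(x), 1.0} with 1.0 in the new ST(0).
                   Clearing C2 below unconditionally claims the
                   argument was in range; see the x87 limitations
                   listed at the top of this file. */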
4214                   DIP("fptan\n");
4215                   put_ST_UNCHECKED(0, 
4216                      binop(Iop_TanF64, 
4217                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4218                            get_ST(0)));
4219                   fp_push();
4220                   put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
4221                   clear_C2(); /* HACK */
4222                   break;
4223 
4224                case 0xF3: /* FPATAN */
4225                   DIP("fpatan\n");
4226                   put_ST_UNCHECKED(1, 
4227                      triop(Iop_AtanF64, 
4228                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4229                            get_ST(1), 
4230                            get_ST(0)));
4231                   fp_pop();
4232                   break;
4233 
4234                case 0xF4: { /* FXTRACT */
4235                   IRTemp argF = newTemp(Ity_F64);
4236                   IRTemp sigF = newTemp(Ity_F64);
4237                   IRTemp expF = newTemp(Ity_F64);
4238                   IRTemp argI = newTemp(Ity_I64);
4239                   IRTemp sigI = newTemp(Ity_I64);
4240                   IRTemp expI = newTemp(Ity_I64);
4241                   DIP("fxtract\n");
4242                   assign( argF, get_ST(0) );
4243                   assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
4244                   assign( sigI, 
4245                           mkIRExprCCall(
4246                              Ity_I64, 0/*regparms*/, 
4247                              "x86amd64g_calculate_FXTRACT", 
4248                              &x86amd64g_calculate_FXTRACT, 
4249                              mkIRExprVec_2( mkexpr(argI), 
4250                                             mkIRExpr_HWord(0)/*sig*/ ))
4251                   );
4252                   assign( expI, 
4253                           mkIRExprCCall(
4254                              Ity_I64, 0/*regparms*/, 
4255                              "x86amd64g_calculate_FXTRACT", 
4256                              &x86amd64g_calculate_FXTRACT, 
4257                              mkIRExprVec_2( mkexpr(argI), 
4258                                             mkIRExpr_HWord(1)/*exp*/ ))
4259                   );
4260                   assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
4261                   assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
4262                   /* exponent */
4263                   put_ST_UNCHECKED(0, mkexpr(expF) );
4264                   fp_push();
4265                   /* significand */
4266                   put_ST(0, mkexpr(sigF) );
4267                   break;
4268                }
4269 
4270                case 0xF5: { /* FPREM1 -- IEEE compliant */
4271                   IRTemp a1 = newTemp(Ity_F64);
4272                   IRTemp a2 = newTemp(Ity_F64);
4273                   DIP("fprem1\n");
4274                   /* Do FPREM1 twice, once to get the remainder, and once
4275                      to get the C3210 flag values. */
4276                   assign( a1, get_ST(0) );
4277                   assign( a2, get_ST(1) );
4278                   put_ST_UNCHECKED(0, 
4279                      triop(Iop_PRem1F64,
4280                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4281                            mkexpr(a1), 
4282                            mkexpr(a2)));
4283                   put_C3210(
4284                      triop(Iop_PRem1C3210F64,
4285                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4286                            mkexpr(a1), 
4287                            mkexpr(a2)) );
4288                   break;
4289                }
4290 
4291                case 0xF7: /* FINCSTP */
4292                   DIP("fincstp\n");
4293                   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
4294                   break;
4295 
4296                case 0xF8: { /* FPREM -- not IEEE compliant */
4297                   IRTemp a1 = newTemp(Ity_F64);
4298                   IRTemp a2 = newTemp(Ity_F64);
4299                   DIP("fprem\n");
4300                   /* Do FPREM twice, once to get the remainder, and once
4301                      to get the C3210 flag values. 
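                      A worked example of the flag half: FPREM of 17.0
                      by 5.0 leaves 2.0 in ST(0) with quotient 3; the
                      x87 reports the low three quotient bits as C0=q2,
                      C3=q1, C1=q0, so here C0=0, C3=1, C1=1, with C2=0
                      meaning the reduction is complete.
                      Iop_PRemC3210F64 is assumed to deliver those bits
                      already in their status-word positions, which is
                      why its result can be PUT via put_C3210
                      unmodified.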
*/ 4302 assign( a1, get_ST(0) ); 4303 assign( a2, get_ST(1) ); 4304 put_ST_UNCHECKED(0, 4305 triop(Iop_PRemF64, 4306 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4307 mkexpr(a1), 4308 mkexpr(a2))); 4309 put_C3210( 4310 triop(Iop_PRemC3210F64, 4311 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4312 mkexpr(a1), 4313 mkexpr(a2)) ); 4314 break; 4315 } 4316 4317 case 0xF9: /* FYL2XP1 */ 4318 DIP("fyl2xp1\n"); 4319 put_ST_UNCHECKED(1, 4320 triop(Iop_Yl2xp1F64, 4321 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4322 get_ST(1), 4323 get_ST(0))); 4324 fp_pop(); 4325 break; 4326 4327 case 0xFA: /* FSQRT */ 4328 DIP("fsqrt\n"); 4329 put_ST_UNCHECKED(0, 4330 binop(Iop_SqrtF64, 4331 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4332 get_ST(0))); 4333 break; 4334 4335 case 0xFB: { /* FSINCOS */ 4336 IRTemp a1 = newTemp(Ity_F64); 4337 assign( a1, get_ST(0) ); 4338 DIP("fsincos\n"); 4339 put_ST_UNCHECKED(0, 4340 binop(Iop_SinF64, 4341 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4342 mkexpr(a1))); 4343 fp_push(); 4344 put_ST(0, 4345 binop(Iop_CosF64, 4346 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4347 mkexpr(a1))); 4348 clear_C2(); /* HACK */ 4349 break; 4350 } 4351 4352 case 0xFC: /* FRNDINT */ 4353 DIP("frndint\n"); 4354 put_ST_UNCHECKED(0, 4355 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 4356 break; 4357 4358 case 0xFD: /* FSCALE */ 4359 DIP("fscale\n"); 4360 put_ST_UNCHECKED(0, 4361 triop(Iop_ScaleF64, 4362 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4363 get_ST(0), 4364 get_ST(1))); 4365 break; 4366 4367 case 0xFE: /* FSIN */ 4368 DIP("fsin\n"); 4369 put_ST_UNCHECKED(0, 4370 binop(Iop_SinF64, 4371 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4372 get_ST(0))); 4373 clear_C2(); /* HACK */ 4374 break; 4375 4376 case 0xFF: /* FCOS */ 4377 DIP("fcos\n"); 4378 put_ST_UNCHECKED(0, 4379 binop(Iop_CosF64, 4380 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4381 get_ST(0))); 4382 clear_C2(); /* HACK */ 4383 break; 4384 4385 default: 4386 goto decode_fail; 4387 } 4388 } 4389 } 4390 4391 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 4392 else 4393 if (first_opcode == 0xDA) { 4394 4395 if (modrm < 0xC0) { 4396 4397 /* bits 5,4,3 are an opcode extension, and the modRM also 4398 specifies an address. */ 4399 IROp fop; 4400 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4401 delta += len; 4402 switch (gregOfRM(modrm)) { 4403 4404 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 4405 DIP("fiaddl %s\n", dis_buf); 4406 fop = Iop_AddF64; 4407 goto do_fop_m32; 4408 4409 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 4410 DIP("fimull %s\n", dis_buf); 4411 fop = Iop_MulF64; 4412 goto do_fop_m32; 4413 4414 case 2: /* FICOM m32int */ 4415 DIP("ficoml %s\n", dis_buf); 4416 /* This forces C1 to zero, which isn't right. */ 4417 put_C3210( 4418 binop( Iop_And32, 4419 binop(Iop_Shl32, 4420 binop(Iop_CmpF64, 4421 get_ST(0), 4422 unop(Iop_I32StoF64, 4423 loadLE(Ity_I32,mkexpr(addr)))), 4424 mkU8(8)), 4425 mkU32(0x4500) 4426 )); 4427 break; 4428 4429 case 3: /* FICOMP m32int */ 4430 DIP("ficompl %s\n", dis_buf); 4431 /* This forces C1 to zero, which isn't right. 
*/ 4432 put_C3210( 4433 binop( Iop_And32, 4434 binop(Iop_Shl32, 4435 binop(Iop_CmpF64, 4436 get_ST(0), 4437 unop(Iop_I32StoF64, 4438 loadLE(Ity_I32,mkexpr(addr)))), 4439 mkU8(8)), 4440 mkU32(0x4500) 4441 )); 4442 fp_pop(); 4443 break; 4444 4445 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 4446 DIP("fisubl %s\n", dis_buf); 4447 fop = Iop_SubF64; 4448 goto do_fop_m32; 4449 4450 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 4451 DIP("fisubrl %s\n", dis_buf); 4452 fop = Iop_SubF64; 4453 goto do_foprev_m32; 4454 4455 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 4456 DIP("fidivl %s\n", dis_buf); 4457 fop = Iop_DivF64; 4458 goto do_fop_m32; 4459 4460 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 4461 DIP("fidivrl %s\n", dis_buf); 4462 fop = Iop_DivF64; 4463 goto do_foprev_m32; 4464 4465 do_fop_m32: 4466 put_ST_UNCHECKED(0, 4467 triop(fop, 4468 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4469 get_ST(0), 4470 unop(Iop_I32StoF64, 4471 loadLE(Ity_I32, mkexpr(addr))))); 4472 break; 4473 4474 do_foprev_m32: 4475 put_ST_UNCHECKED(0, 4476 triop(fop, 4477 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 4478 unop(Iop_I32StoF64, 4479 loadLE(Ity_I32, mkexpr(addr))), 4480 get_ST(0))); 4481 break; 4482 4483 default: 4484 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4485 vex_printf("first_opcode == 0xDA\n"); 4486 goto decode_fail; 4487 } 4488 4489 } else { 4490 4491 delta++; 4492 switch (modrm) { 4493 4494 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 4495 r_src = (UInt)modrm - 0xC0; 4496 DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src); 4497 put_ST_UNCHECKED(0, 4498 IRExpr_Mux0X( 4499 unop(Iop_1Uto8, 4500 mk_x86g_calculate_condition(X86CondB)), 4501 get_ST(0), get_ST(r_src)) ); 4502 break; 4503 4504 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 4505 r_src = (UInt)modrm - 0xC8; 4506 DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src); 4507 put_ST_UNCHECKED(0, 4508 IRExpr_Mux0X( 4509 unop(Iop_1Uto8, 4510 mk_x86g_calculate_condition(X86CondZ)), 4511 get_ST(0), get_ST(r_src)) ); 4512 break; 4513 4514 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 4515 r_src = (UInt)modrm - 0xD0; 4516 DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src); 4517 put_ST_UNCHECKED(0, 4518 IRExpr_Mux0X( 4519 unop(Iop_1Uto8, 4520 mk_x86g_calculate_condition(X86CondBE)), 4521 get_ST(0), get_ST(r_src)) ); 4522 break; 4523 4524 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 4525 r_src = (UInt)modrm - 0xD8; 4526 DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src); 4527 put_ST_UNCHECKED(0, 4528 IRExpr_Mux0X( 4529 unop(Iop_1Uto8, 4530 mk_x86g_calculate_condition(X86CondP)), 4531 get_ST(0), get_ST(r_src)) ); 4532 break; 4533 4534 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 4535 DIP("fucompp %%st(0),%%st(1)\n"); 4536 /* This forces C1 to zero, which isn't right. */ 4537 put_C3210( 4538 binop( Iop_And32, 4539 binop(Iop_Shl32, 4540 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 4541 mkU8(8)), 4542 mkU32(0x4500) 4543 )); 4544 fp_pop(); 4545 fp_pop(); 4546 break; 4547 4548 default: 4549 goto decode_fail; 4550 } 4551 4552 } 4553 } 4554 4555 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 4556 else 4557 if (first_opcode == 0xDB) { 4558 if (modrm < 0xC0) { 4559 4560 /* bits 5,4,3 are an opcode extension, and the modRM also 4561 specifies an address. 
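         A concrete example: for "fildl 4(%ebp)" the bytes are
         DB 45 04; modrm 0x45 has mod=01, reg=0 (the opcode extension,
         selecting FILD m32int below) and rm=5, so disAMode returns the
         address %ebp+4 and len=2, covering the modrm and disp8 bytes.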
*/
4562       IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4563       delta += len;
4564 
4565       switch (gregOfRM(modrm)) {
4566 
4567          case 0: /* FILD m32int */
4568             DIP("fildl %s\n", dis_buf);
4569             fp_push();
4570             put_ST(0, unop(Iop_I32StoF64,
4571                            loadLE(Ity_I32, mkexpr(addr))));
4572             break;
4573 
4574          case 1: /* FISTTPL m32 (SSE3) */
4575             DIP("fisttpl %s\n", dis_buf);
4576             storeLE( mkexpr(addr), 
4577                      binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
4578             fp_pop();
4579             break;
4580 
4581          case 2: /* FIST m32 */
4582             DIP("fistl %s\n", dis_buf);
4583             storeLE( mkexpr(addr), 
4584                      binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4585             break;
4586 
4587          case 3: /* FISTP m32 */
4588             DIP("fistpl %s\n", dis_buf);
4589             storeLE( mkexpr(addr), 
4590                      binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
4591             fp_pop();
4592             break;
4593 
4594          case 5: { /* FLD extended-real */
4595             /* Uses dirty helper: 
4596                   ULong x86g_loadF80le ( UInt )
4597                addr holds the address. First, do a dirty call to
4598                get hold of the data. */
4599             IRTemp   val  = newTemp(Ity_I64);
4600             IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
4601 
4602             IRDirty* d = unsafeIRDirty_1_N ( 
4603                             val, 
4604                             0/*regparms*/, 
4605                             "x86g_dirtyhelper_loadF80le", 
4606                             &x86g_dirtyhelper_loadF80le, 
4607                             args 
4608                          );
4609             /* declare that we're reading memory */
4610             d->mFx   = Ifx_Read;
4611             d->mAddr = mkexpr(addr);
4612             d->mSize = 10;
4613 
4614             /* execute the dirty call, dumping the result in val. */
4615             stmt( IRStmt_Dirty(d) );
4616             fp_push();
4617             put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
4618 
4619             DIP("fldt %s\n", dis_buf);
4620             break;
4621          }
4622 
4623          case 7: { /* FSTP extended-real */
4624             /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4625             IRExpr** args 
4626                = mkIRExprVec_2( mkexpr(addr), 
4627                                 unop(Iop_ReinterpF64asI64, get_ST(0)) );
4628 
4629             IRDirty* d = unsafeIRDirty_0_N ( 
4630                             0/*regparms*/, 
4631                             "x86g_dirtyhelper_storeF80le", 
4632                             &x86g_dirtyhelper_storeF80le,
4633                             args 
4634                          );
4635             /* declare we're writing memory */
4636             d->mFx   = Ifx_Write;
4637             d->mAddr = mkexpr(addr);
4638             d->mSize = 10;
4639 
4640             /* execute the dirty call. */
4641             stmt( IRStmt_Dirty(d) );
4642             fp_pop();
4643 
4644             DIP("fstpt %s\n", dis_buf);
4645             break;
4646          }
4647 
4648          default:
4649             vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
4650             vex_printf("first_opcode == 0xDB\n");
4651             goto decode_fail;
4652       }
4653 
4654    } else {
4655 
4656       delta++;
4657       switch (modrm) {
4658 
4659          case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4660             r_src = (UInt)modrm - 0xC0;
4661             DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src);
4662             put_ST_UNCHECKED(0, 
4663                              IRExpr_Mux0X( 
4664                                  unop(Iop_1Uto8,
4665                                       mk_x86g_calculate_condition(X86CondNB)), 
4666                                  get_ST(0), get_ST(r_src)) );
4667             break;
4668 
4669          case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4670             r_src = (UInt)modrm - 0xC8;
4671             DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src);
4672             put_ST_UNCHECKED(0, 
4673                              IRExpr_Mux0X( 
4674                                  unop(Iop_1Uto8,
4675                                       mk_x86g_calculate_condition(X86CondNZ)), 
4676                                  get_ST(0), get_ST(r_src)) );
4677             break;
4678 
4679          case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4680             r_src = (UInt)modrm - 0xD0;
4681             DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src);
4682             put_ST_UNCHECKED(0, 
4683                              IRExpr_Mux0X( 
4684                                  unop(Iop_1Uto8,
4685                                       mk_x86g_calculate_condition(X86CondNBE)), 
4686                                  get_ST(0), get_ST(r_src)) );
4687             break;
4688 
4689          case 0xD8 ...
0xDF: /* FCMOVNU ST(i), ST(0) */ 4690 r_src = (UInt)modrm - 0xD8; 4691 DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src); 4692 put_ST_UNCHECKED(0, 4693 IRExpr_Mux0X( 4694 unop(Iop_1Uto8, 4695 mk_x86g_calculate_condition(X86CondNP)), 4696 get_ST(0), get_ST(r_src)) ); 4697 break; 4698 4699 case 0xE2: 4700 DIP("fnclex\n"); 4701 break; 4702 4703 case 0xE3: { 4704 /* Uses dirty helper: 4705 void x86g_do_FINIT ( VexGuestX86State* ) */ 4706 IRDirty* d = unsafeIRDirty_0_N ( 4707 0/*regparms*/, 4708 "x86g_dirtyhelper_FINIT", 4709 &x86g_dirtyhelper_FINIT, 4710 mkIRExprVec_0() 4711 ); 4712 d->needsBBP = True; 4713 4714 /* declare we're writing guest state */ 4715 d->nFxState = 5; 4716 4717 d->fxState[0].fx = Ifx_Write; 4718 d->fxState[0].offset = OFFB_FTOP; 4719 d->fxState[0].size = sizeof(UInt); 4720 4721 d->fxState[1].fx = Ifx_Write; 4722 d->fxState[1].offset = OFFB_FPREGS; 4723 d->fxState[1].size = 8 * sizeof(ULong); 4724 4725 d->fxState[2].fx = Ifx_Write; 4726 d->fxState[2].offset = OFFB_FPTAGS; 4727 d->fxState[2].size = 8 * sizeof(UChar); 4728 4729 d->fxState[3].fx = Ifx_Write; 4730 d->fxState[3].offset = OFFB_FPROUND; 4731 d->fxState[3].size = sizeof(UInt); 4732 4733 d->fxState[4].fx = Ifx_Write; 4734 d->fxState[4].offset = OFFB_FC3210; 4735 d->fxState[4].size = sizeof(UInt); 4736 4737 stmt( IRStmt_Dirty(d) ); 4738 4739 DIP("fninit\n"); 4740 break; 4741 } 4742 4743 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 4744 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 4745 break; 4746 4747 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 4748 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 4749 break; 4750 4751 default: 4752 goto decode_fail; 4753 } 4754 } 4755 } 4756 4757 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 4758 else 4759 if (first_opcode == 0xDC) { 4760 if (modrm < 0xC0) { 4761 4762 /* bits 5,4,3 are an opcode extension, and the modRM also 4763 specifies an address. */ 4764 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 4765 delta += len; 4766 4767 switch (gregOfRM(modrm)) { 4768 4769 case 0: /* FADD double-real */ 4770 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 4771 break; 4772 4773 case 1: /* FMUL double-real */ 4774 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 4775 break; 4776 4777 case 2: /* FCOM double-real */ 4778 DIP("fcoml %s\n", dis_buf); 4779 /* This forces C1 to zero, which isn't right. */ 4780 put_C3210( 4781 binop( Iop_And32, 4782 binop(Iop_Shl32, 4783 binop(Iop_CmpF64, 4784 get_ST(0), 4785 loadLE(Ity_F64,mkexpr(addr))), 4786 mkU8(8)), 4787 mkU32(0x4500) 4788 )); 4789 break; 4790 4791 case 3: /* FCOMP double-real */ 4792 DIP("fcompl %s\n", dis_buf); 4793 /* This forces C1 to zero, which isn't right. 
*/ 4794 put_C3210( 4795 binop( Iop_And32, 4796 binop(Iop_Shl32, 4797 binop(Iop_CmpF64, 4798 get_ST(0), 4799 loadLE(Ity_F64,mkexpr(addr))), 4800 mkU8(8)), 4801 mkU32(0x4500) 4802 )); 4803 fp_pop(); 4804 break; 4805 4806 case 4: /* FSUB double-real */ 4807 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 4808 break; 4809 4810 case 5: /* FSUBR double-real */ 4811 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 4812 break; 4813 4814 case 6: /* FDIV double-real */ 4815 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 4816 break; 4817 4818 case 7: /* FDIVR double-real */ 4819 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 4820 break; 4821 4822 default: 4823 vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm)); 4824 vex_printf("first_opcode == 0xDC\n"); 4825 goto decode_fail; 4826 } 4827 4828 } else { 4829 4830 delta++; 4831 switch (modrm) { 4832 4833 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 4834 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 4835 break; 4836 4837 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 4838 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 4839 break; 4840 4841 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 4842 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 4843 break; 4844 4845 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 4846 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 4847 break; 4848 4849 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 4850 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 4851 break; 4852 4853 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 4854 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 4855 break; 4856 4857 default: 4858 goto decode_fail; 4859 } 4860 4861 } 4862 } 4863 4864 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 4865 else 4866 if (first_opcode == 0xDD) { 4867 4868 if (modrm < 0xC0) { 4869 4870 /* bits 5,4,3 are an opcode extension, and the modRM also 4871 specifies an address. 
*/
4872       IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
4873       delta += len;
4874 
4875       switch (gregOfRM(modrm)) {
4876 
4877          case 0: /* FLD double-real */
4878             DIP("fldl %s\n", dis_buf);
4879             fp_push();
4880             put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
4881             break;
4882 
4883          case 1: /* FISTTPQ m64 (SSE3) */
4884             DIP("fisttpll %s\n", dis_buf);
4885             storeLE( mkexpr(addr), 
4886                      binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
4887             fp_pop();
4888             break;
4889 
4890          case 2: /* FST double-real */
4891             DIP("fstl %s\n", dis_buf);
4892             storeLE(mkexpr(addr), get_ST(0));
4893             break;
4894 
4895          case 3: /* FSTP double-real */
4896             DIP("fstpl %s\n", dis_buf);
4897             storeLE(mkexpr(addr), get_ST(0));
4898             fp_pop();
4899             break;
4900 
4901          case 4: { /* FRSTOR m108 */
4902             /* Uses dirty helper: 
4903                   VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
4904             IRTemp   ew = newTemp(Ity_I32);
4905             IRDirty* d  = unsafeIRDirty_0_N ( 
4906                              0/*regparms*/, 
4907                              "x86g_dirtyhelper_FRSTOR", 
4908                              &x86g_dirtyhelper_FRSTOR,
4909                              mkIRExprVec_1( mkexpr(addr) )
4910                           );
4911             d->needsBBP = True;
4912             d->tmp      = ew;
4913             /* declare we're reading memory */
4914             d->mFx   = Ifx_Read;
4915             d->mAddr = mkexpr(addr);
4916             d->mSize = 108;
4917 
4918             /* declare we're writing guest state */
4919             d->nFxState = 5;
4920 
4921             d->fxState[0].fx     = Ifx_Write;
4922             d->fxState[0].offset = OFFB_FTOP;
4923             d->fxState[0].size   = sizeof(UInt);
4924 
4925             d->fxState[1].fx     = Ifx_Write;
4926             d->fxState[1].offset = OFFB_FPREGS;
4927             d->fxState[1].size   = 8 * sizeof(ULong);
4928 
4929             d->fxState[2].fx     = Ifx_Write;
4930             d->fxState[2].offset = OFFB_FPTAGS;
4931             d->fxState[2].size   = 8 * sizeof(UChar);
4932 
4933             d->fxState[3].fx     = Ifx_Write;
4934             d->fxState[3].offset = OFFB_FPROUND;
4935             d->fxState[3].size   = sizeof(UInt);
4936 
4937             d->fxState[4].fx     = Ifx_Write;
4938             d->fxState[4].offset = OFFB_FC3210;
4939             d->fxState[4].size   = sizeof(UInt);
4940 
4941             stmt( IRStmt_Dirty(d) );
4942 
4943             /* ew contains any emulation warning we may need to
4944                issue. If needed, side-exit to the next insn,
4945                reporting the warning, so that Valgrind's dispatcher
4946                sees the warning. 
*/
4947             put_emwarn( mkexpr(ew) );
4948             stmt( 
4949                IRStmt_Exit(
4950                   binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
4951                   Ijk_EmWarn,
4952                   IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
4953                )
4954             );
4955 
4956             DIP("frstor %s\n", dis_buf);
4957             break;
4958          }
4959 
4960          case 6: { /* FNSAVE m108 */
4961             /* Uses dirty helper: 
4962                   void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
4963             IRDirty* d = unsafeIRDirty_0_N ( 
4964                             0/*regparms*/, 
4965                             "x86g_dirtyhelper_FSAVE", 
4966                             &x86g_dirtyhelper_FSAVE,
4967                             mkIRExprVec_1( mkexpr(addr) )
4968                          );
4969             d->needsBBP = True;
4970             /* declare we're writing memory */
4971             d->mFx   = Ifx_Write;
4972             d->mAddr = mkexpr(addr);
4973             d->mSize = 108;
4974 
4975             /* declare we're reading guest state */
4976             d->nFxState = 5;
4977 
4978             d->fxState[0].fx     = Ifx_Read;
4979             d->fxState[0].offset = OFFB_FTOP;
4980             d->fxState[0].size   = sizeof(UInt);
4981 
4982             d->fxState[1].fx     = Ifx_Read;
4983             d->fxState[1].offset = OFFB_FPREGS;
4984             d->fxState[1].size   = 8 * sizeof(ULong);
4985 
4986             d->fxState[2].fx     = Ifx_Read;
4987             d->fxState[2].offset = OFFB_FPTAGS;
4988             d->fxState[2].size   = 8 * sizeof(UChar);
4989 
4990             d->fxState[3].fx     = Ifx_Read;
4991             d->fxState[3].offset = OFFB_FPROUND;
4992             d->fxState[3].size   = sizeof(UInt);
4993 
4994             d->fxState[4].fx     = Ifx_Read;
4995             d->fxState[4].offset = OFFB_FC3210;
4996             d->fxState[4].size   = sizeof(UInt);
4997 
4998             stmt( IRStmt_Dirty(d) );
4999 
5000             DIP("fnsave %s\n", dis_buf);
5001             break;
5002          }
5003 
5004          case 7: { /* FNSTSW m16 */
5005             IRExpr* sw = get_FPU_sw();
5006             vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
5007             storeLE( mkexpr(addr), sw );
5008             DIP("fnstsw %s\n", dis_buf);
5009             break;
5010          }
5011 
5012          default:
5013             vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5014             vex_printf("first_opcode == 0xDD\n");
5015             goto decode_fail;
5016       }
5017    } else {
5018       delta++;
5019       switch (modrm) {
5020 
5021          case 0xC0 ... 0xC7: /* FFREE %st(?) */
5022             r_dst = (UInt)modrm - 0xC0;
5023             DIP("ffree %%st(%d)\n", (Int)r_dst);
5024             put_ST_TAG ( r_dst, mkU8(0) );
5025             break;
5026 
5027          case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5028             r_dst = (UInt)modrm - 0xD0;
5029             DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst);
5030             /* P4 manual says: "If the destination operand is a
5031                non-empty register, the invalid-operation exception
5032                is not generated."  Hence put_ST_UNCHECKED. */
5033             put_ST_UNCHECKED(r_dst, get_ST(0));
5034             break;
5035 
5036          case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5037             r_dst = (UInt)modrm - 0xD8;
5038             DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst);
5039             /* P4 manual says: "If the destination operand is a
5040                non-empty register, the invalid-operation exception
5041                is not generated."  Hence put_ST_UNCHECKED. */
5042             put_ST_UNCHECKED(r_dst, get_ST(0));
5043             fp_pop();
5044             break;
5045 
5046          case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5047             r_dst = (UInt)modrm - 0xE0;
5048             DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst);
5049             /* This forces C1 to zero, which isn't right. */
5050             put_C3210( 
5051                binop( Iop_And32,
5052                       binop(Iop_Shl32, 
5053                             binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5054                             mkU8(8)),
5055                       mkU32(0x4500)
5056                ));
5057             break;
5058 
5059          case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5060             r_dst = (UInt)modrm - 0xE8;
5061             DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst);
5062             /* This forces C1 to zero, which isn't right. 
*/
5063             put_C3210( 
5064                binop( Iop_And32,
5065                       binop(Iop_Shl32, 
5066                             binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5067                             mkU8(8)),
5068                       mkU32(0x4500)
5069                ));
5070             fp_pop();
5071             break;
5072 
5073          default:
5074             goto decode_fail;
5075       }
5076    }
5077 }
5078 
5079 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5080 else
5081 if (first_opcode == 0xDE) {
5082 
5083    if (modrm < 0xC0) {
5084 
5085       /* bits 5,4,3 are an opcode extension, and the modRM also
5086          specifies an address. */
5087       IROp   fop;
5088       IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5089       delta += len;
5090 
5091       switch (gregOfRM(modrm)) {
5092 
5093          case 0: /* FIADD m16int */ /* ST(0) += m16int */
5094             DIP("fiaddw %s\n", dis_buf);
5095             fop = Iop_AddF64;
5096             goto do_fop_m16;
5097 
5098          case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5099             DIP("fimulw %s\n", dis_buf);
5100             fop = Iop_MulF64;
5101             goto do_fop_m16;
5102 
5103          case 2: /* FICOM m16int */
5104             DIP("ficomw %s\n", dis_buf);
5105             /* This forces C1 to zero, which isn't right. */
5106             put_C3210( 
5107                binop( Iop_And32,
5108                       binop(Iop_Shl32, 
5109                             binop(Iop_CmpF64, 
5110                                   get_ST(0),
5111                                   unop(Iop_I32StoF64, 
5112                                      unop(Iop_16Sto32,
5113                                         loadLE(Ity_I16,mkexpr(addr))))),
5114                             mkU8(8)),
5115                       mkU32(0x4500)
5116                ));
5117             break;
5118 
5119          case 3: /* FICOMP m16int */
5120             DIP("ficompw %s\n", dis_buf);
5121             /* This forces C1 to zero, which isn't right. */
5122             put_C3210( 
5123                binop( Iop_And32,
5124                       binop(Iop_Shl32, 
5125                             binop(Iop_CmpF64, 
5126                                   get_ST(0),
5127                                   unop(Iop_I32StoF64, 
5128                                      unop(Iop_16Sto32,
5129                                         loadLE(Ity_I16,mkexpr(addr))))),
5130                             mkU8(8)),
5131                       mkU32(0x4500)
5132                ));
5133             fp_pop();
5134             break;
5135 
5136          case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5137             DIP("fisubw %s\n", dis_buf);
5138             fop = Iop_SubF64;
5139             goto do_fop_m16;
5140 
5141          case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5142             DIP("fisubrw %s\n", dis_buf);
5143             fop = Iop_SubF64;
5144             goto do_foprev_m16;
5145 
5146          case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5147             DIP("fidivw %s\n", dis_buf);
5148             fop = Iop_DivF64;
5149             goto do_fop_m16;
5150 
5151          case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5152             DIP("fidivrw %s\n", dis_buf);
5153             fop = Iop_DivF64;
5154             goto do_foprev_m16;
5155 
5156          do_fop_m16:
5157             put_ST_UNCHECKED(0, 
5158                triop(fop, 
5159                      get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5160                      get_ST(0),
5161                      unop(Iop_I32StoF64,
5162                           unop(Iop_16Sto32, 
5163                                loadLE(Ity_I16, mkexpr(addr))))));
5164             break;
5165 
5166          do_foprev_m16:
5167             put_ST_UNCHECKED(0, 
5168                triop(fop, 
5169                      get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5170                      unop(Iop_I32StoF64,
5171                           unop(Iop_16Sto32, 
5172                                loadLE(Ity_I16, mkexpr(addr)))),
5173                      get_ST(0)));
5174             break;
5175 
5176          default:
5177             vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5178             vex_printf("first_opcode == 0xDE\n");
5179             goto decode_fail;
5180       }
5181 
5182    } else {
5183 
5184       delta++;
5185       switch (modrm) {
5186 
5187          case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5188             fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
5189             break;
5190 
5191          case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5192             fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
5193             break;
5194 
5195          case 0xD9: /* FCOMPP %st(0),%st(1) */
5196             DIP("fcompp %%st(0),%%st(1)\n");
5197             /* This forces C1 to zero, which isn't right. */
5198             put_C3210( 
5199                binop( Iop_And32,
5200                       binop(Iop_Shl32, 
5201                             binop(Iop_CmpF64, get_ST(0), get_ST(1)),
5202                             mkU8(8)),
5203                       mkU32(0x4500)
5204                ));
5205             fp_pop();
5206             fp_pop();
5207             break;
5208 
5209          case 0xE0 ...
0xE7: /* FSUBRP %st(0),%st(?) */
5210             fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
5211             break;
5212 
5213          case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5214             fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
5215             break;
5216 
5217          case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5218             fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
5219             break;
5220 
5221          case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5222             fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
5223             break;
5224 
5225          default: 
5226             goto decode_fail;
5227       }
5228 
5229    }
5230 }
5231 
5232 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5233 else
5234 if (first_opcode == 0xDF) {
5235 
5236    if (modrm < 0xC0) {
5237 
5238       /* bits 5,4,3 are an opcode extension, and the modRM also
5239          specifies an address. */
5240       IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
5241       delta += len;
5242 
5243       switch (gregOfRM(modrm)) {
5244 
5245          case 0: /* FILD m16int */
5246             DIP("fildw %s\n", dis_buf);
5247             fp_push();
5248             put_ST(0, unop(Iop_I32StoF64,
5249                            unop(Iop_16Sto32,
5250                                 loadLE(Ity_I16, mkexpr(addr)))));
5251             break;
5252 
5253          case 1: /* FISTTPS m16 (SSE3) */
5254             DIP("fisttps %s\n", dis_buf);
5255             storeLE( mkexpr(addr), 
5256                      binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
5257             fp_pop();
5258             break;
5259 
5260          case 2: /* FIST m16 */
5261             DIP("fists %s\n", dis_buf);
5262             storeLE( mkexpr(addr), 
5263                      binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5264             break;
5265 
5266          case 3: /* FISTP m16 */
5267             DIP("fistps %s\n", dis_buf);
5268             storeLE( mkexpr(addr), 
5269                      binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
5270             fp_pop();
5271             break;
5272 
5273          case 5: /* FILD m64 */
5274             DIP("fildll %s\n", dis_buf);
5275             fp_push();
5276             put_ST(0, binop(Iop_I64StoF64,
5277                             get_roundingmode(),
5278                             loadLE(Ity_I64, mkexpr(addr))));
5279             break;
5280 
5281          case 7: /* FISTP m64 */
5282             DIP("fistpll %s\n", dis_buf);
5283             storeLE( mkexpr(addr), 
5284                      binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
5285             fp_pop();
5286             break;
5287 
5288          default:
5289             vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
5290             vex_printf("first_opcode == 0xDF\n");
5291             goto decode_fail;
5292       }
5293 
5294    } else {
5295 
5296       delta++;
5297       switch (modrm) {
5298 
5299          case 0xC0: /* FFREEP %st(0) */
5300             DIP("ffreep %%st(%d)\n", 0);
5301             put_ST_TAG ( 0, mkU8(0) );
5302             fp_pop();
5303             break;
5304 
5305          case 0xE0: /* FNSTSW %ax */
5306             DIP("fnstsw %%ax\n");
5307             /* Get the FPU status word value and dump it in %AX. */
5308             if (0) {
5309                /* The obvious thing to do is simply dump the 16-bit
5310                   status word value in %AX.  However, due to a
5311                   limitation in Memcheck's origin tracking
5312                   machinery, this causes Memcheck not to track the
5313                   origin of any undefinedness into %AH (only into
5314                   %AL/%AX/%EAX), which means origins are lost in
5315                   the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5316                putIReg(2, R_EAX, get_FPU_sw());
5317             } else {
5318                /* So a somewhat lame kludge is to make it very
5319                   clear to Memcheck that the value is written to
5320                   both %AH and %AL.  This generates marginally
5321                   worse code, but I don't think it matters much. */
5322                IRTemp t16 = newTemp(Ity_I16);
5323                assign(t16, get_FPU_sw());
5324                putIReg( 1, R_AL, unop(Iop_16to8,   mkexpr(t16)) );
5325                putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
5326             }
5327             break;
5328 
5329          case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5330             fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
5331             break;
5332 
5333          case 0xF0 ...
0xF7: /* FCOMIP %st(0),%st(?) */ 5334 /* not really right since COMIP != UCOMIP */ 5335 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 5336 break; 5337 5338 default: 5339 goto decode_fail; 5340 } 5341 } 5342 5343 } 5344 5345 else 5346 vpanic("dis_FPU(x86): invalid primary opcode"); 5347 5348 *decode_ok = True; 5349 return delta; 5350 5351 decode_fail: 5352 *decode_ok = False; 5353 return delta; 5354 } 5355 5356 5357 /*------------------------------------------------------------*/ 5358 /*--- ---*/ 5359 /*--- MMX INSTRUCTIONS ---*/ 5360 /*--- ---*/ 5361 /*------------------------------------------------------------*/ 5362 5363 /* Effect of MMX insns on x87 FPU state (table 11-2 of 5364 IA32 arch manual, volume 3): 5365 5366 Read from, or write to MMX register (viz, any insn except EMMS): 5367 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 5368 * FP stack pointer set to zero 5369 5370 EMMS: 5371 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 5372 * FP stack pointer set to zero 5373 */ 5374 5375 static void do_MMX_preamble ( void ) 5376 { 5377 Int i; 5378 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5379 IRExpr* zero = mkU32(0); 5380 IRExpr* tag1 = mkU8(1); 5381 put_ftop(zero); 5382 for (i = 0; i < 8; i++) 5383 stmt( IRStmt_PutI( descr, zero, i, tag1 ) ); 5384 } 5385 5386 static void do_EMMS_preamble ( void ) 5387 { 5388 Int i; 5389 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5390 IRExpr* zero = mkU32(0); 5391 IRExpr* tag0 = mkU8(0); 5392 put_ftop(zero); 5393 for (i = 0; i < 8; i++) 5394 stmt( IRStmt_PutI( descr, zero, i, tag0 ) ); 5395 } 5396 5397 5398 static IRExpr* getMMXReg ( UInt archreg ) 5399 { 5400 vassert(archreg < 8); 5401 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 5402 } 5403 5404 5405 static void putMMXReg ( UInt archreg, IRExpr* e ) 5406 { 5407 vassert(archreg < 8); 5408 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 5409 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 5410 } 5411 5412 5413 /* Helper for non-shift MMX insns. Note this is incomplete in the 5414 sense that it does not first call do_MMX_preamble() -- that is the 5415 responsibility of its caller. 
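   Two conventions in the opcode table below deserve a worked case.
   invG: for PANDN (0xDF) the G argument is complemented before the
   And64, giving (~G) & E, which matches pandn's dst = ~dst & src.
   eLeft: for the pack/unpack/interleave ops the E operand has to
   supply the left-hand argument, so argL/argR are swapped when eLeft
   is set -- e.g. "punpcklbw %mm1,%mm0" must interleave with %mm1's
   bytes landing in the high position of each pair.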
*/ 5416 5417 static 5418 UInt dis_MMXop_regmem_to_reg ( UChar sorb, 5419 Int delta, 5420 UChar opc, 5421 HChar* name, 5422 Bool show_granularity ) 5423 { 5424 HChar dis_buf[50]; 5425 UChar modrm = getIByte(delta); 5426 Bool isReg = epartIsReg(modrm); 5427 IRExpr* argL = NULL; 5428 IRExpr* argR = NULL; 5429 IRExpr* argG = NULL; 5430 IRExpr* argE = NULL; 5431 IRTemp res = newTemp(Ity_I64); 5432 5433 Bool invG = False; 5434 IROp op = Iop_INVALID; 5435 void* hAddr = NULL; 5436 HChar* hName = NULL; 5437 Bool eLeft = False; 5438 5439 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 5440 5441 switch (opc) { 5442 /* Original MMX ones */ 5443 case 0xFC: op = Iop_Add8x8; break; 5444 case 0xFD: op = Iop_Add16x4; break; 5445 case 0xFE: op = Iop_Add32x2; break; 5446 5447 case 0xEC: op = Iop_QAdd8Sx8; break; 5448 case 0xED: op = Iop_QAdd16Sx4; break; 5449 5450 case 0xDC: op = Iop_QAdd8Ux8; break; 5451 case 0xDD: op = Iop_QAdd16Ux4; break; 5452 5453 case 0xF8: op = Iop_Sub8x8; break; 5454 case 0xF9: op = Iop_Sub16x4; break; 5455 case 0xFA: op = Iop_Sub32x2; break; 5456 5457 case 0xE8: op = Iop_QSub8Sx8; break; 5458 case 0xE9: op = Iop_QSub16Sx4; break; 5459 5460 case 0xD8: op = Iop_QSub8Ux8; break; 5461 case 0xD9: op = Iop_QSub16Ux4; break; 5462 5463 case 0xE5: op = Iop_MulHi16Sx4; break; 5464 case 0xD5: op = Iop_Mul16x4; break; 5465 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break; 5466 5467 case 0x74: op = Iop_CmpEQ8x8; break; 5468 case 0x75: op = Iop_CmpEQ16x4; break; 5469 case 0x76: op = Iop_CmpEQ32x2; break; 5470 5471 case 0x64: op = Iop_CmpGT8Sx8; break; 5472 case 0x65: op = Iop_CmpGT16Sx4; break; 5473 case 0x66: op = Iop_CmpGT32Sx2; break; 5474 5475 case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; 5476 case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; 5477 case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; 5478 5479 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 5480 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 5481 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 5482 5483 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 5484 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 5485 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 5486 5487 case 0xDB: op = Iop_And64; break; 5488 case 0xDF: op = Iop_And64; invG = True; break; 5489 case 0xEB: op = Iop_Or64; break; 5490 case 0xEF: /* Possibly do better here if argL and argR are the 5491 same reg */ 5492 op = Iop_Xor64; break; 5493 5494 /* Introduced in SSE1 */ 5495 case 0xE0: op = Iop_Avg8Ux8; break; 5496 case 0xE3: op = Iop_Avg16Ux4; break; 5497 case 0xEE: op = Iop_Max16Sx4; break; 5498 case 0xDE: op = Iop_Max8Ux8; break; 5499 case 0xEA: op = Iop_Min16Sx4; break; 5500 case 0xDA: op = Iop_Min8Ux8; break; 5501 case 0xE4: op = Iop_MulHi16Ux4; break; 5502 case 0xF6: XXX(x86g_calculate_mmx_psadbw); break; 5503 5504 /* Introduced in SSE2 */ 5505 case 0xD4: op = Iop_Add64; break; 5506 case 0xFB: op = Iop_Sub64; break; 5507 5508 default: 5509 vex_printf("\n0x%x\n", (Int)opc); 5510 vpanic("dis_MMXop_regmem_to_reg"); 5511 } 5512 5513 # undef XXX 5514 5515 argG = getMMXReg(gregOfRM(modrm)); 5516 if (invG) 5517 argG = unop(Iop_Not64, argG); 5518 5519 if (isReg) { 5520 delta++; 5521 argE = getMMXReg(eregOfRM(modrm)); 5522 } else { 5523 Int len; 5524 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5525 delta += len; 5526 argE = loadLE(Ity_I64, mkexpr(addr)); 5527 } 5528 5529 if (eLeft) { 5530 argL = argE; 5531 argR = argG; 5532 } else { 5533 argL = argG; 5534 
argR = argE;
5535    }
5536 
5537    if (op != Iop_INVALID) {
5538       vassert(hName == NULL);
5539       vassert(hAddr == NULL);
5540       assign(res, binop(op, argL, argR));
5541    } else {
5542       vassert(hName != NULL);
5543       vassert(hAddr != NULL);
5544       assign( res, 
5545               mkIRExprCCall(
5546                  Ity_I64, 
5547                  0/*regparms*/, hName, hAddr,
5548                  mkIRExprVec_2( argL, argR )
5549               ) 
5550             );
5551    }
5552 
5553    putMMXReg( gregOfRM(modrm), mkexpr(res) );
5554 
5555    DIP("%s%s %s, %s\n", 
5556        name, show_granularity ? nameMMXGran(opc & 3) : "",
5557        ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
5558        nameMMXReg(gregOfRM(modrm)) );
5559 
5560    return delta;
5561 }
5562 
5563 
5564 /* Vector by scalar shift of G by the amount specified at the bottom
5565    of E. This is a straight copy of dis_SSE_shiftG_byE. */
5566 
5567 static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta, 
5568                                  HChar* opname, IROp op )
5569 {
5570    HChar   dis_buf[50];
5571    Int     alen, size;
5572    IRTemp  addr;
5573    Bool    shl, shr, sar;
5574    UChar   rm   = getIByte(delta);
5575    IRTemp  g0   = newTemp(Ity_I64);
5576    IRTemp  g1   = newTemp(Ity_I64);
5577    IRTemp  amt  = newTemp(Ity_I32);
5578    IRTemp  amt8 = newTemp(Ity_I8);
5579 
5580    if (epartIsReg(rm)) {
5581       assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
5582       DIP("%s %s,%s\n", opname,
5583                         nameMMXReg(eregOfRM(rm)),
5584                         nameMMXReg(gregOfRM(rm)) );
5585       delta++;
5586    } else {
5587       addr = disAMode ( &alen, sorb, delta, dis_buf );
5588       assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
5589       DIP("%s %s,%s\n", opname,
5590                         dis_buf,
5591                         nameMMXReg(gregOfRM(rm)) );
5592       delta += alen;
5593    }
5594    assign( g0,   getMMXReg(gregOfRM(rm)) );
5595    assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
5596 
5597    shl = shr = sar = False;
5598    size = 0;
5599    switch (op) {
5600       case Iop_ShlN16x4: shl = True; size = 16; break;
5601       case Iop_ShlN32x2: shl = True; size = 32; break;
5602       case Iop_Shl64:    shl = True; size = 64; break;
5603       case Iop_ShrN16x4: shr = True; size = 16; break;
5604       case Iop_ShrN32x2: shr = True; size = 32; break;
5605       case Iop_Shr64:    shr = True; size = 64; break;
5606       case Iop_SarN16x4: sar = True; size = 16; break;
5607       case Iop_SarN32x2: sar = True; size = 32; break;
5608       default: vassert(0);
5609    }
5610 
5611    if (shl || shr) {
5612      assign( 
5613         g1,
5614         IRExpr_Mux0X(
5615            unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5616            mkU64(0),
5617            binop(op, mkexpr(g0), mkexpr(amt8))
5618         )
5619      );
5620    } else 
5621    if (sar) {
5622      assign( 
5623         g1,
5624         IRExpr_Mux0X(
5625            unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
5626            binop(op, mkexpr(g0), mkU8(size-1)),
5627            binop(op, mkexpr(g0), mkexpr(amt8))
5628         )
5629      );
5630    } else {
5631       /*NOTREACHED*/
5632       vassert(0);
5633    }
5634 
5635    putMMXReg( gregOfRM(rm), mkexpr(g1) );
5636    return delta;
5637 }
5638 
5639 
5640 /* Vector by scalar shift of E by an immediate byte. This is a 
5641    straight copy of dis_SSE_shiftE_imm. 
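   As in dis_MMX_shiftG_byE above, out-of-range counts follow the x86
   rule instead of being fed to the IR shift op (whose behaviour for
   oversize amounts is not defined): e.g. "psrlw $20" must produce
   all-zero lanes since 20 >= 16, while "psraw $20" behaves like
   "psraw $15", replicating each lane's sign bit -- hence the
   amt >= size special cases below.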
*/ 5642 5643 static 5644 UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op ) 5645 { 5646 Bool shl, shr, sar; 5647 UChar rm = getIByte(delta); 5648 IRTemp e0 = newTemp(Ity_I64); 5649 IRTemp e1 = newTemp(Ity_I64); 5650 UChar amt, size; 5651 vassert(epartIsReg(rm)); 5652 vassert(gregOfRM(rm) == 2 5653 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 5654 amt = getIByte(delta+1); 5655 delta += 2; 5656 DIP("%s $%d,%s\n", opname, 5657 (Int)amt, 5658 nameMMXReg(eregOfRM(rm)) ); 5659 5660 assign( e0, getMMXReg(eregOfRM(rm)) ); 5661 5662 shl = shr = sar = False; 5663 size = 0; 5664 switch (op) { 5665 case Iop_ShlN16x4: shl = True; size = 16; break; 5666 case Iop_ShlN32x2: shl = True; size = 32; break; 5667 case Iop_Shl64: shl = True; size = 64; break; 5668 case Iop_SarN16x4: sar = True; size = 16; break; 5669 case Iop_SarN32x2: sar = True; size = 32; break; 5670 case Iop_ShrN16x4: shr = True; size = 16; break; 5671 case Iop_ShrN32x2: shr = True; size = 32; break; 5672 case Iop_Shr64: shr = True; size = 64; break; 5673 default: vassert(0); 5674 } 5675 5676 if (shl || shr) { 5677 assign( e1, amt >= size 5678 ? mkU64(0) 5679 : binop(op, mkexpr(e0), mkU8(amt)) 5680 ); 5681 } else 5682 if (sar) { 5683 assign( e1, amt >= size 5684 ? binop(op, mkexpr(e0), mkU8(size-1)) 5685 : binop(op, mkexpr(e0), mkU8(amt)) 5686 ); 5687 } else { 5688 /*NOTREACHED*/ 5689 vassert(0); 5690 } 5691 5692 putMMXReg( eregOfRM(rm), mkexpr(e1) ); 5693 return delta; 5694 } 5695 5696 5697 /* Completely handle all MMX instructions except emms. */ 5698 5699 static 5700 UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) 5701 { 5702 Int len; 5703 UChar modrm; 5704 HChar dis_buf[50]; 5705 UChar opc = getIByte(delta); 5706 delta++; 5707 5708 /* dis_MMX handles all insns except emms. 
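   So, per the table at the top of this section, every insn decoded
   here first runs do_MMX_preamble: even a bare "movq %mm0,%mm1" sets
   FTOP to zero and marks all eight FP tags as valid before the move
   itself is generated.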
*/ 5709 do_MMX_preamble(); 5710 5711 switch (opc) { 5712 5713 case 0x6E: 5714 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/ 5715 if (sz != 4) 5716 goto mmx_decode_failure; 5717 modrm = getIByte(delta); 5718 if (epartIsReg(modrm)) { 5719 delta++; 5720 putMMXReg( 5721 gregOfRM(modrm), 5722 binop( Iop_32HLto64, 5723 mkU32(0), 5724 getIReg(4, eregOfRM(modrm)) ) ); 5725 DIP("movd %s, %s\n", 5726 nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5727 } else { 5728 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5729 delta += len; 5730 putMMXReg( 5731 gregOfRM(modrm), 5732 binop( Iop_32HLto64, 5733 mkU32(0), 5734 loadLE(Ity_I32, mkexpr(addr)) ) ); 5735 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm))); 5736 } 5737 break; 5738 5739 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */ 5740 if (sz != 4) 5741 goto mmx_decode_failure; 5742 modrm = getIByte(delta); 5743 if (epartIsReg(modrm)) { 5744 delta++; 5745 putIReg( 4, eregOfRM(modrm), 5746 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5747 DIP("movd %s, %s\n", 5748 nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 5749 } else { 5750 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5751 delta += len; 5752 storeLE( mkexpr(addr), 5753 unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) ); 5754 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf); 5755 } 5756 break; 5757 5758 case 0x6F: 5759 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 5760 if (sz != 4) 5761 goto mmx_decode_failure; 5762 modrm = getIByte(delta); 5763 if (epartIsReg(modrm)) { 5764 delta++; 5765 putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) ); 5766 DIP("movq %s, %s\n", 5767 nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm))); 5768 } else { 5769 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5770 delta += len; 5771 putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 5772 DIP("movq %s, %s\n", 5773 dis_buf, nameMMXReg(gregOfRM(modrm))); 5774 } 5775 break; 5776 5777 case 0x7F: 5778 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 5779 if (sz != 4) 5780 goto mmx_decode_failure; 5781 modrm = getIByte(delta); 5782 if (epartIsReg(modrm)) { 5783 delta++; 5784 putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) ); 5785 DIP("movq %s, %s\n", 5786 nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm))); 5787 } else { 5788 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 5789 delta += len; 5790 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 5791 DIP("mov(nt)q %s, %s\n", 5792 nameMMXReg(gregOfRM(modrm)), dis_buf); 5793 } 5794 break; 5795 5796 case 0xFC: 5797 case 0xFD: 5798 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 5799 if (sz != 4) 5800 goto mmx_decode_failure; 5801 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True ); 5802 break; 5803 5804 case 0xEC: 5805 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5806 if (sz != 4) 5807 goto mmx_decode_failure; 5808 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True ); 5809 break; 5810 5811 case 0xDC: 5812 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5813 if (sz != 4) 5814 goto mmx_decode_failure; 5815 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True ); 5816 break; 5817 5818 case 0xF8: 5819 case 0xF9: 5820 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 5821 if (sz != 4) 5822 goto mmx_decode_failure; 5823 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True ); 5824 break; 5825 5826 case 0xE8: 5827 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5828 if (sz != 4) 
5829 goto mmx_decode_failure; 5830 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True ); 5831 break; 5832 5833 case 0xD8: 5834 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 5835 if (sz != 4) 5836 goto mmx_decode_failure; 5837 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True ); 5838 break; 5839 5840 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 5841 if (sz != 4) 5842 goto mmx_decode_failure; 5843 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False ); 5844 break; 5845 5846 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 5847 if (sz != 4) 5848 goto mmx_decode_failure; 5849 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False ); 5850 break; 5851 5852 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 5853 vassert(sz == 4); 5854 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False ); 5855 break; 5856 5857 case 0x74: 5858 case 0x75: 5859 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 5860 if (sz != 4) 5861 goto mmx_decode_failure; 5862 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True ); 5863 break; 5864 5865 case 0x64: 5866 case 0x65: 5867 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 5868 if (sz != 4) 5869 goto mmx_decode_failure; 5870 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True ); 5871 break; 5872 5873 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 5874 if (sz != 4) 5875 goto mmx_decode_failure; 5876 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False ); 5877 break; 5878 5879 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5880 if (sz != 4) 5881 goto mmx_decode_failure; 5882 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False ); 5883 break; 5884 5885 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 5886 if (sz != 4) 5887 goto mmx_decode_failure; 5888 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False ); 5889 break; 5890 5891 case 0x68: 5892 case 0x69: 5893 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 5894 if (sz != 4) 5895 goto mmx_decode_failure; 5896 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True ); 5897 break; 5898 5899 case 0x60: 5900 case 0x61: 5901 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5902 if (sz != 4) 5903 goto mmx_decode_failure; 5904 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True ); 5905 break; 5906 5907 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 5908 if (sz != 4) 5909 goto mmx_decode_failure; 5910 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False ); 5911 break; 5912 5913 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 5914 if (sz != 4) 5915 goto mmx_decode_failure; 5916 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False ); 5917 break; 5918 5919 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 5920 if (sz != 4) 5921 goto mmx_decode_failure; 5922 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False ); 5923 break; 5924 5925 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 5926 if (sz != 4) 5927 goto mmx_decode_failure; 5928 delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False ); 5929 break; 5930 5931 # define SHIFT_BY_REG(_name,_op) \ 5932 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \ 5933 break; 5934 5935 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5936 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 5937 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 5938 
case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 5939 5940 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 5941 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 5942 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 5943 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 5944 5945 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 5946 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 5947 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 5948 5949 # undef SHIFT_BY_REG 5950 5951 case 0x71: 5952 case 0x72: 5953 case 0x73: { 5954 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 5955 UChar byte2, subopc; 5956 if (sz != 4) 5957 goto mmx_decode_failure; 5958 byte2 = getIByte(delta); /* amode / sub-opcode */ 5959 subopc = toUChar( (byte2 >> 3) & 7 ); 5960 5961 # define SHIFT_BY_IMM(_name,_op) \ 5962 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 5963 } while (0) 5964 5965 if (subopc == 2 /*SRL*/ && opc == 0x71) 5966 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 5967 else if (subopc == 2 /*SRL*/ && opc == 0x72) 5968 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 5969 else if (subopc == 2 /*SRL*/ && opc == 0x73) 5970 SHIFT_BY_IMM("psrlq", Iop_Shr64); 5971 5972 else if (subopc == 4 /*SAR*/ && opc == 0x71) 5973 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 5974 else if (subopc == 4 /*SAR*/ && opc == 0x72) 5975 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 5976 5977 else if (subopc == 6 /*SHL*/ && opc == 0x71) 5978 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 5979 else if (subopc == 6 /*SHL*/ && opc == 0x72) 5980 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 5981 else if (subopc == 6 /*SHL*/ && opc == 0x73) 5982 SHIFT_BY_IMM("psllq", Iop_Shl64); 5983 5984 else goto mmx_decode_failure; 5985 5986 # undef SHIFT_BY_IMM 5987 break; 5988 } 5989 5990 case 0xF7: { 5991 IRTemp addr = newTemp(Ity_I32); 5992 IRTemp regD = newTemp(Ity_I64); 5993 IRTemp regM = newTemp(Ity_I64); 5994 IRTemp mask = newTemp(Ity_I64); 5995 IRTemp olddata = newTemp(Ity_I64); 5996 IRTemp newdata = newTemp(Ity_I64); 5997 5998 modrm = getIByte(delta); 5999 if (sz != 4 || (!epartIsReg(modrm))) 6000 goto mmx_decode_failure; 6001 delta++; 6002 6003 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 6004 assign( regM, getMMXReg( eregOfRM(modrm) )); 6005 assign( regD, getMMXReg( gregOfRM(modrm) )); 6006 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 6007 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 6008 assign( newdata, 6009 binop(Iop_Or64, 6010 binop(Iop_And64, 6011 mkexpr(regD), 6012 mkexpr(mask) ), 6013 binop(Iop_And64, 6014 mkexpr(olddata), 6015 unop(Iop_Not64, mkexpr(mask)))) ); 6016 storeLE( mkexpr(addr), mkexpr(newdata) ); 6017 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), 6018 nameMMXReg( gregOfRM(modrm) ) ); 6019 break; 6020 } 6021 6022 /* --- MMX decode failure --- */ 6023 default: 6024 mmx_decode_failure: 6025 *decode_ok = False; 6026 return delta; /* ignored */ 6027 6028 } 6029 6030 *decode_ok = True; 6031 return delta; 6032 } 6033 6034 6035 /*------------------------------------------------------------*/ 6036 /*--- More misc arithmetic and other obscure insns. ---*/ 6037 /*------------------------------------------------------------*/ 6038 6039 /* Double length left and right shifts. Apparently only required in 6040 v-size (no b- variant). */ 6041 static 6042 UInt dis_SHLRD_Gv_Ev ( UChar sorb, 6043 Int delta, UChar modrm, 6044 Int sz, 6045 IRExpr* shift_amt, 6046 Bool amt_is_literal, 6047 HChar* shift_amt_txt, 6048 Bool left_shift ) 6049 { 6050 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 6051 for printing it. 
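
   As a reference point for the IR built below, the 32-bit semantics
   are, in a standalone sketch (illustrative plain C only, not part
   of this module):

      uint32_t shld32 ( uint32_t dst, uint32_t src, uint32_t amt ) {
         amt &= 31;
         if (amt == 0) return dst;
         return (dst << amt) | (src >> (32 - amt));
      }

      uint32_t shrd32 ( uint32_t dst, uint32_t src, uint32_t amt ) {
         amt &= 31;
         if (amt == 0) return dst;
         return (dst >> amt) | (src << (32 - amt));
      }
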
And eip on entry points at the modrm byte. */ 6052 Int len; 6053 HChar dis_buf[50]; 6054 6055 IRType ty = szToITy(sz); 6056 IRTemp gsrc = newTemp(ty); 6057 IRTemp esrc = newTemp(ty); 6058 IRTemp addr = IRTemp_INVALID; 6059 IRTemp tmpSH = newTemp(Ity_I8); 6060 IRTemp tmpL = IRTemp_INVALID; 6061 IRTemp tmpRes = IRTemp_INVALID; 6062 IRTemp tmpSubSh = IRTemp_INVALID; 6063 IROp mkpair; 6064 IROp getres; 6065 IROp shift; 6066 IRExpr* mask = NULL; 6067 6068 vassert(sz == 2 || sz == 4); 6069 6070 /* The E-part is the destination; this is shifted. The G-part 6071 supplies bits to be shifted into the E-part, but is not 6072 changed. 6073 6074 If shifting left, form a double-length word with E at the top 6075 and G at the bottom, and shift this left. The result is then in 6076 the high part. 6077 6078 If shifting right, form a double-length word with G at the top 6079 and E at the bottom, and shift this right. The result is then 6080 at the bottom. */ 6081 6082 /* Fetch the operands. */ 6083 6084 assign( gsrc, getIReg(sz, gregOfRM(modrm)) ); 6085 6086 if (epartIsReg(modrm)) { 6087 delta++; 6088 assign( esrc, getIReg(sz, eregOfRM(modrm)) ); 6089 DIP("sh%cd%c %s, %s, %s\n", 6090 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6091 shift_amt_txt, 6092 nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm))); 6093 } else { 6094 addr = disAMode ( &len, sorb, delta, dis_buf ); 6095 delta += len; 6096 assign( esrc, loadLE(ty, mkexpr(addr)) ); 6097 DIP("sh%cd%c %s, %s, %s\n", 6098 ( left_shift ? 'l' : 'r' ), nameISize(sz), 6099 shift_amt_txt, 6100 nameIReg(sz, gregOfRM(modrm)), dis_buf); 6101 } 6102 6103 /* Round up the relevant primops. */ 6104 6105 if (sz == 4) { 6106 tmpL = newTemp(Ity_I64); 6107 tmpRes = newTemp(Ity_I32); 6108 tmpSubSh = newTemp(Ity_I32); 6109 mkpair = Iop_32HLto64; 6110 getres = left_shift ? Iop_64HIto32 : Iop_64to32; 6111 shift = left_shift ? Iop_Shl64 : Iop_Shr64; 6112 mask = mkU8(31); 6113 } else { 6114 /* sz == 2 */ 6115 tmpL = newTemp(Ity_I32); 6116 tmpRes = newTemp(Ity_I16); 6117 tmpSubSh = newTemp(Ity_I16); 6118 mkpair = Iop_16HLto32; 6119 getres = left_shift ? Iop_32HIto16 : Iop_32to16; 6120 shift = left_shift ? Iop_Shl32 : Iop_Shr32; 6121 mask = mkU8(15); 6122 } 6123 6124 /* Do the shift, calculate the subshift value, and set 6125 the flag thunk. */ 6126 6127 assign( tmpSH, binop(Iop_And8, shift_amt, mask) ); 6128 6129 if (left_shift) 6130 assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) ); 6131 else 6132 assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) ); 6133 6134 assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) ); 6135 assign( tmpSubSh, 6136 unop(getres, 6137 binop(shift, 6138 mkexpr(tmpL), 6139 binop(Iop_And8, 6140 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 6141 mask))) ); 6142 6143 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32, 6144 tmpRes, tmpSubSh, ty, tmpSH ); 6145 6146 /* Put result back. */ 6147 6148 if (epartIsReg(modrm)) { 6149 putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes)); 6150 } else { 6151 storeLE( mkexpr(addr), mkexpr(tmpRes) ); 6152 } 6153 6154 if (amt_is_literal) delta++; 6155 return delta; 6156 } 6157 6158 6159 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 6160 required. 
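
   In outline, the semantics implemented below are (illustrative
   sketch only):

      byte  = load( addr + (bitno >>s 3) )   -- bitno is signed for
      carry = (byte >> (bitno & 7)) & 1      -- the memory form

   with BTS/BTR/BTC additionally OR-ing, AND-NOT-ing or XOR-ing the
   single-bit mask (1 << (bitno & 7)) back into that byte.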
*/ 6161 6162 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 6163 6164 static HChar* nameBtOp ( BtOp op ) 6165 { 6166 switch (op) { 6167 case BtOpNone: return ""; 6168 case BtOpSet: return "s"; 6169 case BtOpReset: return "r"; 6170 case BtOpComp: return "c"; 6171 default: vpanic("nameBtOp(x86)"); 6172 } 6173 } 6174 6175 6176 static 6177 UInt dis_bt_G_E ( VexAbiInfo* vbi, 6178 UChar sorb, Bool locked, Int sz, Int delta, BtOp op ) 6179 { 6180 HChar dis_buf[50]; 6181 UChar modrm; 6182 Int len; 6183 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 6184 t_addr1, t_esp, t_mask, t_new; 6185 6186 vassert(sz == 2 || sz == 4); 6187 6188 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 6189 = t_addr0 = t_addr1 = t_esp 6190 = t_mask = t_new = IRTemp_INVALID; 6191 6192 t_fetched = newTemp(Ity_I8); 6193 t_new = newTemp(Ity_I8); 6194 t_bitno0 = newTemp(Ity_I32); 6195 t_bitno1 = newTemp(Ity_I32); 6196 t_bitno2 = newTemp(Ity_I8); 6197 t_addr1 = newTemp(Ity_I32); 6198 modrm = getIByte(delta); 6199 6200 assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) ); 6201 6202 if (epartIsReg(modrm)) { 6203 delta++; 6204 /* Get it onto the client's stack. */ 6205 t_esp = newTemp(Ity_I32); 6206 t_addr0 = newTemp(Ity_I32); 6207 6208 /* For the choice of the value 128, see comment in dis_bt_G_E in 6209 guest_amd64_toIR.c. We point out here only that 128 is 6210 fast-cased in Memcheck and is > 0, so seems like a good 6211 choice. */ 6212 vassert(vbi->guest_stack_redzone_size == 0); 6213 assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) ); 6214 putIReg(4, R_ESP, mkexpr(t_esp)); 6215 6216 storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) ); 6217 6218 /* Make t_addr0 point at it. */ 6219 assign( t_addr0, mkexpr(t_esp) ); 6220 6221 /* Mask out upper bits of the shift amount, since we're doing a 6222 reg. */ 6223 assign( t_bitno1, binop(Iop_And32, 6224 mkexpr(t_bitno0), 6225 mkU32(sz == 4 ? 31 : 15)) ); 6226 6227 } else { 6228 t_addr0 = disAMode ( &len, sorb, delta, dis_buf ); 6229 delta += len; 6230 assign( t_bitno1, mkexpr(t_bitno0) ); 6231 } 6232 6233 /* At this point: t_addr0 is the address being operated on. If it 6234 was a reg, we will have pushed it onto the client's stack. 6235 t_bitno1 is the bit number, suitably masked in the case of a 6236 reg. */ 6237 6238 /* Now the main sequence. 
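      First split the bit number into a byte displacement (arithmetic
      shift right by 3 -- the offset is signed for the memory form)
      and a bit-within-byte index (the low 3 bits).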
*/ 6239 assign( t_addr1, 6240 binop(Iop_Add32, 6241 mkexpr(t_addr0), 6242 binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) ); 6243 6244 /* t_addr1 now holds effective address */ 6245 6246 assign( t_bitno2, 6247 unop(Iop_32to8, 6248 binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) ); 6249 6250 /* t_bitno2 contains offset of bit within byte */ 6251 6252 if (op != BtOpNone) { 6253 t_mask = newTemp(Ity_I8); 6254 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 6255 } 6256 6257 /* t_mask is now a suitable byte mask */ 6258 6259 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 6260 6261 if (op != BtOpNone) { 6262 switch (op) { 6263 case BtOpSet: 6264 assign( t_new, 6265 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6266 break; 6267 case BtOpComp: 6268 assign( t_new, 6269 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 6270 break; 6271 case BtOpReset: 6272 assign( t_new, 6273 binop(Iop_And8, mkexpr(t_fetched), 6274 unop(Iop_Not8, mkexpr(t_mask))) ); 6275 break; 6276 default: 6277 vpanic("dis_bt_G_E(x86)"); 6278 } 6279 if (locked && !epartIsReg(modrm)) { 6280 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 6281 mkexpr(t_new)/*new*/, 6282 guest_EIP_curr_instr ); 6283 } else { 6284 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 6285 } 6286 } 6287 6288 /* Side effect done; now get selected bit into Carry flag */ 6289 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 6290 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6291 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6292 stmt( IRStmt_Put( 6293 OFFB_CC_DEP1, 6294 binop(Iop_And32, 6295 binop(Iop_Shr32, 6296 unop(Iop_8Uto32, mkexpr(t_fetched)), 6297 mkexpr(t_bitno2)), 6298 mkU32(1))) 6299 ); 6300 /* Set NDEP even though it isn't used. This makes redundant-PUT 6301 elimination of previous stores to this field work better. */ 6302 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6303 6304 /* Move reg operand from stack back to reg */ 6305 if (epartIsReg(modrm)) { 6306 /* t_esp still points at it. */ 6307 putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) ); 6308 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) ); 6309 } 6310 6311 DIP("bt%s%c %s, %s\n", 6312 nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 6313 ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) ); 6314 6315 return delta; 6316 } 6317 6318 6319 6320 /* Handle BSF/BSR. Only v-size seems necessary. */ 6321 static 6322 UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds ) 6323 { 6324 Bool isReg; 6325 UChar modrm; 6326 HChar dis_buf[50]; 6327 6328 IRType ty = szToITy(sz); 6329 IRTemp src = newTemp(ty); 6330 IRTemp dst = newTemp(ty); 6331 6332 IRTemp src32 = newTemp(Ity_I32); 6333 IRTemp dst32 = newTemp(Ity_I32); 6334 IRTemp src8 = newTemp(Ity_I8); 6335 6336 vassert(sz == 4 || sz == 2); 6337 6338 modrm = getIByte(delta); 6339 6340 isReg = epartIsReg(modrm); 6341 if (isReg) { 6342 delta++; 6343 assign( src, getIReg(sz, eregOfRM(modrm)) ); 6344 } else { 6345 Int len; 6346 IRTemp addr = disAMode( &len, sorb, delta, dis_buf ); 6347 delta += len; 6348 assign( src, loadLE(ty, mkexpr(addr)) ); 6349 } 6350 6351 DIP("bs%c%c %s, %s\n", 6352 fwds ? 'f' : 'r', nameISize(sz), 6353 ( isReg ? 
nameIReg(sz, eregOfRM(modrm)) : dis_buf ), 6354 nameIReg(sz, gregOfRM(modrm))); 6355 6356 /* Generate an 8-bit expression which is zero iff the 6357 original is zero, and nonzero otherwise */ 6358 assign( src8, 6359 unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8), 6360 mkexpr(src), mkU(ty,0))) ); 6361 6362 /* Flags: Z is 1 iff source value is zero. All others 6363 are undefined -- we force them to zero. */ 6364 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6365 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6366 stmt( IRStmt_Put( 6367 OFFB_CC_DEP1, 6368 IRExpr_Mux0X( mkexpr(src8), 6369 /* src==0 */ 6370 mkU32(X86G_CC_MASK_Z), 6371 /* src!=0 */ 6372 mkU32(0) 6373 ) 6374 )); 6375 /* Set NDEP even though it isn't used. This makes redundant-PUT 6376 elimination of previous stores to this field work better. */ 6377 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6378 6379 /* Result: iff source value is zero, we can't use 6380 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case. 6381 But anyway, Intel x86 semantics say the result is undefined in 6382 such situations. Hence handle the zero case specially. */ 6383 6384 /* Bleh. What we compute: 6385 6386 bsf32: if src == 0 then 0 else Ctz32(src) 6387 bsr32: if src == 0 then 0 else 31 - Clz32(src) 6388 6389 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src)) 6390 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src)) 6391 6392 First, widen src to 32 bits if it is not already. 6393 6394 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the 6395 dst register unchanged when src == 0. Hence change accordingly. 6396 */ 6397 if (sz == 2) 6398 assign( src32, unop(Iop_16Uto32, mkexpr(src)) ); 6399 else 6400 assign( src32, mkexpr(src) ); 6401 6402 /* The main computation, guarding against zero. */ 6403 assign( dst32, 6404 IRExpr_Mux0X( 6405 mkexpr(src8), 6406 /* src == 0 -- leave dst unchanged */ 6407 widenUto32( getIReg( sz, gregOfRM(modrm) ) ), 6408 /* src != 0 */ 6409 fwds ? 
unop(Iop_Ctz32, mkexpr(src32)) 6410 : binop(Iop_Sub32, 6411 mkU32(31), 6412 unop(Iop_Clz32, mkexpr(src32))) 6413 ) 6414 ); 6415 6416 if (sz == 2) 6417 assign( dst, unop(Iop_32to16, mkexpr(dst32)) ); 6418 else 6419 assign( dst, mkexpr(dst32) ); 6420 6421 /* dump result back */ 6422 putIReg( sz, gregOfRM(modrm), mkexpr(dst) ); 6423 6424 return delta; 6425 } 6426 6427 6428 static 6429 void codegen_xchg_eAX_Reg ( Int sz, Int reg ) 6430 { 6431 IRType ty = szToITy(sz); 6432 IRTemp t1 = newTemp(ty); 6433 IRTemp t2 = newTemp(ty); 6434 vassert(sz == 2 || sz == 4); 6435 assign( t1, getIReg(sz, R_EAX) ); 6436 assign( t2, getIReg(sz, reg) ); 6437 putIReg( sz, R_EAX, mkexpr(t2) ); 6438 putIReg( sz, reg, mkexpr(t1) ); 6439 DIP("xchg%c %s, %s\n", 6440 nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg)); 6441 } 6442 6443 6444 static 6445 void codegen_SAHF ( void ) 6446 { 6447 /* Set the flags to: 6448 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag 6449 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6450 |X86G_CC_MASK_P|X86G_CC_MASK_C) 6451 */ 6452 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6453 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6454 IRTemp oldflags = newTemp(Ity_I32); 6455 assign( oldflags, mk_x86g_calculate_eflags_all() ); 6456 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 6457 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6458 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 6459 stmt( IRStmt_Put( OFFB_CC_DEP1, 6460 binop(Iop_Or32, 6461 binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)), 6462 binop(Iop_And32, 6463 binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)), 6464 mkU32(mask_SZACP)) 6465 ) 6466 )); 6467 /* Set NDEP even though it isn't used. This makes redundant-PUT 6468 elimination of previous stores to this field work better. 
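      The same COPY-thunk idiom (X86G_CC_OP_COPY with NDEP zeroed)
      appears in the bit-test and bit-scan handlers above, for the
      same reason.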
*/ 6469 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 6470 } 6471 6472 6473 static 6474 void codegen_LAHF ( void ) 6475 { 6476 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 6477 IRExpr* eax_with_hole; 6478 IRExpr* new_byte; 6479 IRExpr* new_eax; 6480 UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A 6481 |X86G_CC_MASK_C|X86G_CC_MASK_P; 6482 6483 IRTemp flags = newTemp(Ity_I32); 6484 assign( flags, mk_x86g_calculate_eflags_all() ); 6485 6486 eax_with_hole 6487 = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF)); 6488 new_byte 6489 = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)), 6490 mkU32(1<<1)); 6491 new_eax 6492 = binop(Iop_Or32, eax_with_hole, 6493 binop(Iop_Shl32, new_byte, mkU8(8))); 6494 putIReg(4, R_EAX, new_eax); 6495 } 6496 6497 6498 static 6499 UInt dis_cmpxchg_G_E ( UChar sorb, 6500 Bool locked, 6501 Int size, 6502 Int delta0 ) 6503 { 6504 HChar dis_buf[50]; 6505 Int len; 6506 6507 IRType ty = szToITy(size); 6508 IRTemp acc = newTemp(ty); 6509 IRTemp src = newTemp(ty); 6510 IRTemp dest = newTemp(ty); 6511 IRTemp dest2 = newTemp(ty); 6512 IRTemp acc2 = newTemp(ty); 6513 IRTemp cond8 = newTemp(Ity_I8); 6514 IRTemp addr = IRTemp_INVALID; 6515 UChar rm = getUChar(delta0); 6516 6517 /* There are 3 cases to consider: 6518 6519 reg-reg: ignore any lock prefix, generate sequence based 6520 on Mux0X 6521 6522 reg-mem, not locked: ignore any lock prefix, generate sequence 6523 based on Mux0X 6524 6525 reg-mem, locked: use IRCAS 6526 */ 6527 if (epartIsReg(rm)) { 6528 /* case 1 */ 6529 assign( dest, getIReg(size, eregOfRM(rm)) ); 6530 delta0++; 6531 assign( src, getIReg(size, gregOfRM(rm)) ); 6532 assign( acc, getIReg(size, R_EAX) ); 6533 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6534 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6535 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6536 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6537 putIReg(size, R_EAX, mkexpr(acc2)); 6538 putIReg(size, eregOfRM(rm), mkexpr(dest2)); 6539 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6540 nameIReg(size,gregOfRM(rm)), 6541 nameIReg(size,eregOfRM(rm)) ); 6542 } 6543 else if (!epartIsReg(rm) && !locked) { 6544 /* case 2 */ 6545 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6546 assign( dest, loadLE(ty, mkexpr(addr)) ); 6547 delta0 += len; 6548 assign( src, getIReg(size, gregOfRM(rm)) ); 6549 assign( acc, getIReg(size, R_EAX) ); 6550 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6551 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6552 assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) ); 6553 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6554 putIReg(size, R_EAX, mkexpr(acc2)); 6555 storeLE( mkexpr(addr), mkexpr(dest2) ); 6556 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6557 nameIReg(size,gregOfRM(rm)), dis_buf); 6558 } 6559 else if (!epartIsReg(rm) && locked) { 6560 /* case 3 */ 6561 /* src is new value. acc is expected value. dest is old value. 6562 Compute success from the output of the IRCAS, and steer the 6563 new value for EAX accordingly: in case of success, EAX is 6564 unchanged. 
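      For reference, the architected behaviour being implemented is
      (sketch):

         if (*mem == %eax) { ZF = 1; *mem = reg;  }
         else              { ZF = 0; %eax = *mem; }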
*/ 6565 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6566 delta0 += len; 6567 assign( src, getIReg(size, gregOfRM(rm)) ); 6568 assign( acc, getIReg(size, R_EAX) ); 6569 stmt( IRStmt_CAS( 6570 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 6571 NULL, mkexpr(acc), NULL, mkexpr(src) ) 6572 )); 6573 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 6574 assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) ); 6575 assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) ); 6576 putIReg(size, R_EAX, mkexpr(acc2)); 6577 DIP("cmpxchg%c %s,%s\n", nameISize(size), 6578 nameIReg(size,gregOfRM(rm)), dis_buf); 6579 } 6580 else vassert(0); 6581 6582 return delta0; 6583 } 6584 6585 6586 /* Handle conditional move instructions of the form 6587 cmovcc E(reg-or-mem), G(reg) 6588 6589 E(src) is reg-or-mem 6590 G(dst) is reg. 6591 6592 If E is reg, --> GET %E, tmps 6593 GET %G, tmpd 6594 CMOVcc tmps, tmpd 6595 PUT tmpd, %G 6596 6597 If E is mem --> (getAddr E) -> tmpa 6598 LD (tmpa), tmps 6599 GET %G, tmpd 6600 CMOVcc tmps, tmpd 6601 PUT tmpd, %G 6602 */ 6603 static 6604 UInt dis_cmov_E_G ( UChar sorb, 6605 Int sz, 6606 X86Condcode cond, 6607 Int delta0 ) 6608 { 6609 UChar rm = getIByte(delta0); 6610 HChar dis_buf[50]; 6611 Int len; 6612 6613 IRType ty = szToITy(sz); 6614 IRTemp tmps = newTemp(ty); 6615 IRTemp tmpd = newTemp(ty); 6616 6617 if (epartIsReg(rm)) { 6618 assign( tmps, getIReg(sz, eregOfRM(rm)) ); 6619 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6620 6621 putIReg(sz, gregOfRM(rm), 6622 IRExpr_Mux0X( unop(Iop_1Uto8, 6623 mk_x86g_calculate_condition(cond)), 6624 mkexpr(tmpd), 6625 mkexpr(tmps) ) 6626 ); 6627 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6628 name_X86Condcode(cond), 6629 nameIReg(sz,eregOfRM(rm)), 6630 nameIReg(sz,gregOfRM(rm))); 6631 return 1+delta0; 6632 } 6633 6634 /* E refers to memory */ 6635 { 6636 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6637 assign( tmps, loadLE(ty, mkexpr(addr)) ); 6638 assign( tmpd, getIReg(sz, gregOfRM(rm)) ); 6639 6640 putIReg(sz, gregOfRM(rm), 6641 IRExpr_Mux0X( unop(Iop_1Uto8, 6642 mk_x86g_calculate_condition(cond)), 6643 mkexpr(tmpd), 6644 mkexpr(tmps) ) 6645 ); 6646 6647 DIP("cmov%c%s %s,%s\n", nameISize(sz), 6648 name_X86Condcode(cond), 6649 dis_buf, 6650 nameIReg(sz,gregOfRM(rm))); 6651 return len+delta0; 6652 } 6653 } 6654 6655 6656 static 6657 UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0, 6658 Bool* decodeOK ) 6659 { 6660 Int len; 6661 UChar rm = getIByte(delta0); 6662 HChar dis_buf[50]; 6663 6664 IRType ty = szToITy(sz); 6665 IRTemp tmpd = newTemp(ty); 6666 IRTemp tmpt0 = newTemp(ty); 6667 IRTemp tmpt1 = newTemp(ty); 6668 6669 /* There are 3 cases to consider: 6670 6671 reg-reg: ignore any lock prefix, 6672 generate 'naive' (non-atomic) sequence 6673 6674 reg-mem, not locked: ignore any lock prefix, generate 'naive' 6675 (non-atomic) sequence 6676 6677 reg-mem, locked: use IRCAS 6678 */ 6679 6680 if (epartIsReg(rm)) { 6681 /* case 1 */ 6682 assign( tmpd, getIReg(sz, eregOfRM(rm))); 6683 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6684 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6685 mkexpr(tmpd), mkexpr(tmpt0)) ); 6686 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6687 putIReg(sz, eregOfRM(rm), mkexpr(tmpt1)); 6688 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6689 DIP("xadd%c %s, %s\n", 6690 nameISize(sz), nameIReg(sz,gregOfRM(rm)), 6691 nameIReg(sz,eregOfRM(rm))); 6692 *decodeOK = True; 6693 return 1+delta0; 6694 } 6695 else if (!epartIsReg(rm) && !locked) { 6696 /* case 2 */ 6697 
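      /* Non-atomic read-modify-write: load the old value, store the
         sum back, and hand the old value to the G register. */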
IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6698 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6699 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6700 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6701 mkexpr(tmpd), mkexpr(tmpt0)) ); 6702 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 6703 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6704 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6705 DIP("xadd%c %s, %s\n", 6706 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6707 *decodeOK = True; 6708 return len+delta0; 6709 } 6710 else if (!epartIsReg(rm) && locked) { 6711 /* case 3 */ 6712 IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf ); 6713 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 6714 assign( tmpt0, getIReg(sz, gregOfRM(rm)) ); 6715 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 6716 mkexpr(tmpd), mkexpr(tmpt0)) ); 6717 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 6718 mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr ); 6719 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 6720 putIReg(sz, gregOfRM(rm), mkexpr(tmpd)); 6721 DIP("xadd%c %s, %s\n", 6722 nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf); 6723 *decodeOK = True; 6724 return len+delta0; 6725 } 6726 /*UNREACHED*/ 6727 vassert(0); 6728 } 6729 6730 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 6731 6732 static 6733 UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 ) 6734 { 6735 Int len; 6736 IRTemp addr; 6737 UChar rm = getIByte(delta0); 6738 HChar dis_buf[50]; 6739 6740 if (epartIsReg(rm)) { 6741 putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 6742 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 6743 return 1+delta0; 6744 } else { 6745 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6746 putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 6747 DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 6748 return len+delta0; 6749 } 6750 } 6751 6752 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 6753 dst is ireg and sz==4, zero out top half of it. */ 6754 6755 static 6756 UInt dis_mov_Sw_Ew ( UChar sorb, 6757 Int sz, 6758 Int delta0 ) 6759 { 6760 Int len; 6761 IRTemp addr; 6762 UChar rm = getIByte(delta0); 6763 HChar dis_buf[50]; 6764 6765 vassert(sz == 2 || sz == 4); 6766 6767 if (epartIsReg(rm)) { 6768 if (sz == 4) 6769 putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 6770 else 6771 putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 6772 6773 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 6774 return 1+delta0; 6775 } else { 6776 addr = disAMode ( &len, sorb, delta0, dis_buf ); 6777 storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 6778 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 6779 return len+delta0; 6780 } 6781 } 6782 6783 6784 static 6785 void dis_push_segreg ( UInt sreg, Int sz ) 6786 { 6787 IRTemp t1 = newTemp(Ity_I16); 6788 IRTemp ta = newTemp(Ity_I32); 6789 vassert(sz == 2 || sz == 4); 6790 6791 assign( t1, getSReg(sreg) ); 6792 assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 6793 putIReg(4, R_ESP, mkexpr(ta)); 6794 storeLE( mkexpr(ta), mkexpr(t1) ); 6795 6796 DIP("push%c %s\n", sz==2 ? 
'w' : 'l', nameSReg(sreg)); 6797 } 6798 6799 static 6800 void dis_pop_segreg ( UInt sreg, Int sz ) 6801 { 6802 IRTemp t1 = newTemp(Ity_I16); 6803 IRTemp ta = newTemp(Ity_I32); 6804 vassert(sz == 2 || sz == 4); 6805 6806 assign( ta, getIReg(4, R_ESP) ); 6807 assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 6808 6809 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 6810 putSReg( sreg, mkexpr(t1) ); 6811 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg)); 6812 } 6813 6814 static 6815 void dis_ret ( UInt d32 ) 6816 { 6817 IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32); 6818 assign(t1, getIReg(4,R_ESP)); 6819 assign(t2, loadLE(Ity_I32,mkexpr(t1))); 6820 putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32))); 6821 jmp_treg(Ijk_Ret,t2); 6822 } 6823 6824 /*------------------------------------------------------------*/ 6825 /*--- SSE/SSE2/SSE3 helpers ---*/ 6826 /*------------------------------------------------------------*/ 6827 6828 /* Worker function; do not call directly. 6829 Handles full width G = G `op` E and G = (not G) `op` E. 6830 */ 6831 6832 static UInt dis_SSE_E_to_G_all_wrk ( 6833 UChar sorb, Int delta, 6834 HChar* opname, IROp op, 6835 Bool invertG 6836 ) 6837 { 6838 HChar dis_buf[50]; 6839 Int alen; 6840 IRTemp addr; 6841 UChar rm = getIByte(delta); 6842 IRExpr* gpart 6843 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm))) 6844 : getXMMReg(gregOfRM(rm)); 6845 if (epartIsReg(rm)) { 6846 putXMMReg( gregOfRM(rm), 6847 binop(op, gpart, 6848 getXMMReg(eregOfRM(rm))) ); 6849 DIP("%s %s,%s\n", opname, 6850 nameXMMReg(eregOfRM(rm)), 6851 nameXMMReg(gregOfRM(rm)) ); 6852 return delta+1; 6853 } else { 6854 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6855 putXMMReg( gregOfRM(rm), 6856 binop(op, gpart, 6857 loadLE(Ity_V128, mkexpr(addr))) ); 6858 DIP("%s %s,%s\n", opname, 6859 dis_buf, 6860 nameXMMReg(gregOfRM(rm)) ); 6861 return delta+alen; 6862 } 6863 } 6864 6865 6866 /* All lanes SSE binary operation, G = G `op` E. */ 6867 6868 static 6869 UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op ) 6870 { 6871 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False ); 6872 } 6873 6874 /* All lanes SSE binary operation, G = (not G) `op` E. */ 6875 6876 static 6877 UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta, 6878 HChar* opname, IROp op ) 6879 { 6880 return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True ); 6881 } 6882 6883 6884 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 6885 6886 static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta, 6887 HChar* opname, IROp op ) 6888 { 6889 HChar dis_buf[50]; 6890 Int alen; 6891 IRTemp addr; 6892 UChar rm = getIByte(delta); 6893 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6894 if (epartIsReg(rm)) { 6895 putXMMReg( gregOfRM(rm), 6896 binop(op, gpart, 6897 getXMMReg(eregOfRM(rm))) ); 6898 DIP("%s %s,%s\n", opname, 6899 nameXMMReg(eregOfRM(rm)), 6900 nameXMMReg(gregOfRM(rm)) ); 6901 return delta+1; 6902 } else { 6903 /* We can only do a 32-bit memory read, so the upper 3/4 of the 6904 E operand needs to be made simply of zeroes. */ 6905 IRTemp epart = newTemp(Ity_V128); 6906 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6907 assign( epart, unop( Iop_32UtoV128, 6908 loadLE(Ity_I32, mkexpr(addr))) ); 6909 putXMMReg( gregOfRM(rm), 6910 binop(op, gpart, mkexpr(epart)) ); 6911 DIP("%s %s,%s\n", opname, 6912 dis_buf, 6913 nameXMMReg(gregOfRM(rm)) ); 6914 return delta+alen; 6915 } 6916 } 6917 6918 6919 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
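   For example addsd: G.lo64 = G.lo64 + E.lo64, with the upper lane
   of G passed through unchanged; that pass-through is part of the
   F0x2-style IROps themselves.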
*/ 6920 6921 static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta, 6922 HChar* opname, IROp op ) 6923 { 6924 HChar dis_buf[50]; 6925 Int alen; 6926 IRTemp addr; 6927 UChar rm = getIByte(delta); 6928 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 6929 if (epartIsReg(rm)) { 6930 putXMMReg( gregOfRM(rm), 6931 binop(op, gpart, 6932 getXMMReg(eregOfRM(rm))) ); 6933 DIP("%s %s,%s\n", opname, 6934 nameXMMReg(eregOfRM(rm)), 6935 nameXMMReg(gregOfRM(rm)) ); 6936 return delta+1; 6937 } else { 6938 /* We can only do a 64-bit memory read, so the upper half of the 6939 E operand needs to be made simply of zeroes. */ 6940 IRTemp epart = newTemp(Ity_V128); 6941 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6942 assign( epart, unop( Iop_64UtoV128, 6943 loadLE(Ity_I64, mkexpr(addr))) ); 6944 putXMMReg( gregOfRM(rm), 6945 binop(op, gpart, mkexpr(epart)) ); 6946 DIP("%s %s,%s\n", opname, 6947 dis_buf, 6948 nameXMMReg(gregOfRM(rm)) ); 6949 return delta+alen; 6950 } 6951 } 6952 6953 6954 /* All lanes unary SSE operation, G = op(E). */ 6955 6956 static UInt dis_SSE_E_to_G_unary_all ( 6957 UChar sorb, Int delta, 6958 HChar* opname, IROp op 6959 ) 6960 { 6961 HChar dis_buf[50]; 6962 Int alen; 6963 IRTemp addr; 6964 UChar rm = getIByte(delta); 6965 if (epartIsReg(rm)) { 6966 putXMMReg( gregOfRM(rm), 6967 unop(op, getXMMReg(eregOfRM(rm))) ); 6968 DIP("%s %s,%s\n", opname, 6969 nameXMMReg(eregOfRM(rm)), 6970 nameXMMReg(gregOfRM(rm)) ); 6971 return delta+1; 6972 } else { 6973 addr = disAMode ( &alen, sorb, delta, dis_buf ); 6974 putXMMReg( gregOfRM(rm), 6975 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 6976 DIP("%s %s,%s\n", opname, 6977 dis_buf, 6978 nameXMMReg(gregOfRM(rm)) ); 6979 return delta+alen; 6980 } 6981 } 6982 6983 6984 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 6985 6986 static UInt dis_SSE_E_to_G_unary_lo32 ( 6987 UChar sorb, Int delta, 6988 HChar* opname, IROp op 6989 ) 6990 { 6991 /* First we need to get the old G value and patch the low 32 bits 6992 of the E operand into it. Then apply op and write back to G. */ 6993 HChar dis_buf[50]; 6994 Int alen; 6995 IRTemp addr; 6996 UChar rm = getIByte(delta); 6997 IRTemp oldG0 = newTemp(Ity_V128); 6998 IRTemp oldG1 = newTemp(Ity_V128); 6999 7000 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7001 7002 if (epartIsReg(rm)) { 7003 assign( oldG1, 7004 binop( Iop_SetV128lo32, 7005 mkexpr(oldG0), 7006 getXMMRegLane32(eregOfRM(rm), 0)) ); 7007 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7008 DIP("%s %s,%s\n", opname, 7009 nameXMMReg(eregOfRM(rm)), 7010 nameXMMReg(gregOfRM(rm)) ); 7011 return delta+1; 7012 } else { 7013 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7014 assign( oldG1, 7015 binop( Iop_SetV128lo32, 7016 mkexpr(oldG0), 7017 loadLE(Ity_I32, mkexpr(addr)) )); 7018 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7019 DIP("%s %s,%s\n", opname, 7020 dis_buf, 7021 nameXMMReg(gregOfRM(rm)) ); 7022 return delta+alen; 7023 } 7024 } 7025 7026 7027 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 7028 7029 static UInt dis_SSE_E_to_G_unary_lo64 ( 7030 UChar sorb, Int delta, 7031 HChar* opname, IROp op 7032 ) 7033 { 7034 /* First we need to get the old G value and patch the low 64 bits 7035 of the E operand into it. Then apply op and write back to G. 
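   For example sqrtsd: the result's low lane is sqrt(E.lo64) while
   the upper lane keeps G's old value.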
*/ 7036 HChar dis_buf[50]; 7037 Int alen; 7038 IRTemp addr; 7039 UChar rm = getIByte(delta); 7040 IRTemp oldG0 = newTemp(Ity_V128); 7041 IRTemp oldG1 = newTemp(Ity_V128); 7042 7043 assign( oldG0, getXMMReg(gregOfRM(rm)) ); 7044 7045 if (epartIsReg(rm)) { 7046 assign( oldG1, 7047 binop( Iop_SetV128lo64, 7048 mkexpr(oldG0), 7049 getXMMRegLane64(eregOfRM(rm), 0)) ); 7050 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7051 DIP("%s %s,%s\n", opname, 7052 nameXMMReg(eregOfRM(rm)), 7053 nameXMMReg(gregOfRM(rm)) ); 7054 return delta+1; 7055 } else { 7056 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7057 assign( oldG1, 7058 binop( Iop_SetV128lo64, 7059 mkexpr(oldG0), 7060 loadLE(Ity_I64, mkexpr(addr)) )); 7061 putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) ); 7062 DIP("%s %s,%s\n", opname, 7063 dis_buf, 7064 nameXMMReg(gregOfRM(rm)) ); 7065 return delta+alen; 7066 } 7067 } 7068 7069 7070 /* SSE integer binary operation: 7071 G = G `op` E (eLeft == False) 7072 G = E `op` G (eLeft == True) 7073 */ 7074 static UInt dis_SSEint_E_to_G( 7075 UChar sorb, Int delta, 7076 HChar* opname, IROp op, 7077 Bool eLeft 7078 ) 7079 { 7080 HChar dis_buf[50]; 7081 Int alen; 7082 IRTemp addr; 7083 UChar rm = getIByte(delta); 7084 IRExpr* gpart = getXMMReg(gregOfRM(rm)); 7085 IRExpr* epart = NULL; 7086 if (epartIsReg(rm)) { 7087 epart = getXMMReg(eregOfRM(rm)); 7088 DIP("%s %s,%s\n", opname, 7089 nameXMMReg(eregOfRM(rm)), 7090 nameXMMReg(gregOfRM(rm)) ); 7091 delta += 1; 7092 } else { 7093 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7094 epart = loadLE(Ity_V128, mkexpr(addr)); 7095 DIP("%s %s,%s\n", opname, 7096 dis_buf, 7097 nameXMMReg(gregOfRM(rm)) ); 7098 delta += alen; 7099 } 7100 putXMMReg( gregOfRM(rm), 7101 eLeft ? binop(op, epart, gpart) 7102 : binop(op, gpart, epart) ); 7103 return delta; 7104 } 7105 7106 7107 /* Helper for doing SSE FP comparisons. */ 7108 7109 static void findSSECmpOp ( Bool* needNot, IROp* op, 7110 Int imm8, Bool all_lanes, Int sz ) 7111 { 7112 imm8 &= 7; 7113 *needNot = False; 7114 *op = Iop_INVALID; 7115 if (imm8 >= 4) { 7116 *needNot = True; 7117 imm8 -= 4; 7118 } 7119 7120 if (sz == 4 && all_lanes) { 7121 switch (imm8) { 7122 case 0: *op = Iop_CmpEQ32Fx4; return; 7123 case 1: *op = Iop_CmpLT32Fx4; return; 7124 case 2: *op = Iop_CmpLE32Fx4; return; 7125 case 3: *op = Iop_CmpUN32Fx4; return; 7126 default: break; 7127 } 7128 } 7129 if (sz == 4 && !all_lanes) { 7130 switch (imm8) { 7131 case 0: *op = Iop_CmpEQ32F0x4; return; 7132 case 1: *op = Iop_CmpLT32F0x4; return; 7133 case 2: *op = Iop_CmpLE32F0x4; return; 7134 case 3: *op = Iop_CmpUN32F0x4; return; 7135 default: break; 7136 } 7137 } 7138 if (sz == 8 && all_lanes) { 7139 switch (imm8) { 7140 case 0: *op = Iop_CmpEQ64Fx2; return; 7141 case 1: *op = Iop_CmpLT64Fx2; return; 7142 case 2: *op = Iop_CmpLE64Fx2; return; 7143 case 3: *op = Iop_CmpUN64Fx2; return; 7144 default: break; 7145 } 7146 } 7147 if (sz == 8 && !all_lanes) { 7148 switch (imm8) { 7149 case 0: *op = Iop_CmpEQ64F0x2; return; 7150 case 1: *op = Iop_CmpLT64F0x2; return; 7151 case 2: *op = Iop_CmpLE64F0x2; return; 7152 case 3: *op = Iop_CmpUN64F0x2; return; 7153 default: break; 7154 } 7155 } 7156 vpanic("findSSECmpOp(x86,guest)"); 7157 } 7158 7159 /* Handles SSE 32F/64F comparisons. 
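   The imm8 predicate encoding decoded by findSSECmpOp above is

      0=EQ  1=LT  2=LE  3=UNORD  4=NEQ  5=NLT  6=NLE  7=ORD

   where each of 4..7 is the logical negation of the entry four below
   it -- hence the needNot flag rather than four extra IROps.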
*/ 7160 7161 static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta, 7162 HChar* opname, Bool all_lanes, Int sz ) 7163 { 7164 HChar dis_buf[50]; 7165 Int alen, imm8; 7166 IRTemp addr; 7167 Bool needNot = False; 7168 IROp op = Iop_INVALID; 7169 IRTemp plain = newTemp(Ity_V128); 7170 UChar rm = getIByte(delta); 7171 UShort mask = 0; 7172 vassert(sz == 4 || sz == 8); 7173 if (epartIsReg(rm)) { 7174 imm8 = getIByte(delta+1); 7175 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7176 assign( plain, binop(op, getXMMReg(gregOfRM(rm)), 7177 getXMMReg(eregOfRM(rm))) ); 7178 delta += 2; 7179 DIP("%s $%d,%s,%s\n", opname, 7180 (Int)imm8, 7181 nameXMMReg(eregOfRM(rm)), 7182 nameXMMReg(gregOfRM(rm)) ); 7183 } else { 7184 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7185 imm8 = getIByte(delta+alen); 7186 findSSECmpOp(&needNot, &op, imm8, all_lanes, sz); 7187 assign( plain, 7188 binop( 7189 op, 7190 getXMMReg(gregOfRM(rm)), 7191 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 7192 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 7193 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 7194 ) 7195 ); 7196 delta += alen+1; 7197 DIP("%s $%d,%s,%s\n", opname, 7198 (Int)imm8, 7199 dis_buf, 7200 nameXMMReg(gregOfRM(rm)) ); 7201 } 7202 7203 if (needNot && all_lanes) { 7204 putXMMReg( gregOfRM(rm), 7205 unop(Iop_NotV128, mkexpr(plain)) ); 7206 } 7207 else 7208 if (needNot && !all_lanes) { 7209 mask = toUShort( sz==4 ? 0x000F : 0x00FF ); 7210 putXMMReg( gregOfRM(rm), 7211 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 7212 } 7213 else { 7214 putXMMReg( gregOfRM(rm), mkexpr(plain) ); 7215 } 7216 7217 return delta; 7218 } 7219 7220 7221 /* Vector by scalar shift of G by the amount specified at the bottom 7222 of E. */ 7223 7224 static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta, 7225 HChar* opname, IROp op ) 7226 { 7227 HChar dis_buf[50]; 7228 Int alen, size; 7229 IRTemp addr; 7230 Bool shl, shr, sar; 7231 UChar rm = getIByte(delta); 7232 IRTemp g0 = newTemp(Ity_V128); 7233 IRTemp g1 = newTemp(Ity_V128); 7234 IRTemp amt = newTemp(Ity_I32); 7235 IRTemp amt8 = newTemp(Ity_I8); 7236 if (epartIsReg(rm)) { 7237 assign( amt, getXMMRegLane32(eregOfRM(rm), 0) ); 7238 DIP("%s %s,%s\n", opname, 7239 nameXMMReg(eregOfRM(rm)), 7240 nameXMMReg(gregOfRM(rm)) ); 7241 delta++; 7242 } else { 7243 addr = disAMode ( &alen, sorb, delta, dis_buf ); 7244 assign( amt, loadLE(Ity_I32, mkexpr(addr)) ); 7245 DIP("%s %s,%s\n", opname, 7246 dis_buf, 7247 nameXMMReg(gregOfRM(rm)) ); 7248 delta += alen; 7249 } 7250 assign( g0, getXMMReg(gregOfRM(rm)) ); 7251 assign( amt8, unop(Iop_32to8, mkexpr(amt)) ); 7252 7253 shl = shr = sar = False; 7254 size = 0; 7255 switch (op) { 7256 case Iop_ShlN16x8: shl = True; size = 32; break; 7257 case Iop_ShlN32x4: shl = True; size = 32; break; 7258 case Iop_ShlN64x2: shl = True; size = 64; break; 7259 case Iop_SarN16x8: sar = True; size = 16; break; 7260 case Iop_SarN32x4: sar = True; size = 32; break; 7261 case Iop_ShrN16x8: shr = True; size = 16; break; 7262 case Iop_ShrN32x4: shr = True; size = 32; break; 7263 case Iop_ShrN64x2: shr = True; size = 64; break; 7264 default: vassert(0); 7265 } 7266 7267 if (shl || shr) { 7268 assign( 7269 g1, 7270 IRExpr_Mux0X( 7271 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 7272 mkV128(0x0000), 7273 binop(op, mkexpr(g0), mkexpr(amt8)) 7274 ) 7275 ); 7276 } else 7277 if (sar) { 7278 assign( 7279 g1, 7280 IRExpr_Mux0X( 7281 unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))), 7282 binop(op, mkexpr(g0), mkU8(size-1)), 
7283 binop(op, mkexpr(g0), mkexpr(amt8)) 7284 ) 7285 ); 7286 } else { 7287 /*NOTREACHED*/ 7288 vassert(0); 7289 } 7290 7291 putXMMReg( gregOfRM(rm), mkexpr(g1) ); 7292 return delta; 7293 } 7294 7295 7296 /* Vector by scalar shift of E by an immediate byte. */ 7297 7298 static 7299 UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op ) 7300 { 7301 Bool shl, shr, sar; 7302 UChar rm = getIByte(delta); 7303 IRTemp e0 = newTemp(Ity_V128); 7304 IRTemp e1 = newTemp(Ity_V128); 7305 UChar amt, size; 7306 vassert(epartIsReg(rm)); 7307 vassert(gregOfRM(rm) == 2 7308 || gregOfRM(rm) == 4 || gregOfRM(rm) == 6); 7309 amt = getIByte(delta+1); 7310 delta += 2; 7311 DIP("%s $%d,%s\n", opname, 7312 (Int)amt, 7313 nameXMMReg(eregOfRM(rm)) ); 7314 assign( e0, getXMMReg(eregOfRM(rm)) ); 7315 7316 shl = shr = sar = False; 7317 size = 0; 7318 switch (op) { 7319 case Iop_ShlN16x8: shl = True; size = 16; break; 7320 case Iop_ShlN32x4: shl = True; size = 32; break; 7321 case Iop_ShlN64x2: shl = True; size = 64; break; 7322 case Iop_SarN16x8: sar = True; size = 16; break; 7323 case Iop_SarN32x4: sar = True; size = 32; break; 7324 case Iop_ShrN16x8: shr = True; size = 16; break; 7325 case Iop_ShrN32x4: shr = True; size = 32; break; 7326 case Iop_ShrN64x2: shr = True; size = 64; break; 7327 default: vassert(0); 7328 } 7329 7330 if (shl || shr) { 7331 assign( e1, amt >= size 7332 ? mkV128(0x0000) 7333 : binop(op, mkexpr(e0), mkU8(amt)) 7334 ); 7335 } else 7336 if (sar) { 7337 assign( e1, amt >= size 7338 ? binop(op, mkexpr(e0), mkU8(size-1)) 7339 : binop(op, mkexpr(e0), mkU8(amt)) 7340 ); 7341 } else { 7342 /*NOTREACHED*/ 7343 vassert(0); 7344 } 7345 7346 putXMMReg( eregOfRM(rm), mkexpr(e1) ); 7347 return delta; 7348 } 7349 7350 7351 /* Get the current SSE rounding mode. */ 7352 7353 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 7354 { 7355 return binop( Iop_And32, 7356 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ), 7357 mkU32(3) ); 7358 } 7359 7360 static void put_sse_roundingmode ( IRExpr* sseround ) 7361 { 7362 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 7363 stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) ); 7364 } 7365 7366 /* Break a 128-bit value up into four 32-bit ints. */ 7367 7368 static void breakup128to32s ( IRTemp t128, 7369 /*OUTs*/ 7370 IRTemp* t3, IRTemp* t2, 7371 IRTemp* t1, IRTemp* t0 ) 7372 { 7373 IRTemp hi64 = newTemp(Ity_I64); 7374 IRTemp lo64 = newTemp(Ity_I64); 7375 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 7376 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 7377 7378 vassert(t0 && *t0 == IRTemp_INVALID); 7379 vassert(t1 && *t1 == IRTemp_INVALID); 7380 vassert(t2 && *t2 == IRTemp_INVALID); 7381 vassert(t3 && *t3 == IRTemp_INVALID); 7382 7383 *t0 = newTemp(Ity_I32); 7384 *t1 = newTemp(Ity_I32); 7385 *t2 = newTemp(Ity_I32); 7386 *t3 = newTemp(Ity_I32); 7387 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 7388 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 7389 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 7390 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 7391 } 7392 7393 /* Construct a 128-bit value from four 32-bit ints. */ 7394 7395 static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2, 7396 IRTemp t1, IRTemp t0 ) 7397 { 7398 return 7399 binop( Iop_64HLtoV128, 7400 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 7401 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 7402 ); 7403 } 7404 7405 /* Break a 64-bit value up into four 16-bit ints. 
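   (For t64 = 0x0123456789ABCDEF this gives *t3 = 0x0123,
   *t2 = 0x4567, *t1 = 0x89AB, *t0 = 0xCDEF.)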
*/
7406
7407 static void breakup64to16s ( IRTemp t64,
7408                              /*OUTs*/
7409                              IRTemp* t3, IRTemp* t2,
7410                              IRTemp* t1, IRTemp* t0 )
7411 {
7412    IRTemp hi32 = newTemp(Ity_I32);
7413    IRTemp lo32 = newTemp(Ity_I32);
7414    assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
7415    assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
7416
7417    vassert(t0 && *t0 == IRTemp_INVALID);
7418    vassert(t1 && *t1 == IRTemp_INVALID);
7419    vassert(t2 && *t2 == IRTemp_INVALID);
7420    vassert(t3 && *t3 == IRTemp_INVALID);
7421
7422    *t0 = newTemp(Ity_I16);
7423    *t1 = newTemp(Ity_I16);
7424    *t2 = newTemp(Ity_I16);
7425    *t3 = newTemp(Ity_I16);
7426    assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
7427    assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
7428    assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
7429    assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
7430 }
7431
7432 /* Construct a 64-bit value from four 16-bit ints. */
7433
7434 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
7435                              IRTemp t1, IRTemp t0 )
7436 {
7437    return
7438       binop( Iop_32HLto64,
7439              binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
7440              binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
7441       );
7442 }
7443
7444 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7445    in the given 32-bit temporary.  The flags that are set are: O S Z A
7446    C P D ID AC.
7447
7448    In all cases, code to set AC is generated.  However, VEX actually
7449    ignores the AC value and so can optionally emit an emulation
7450    warning when it is enabled.  In this routine, an emulation warning
7451    is only emitted if emit_AC_emwarn is True, in which case
7452    next_insn_EIP must be correct (this allows for correct code
7453    generation for popfl/popfw).  If emit_AC_emwarn is False,
7454    next_insn_EIP is unimportant (this allows for easy, if kludgey,
7455    code generation for IRET). */
7456
7457 static
7458 void set_EFLAGS_from_value ( IRTemp t1,
7459                              Bool   emit_AC_emwarn,
7460                              Addr32 next_insn_EIP )
7461 {
7462    vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
7463
7464    /* t1 is the flag word.  Mask out everything except OSZACP and set
7465       the flags thunk to X86G_CC_OP_COPY. */
7466    stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
7467    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
7468    stmt( IRStmt_Put( OFFB_CC_DEP1,
7469                      binop(Iop_And32,
7470                            mkexpr(t1),
7471                            mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
7472                                   | X86G_CC_MASK_A | X86G_CC_MASK_Z
7473                                   | X86G_CC_MASK_S | X86G_CC_MASK_O )
7474                           )
7475                     )
7476        );
7477    /* Set NDEP even though it isn't used.  This makes redundant-PUT
7478       elimination of previous stores to this field work better. */
7479    stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
7480
7481    /* Also need to set the D flag, which is held in bit 10 of t1.
7482       If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7483    stmt( IRStmt_Put(
7484             OFFB_DFLAG,
7485             IRExpr_Mux0X(
7486                unop(Iop_32to8,
7487                     binop(Iop_And32,
7488                           binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
7489                           mkU32(1))),
7490                mkU32(1),
7491                mkU32(0xFFFFFFFF)))
7492        );
7493
7494    /* Set the ID flag */
7495    stmt( IRStmt_Put(
7496             OFFB_IDFLAG,
7497             IRExpr_Mux0X(
7498                unop(Iop_32to8,
7499                     binop(Iop_And32,
7500                           binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
7501                           mkU32(1))),
7502                mkU32(0),
7503                mkU32(1)))
7504        );
7505
7506    /* And set the AC flag.  If setting it to 1, possibly emit an
7507       emulation warning.
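      (AC is bit 18 of the pushfl image, hence the shift-by-18 and
      mask-with-1 below and the 1<<18 test in the emulation-warning
      exit.)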
*/ 7508 stmt( IRStmt_Put( 7509 OFFB_ACFLAG, 7510 IRExpr_Mux0X( 7511 unop(Iop_32to8, 7512 binop(Iop_And32, 7513 binop(Iop_Shr32, mkexpr(t1), mkU8(18)), 7514 mkU32(1))), 7515 mkU32(0), 7516 mkU32(1))) 7517 ); 7518 7519 if (emit_AC_emwarn) { 7520 put_emwarn( mkU32(EmWarn_X86_acFlag) ); 7521 stmt( 7522 IRStmt_Exit( 7523 binop( Iop_CmpNE32, 7524 binop(Iop_And32, mkexpr(t1), mkU32(1<<18)), 7525 mkU32(0) ), 7526 Ijk_EmWarn, 7527 IRConst_U32( next_insn_EIP ) 7528 ) 7529 ); 7530 } 7531 } 7532 7533 7534 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit 7535 values (aa,bb), computes, for each of the 4 16-bit lanes: 7536 7537 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 7538 */ 7539 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 7540 { 7541 IRTemp aa = newTemp(Ity_I64); 7542 IRTemp bb = newTemp(Ity_I64); 7543 IRTemp aahi32s = newTemp(Ity_I64); 7544 IRTemp aalo32s = newTemp(Ity_I64); 7545 IRTemp bbhi32s = newTemp(Ity_I64); 7546 IRTemp bblo32s = newTemp(Ity_I64); 7547 IRTemp rHi = newTemp(Ity_I64); 7548 IRTemp rLo = newTemp(Ity_I64); 7549 IRTemp one32x2 = newTemp(Ity_I64); 7550 assign(aa, aax); 7551 assign(bb, bbx); 7552 assign( aahi32s, 7553 binop(Iop_SarN32x2, 7554 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 7555 mkU8(16) )); 7556 assign( aalo32s, 7557 binop(Iop_SarN32x2, 7558 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 7559 mkU8(16) )); 7560 assign( bbhi32s, 7561 binop(Iop_SarN32x2, 7562 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 7563 mkU8(16) )); 7564 assign( bblo32s, 7565 binop(Iop_SarN32x2, 7566 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 7567 mkU8(16) )); 7568 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 7569 assign( 7570 rHi, 7571 binop( 7572 Iop_ShrN32x2, 7573 binop( 7574 Iop_Add32x2, 7575 binop( 7576 Iop_ShrN32x2, 7577 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 7578 mkU8(14) 7579 ), 7580 mkexpr(one32x2) 7581 ), 7582 mkU8(1) 7583 ) 7584 ); 7585 assign( 7586 rLo, 7587 binop( 7588 Iop_ShrN32x2, 7589 binop( 7590 Iop_Add32x2, 7591 binop( 7592 Iop_ShrN32x2, 7593 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 7594 mkU8(14) 7595 ), 7596 mkexpr(one32x2) 7597 ), 7598 mkU8(1) 7599 ) 7600 ); 7601 return 7602 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 7603 } 7604 7605 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
Given two 64-bit 7606 values (aa,bb), computes, for each lane: 7607 7608 if aa_lane < 0 then - bb_lane 7609 else if aa_lane > 0 then bb_lane 7610 else 0 7611 */ 7612 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 7613 { 7614 IRTemp aa = newTemp(Ity_I64); 7615 IRTemp bb = newTemp(Ity_I64); 7616 IRTemp zero = newTemp(Ity_I64); 7617 IRTemp bbNeg = newTemp(Ity_I64); 7618 IRTemp negMask = newTemp(Ity_I64); 7619 IRTemp posMask = newTemp(Ity_I64); 7620 IROp opSub = Iop_INVALID; 7621 IROp opCmpGTS = Iop_INVALID; 7622 7623 switch (laneszB) { 7624 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 7625 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 7626 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 7627 default: vassert(0); 7628 } 7629 7630 assign( aa, aax ); 7631 assign( bb, bbx ); 7632 assign( zero, mkU64(0) ); 7633 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 7634 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 7635 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 7636 7637 return 7638 binop(Iop_Or64, 7639 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 7640 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 7641 7642 } 7643 7644 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 7645 value aa, computes, for each lane 7646 7647 if aa < 0 then -aa else aa 7648 7649 Note that the result is interpreted as unsigned, so that the 7650 absolute value of the most negative signed input can be 7651 represented. 7652 */ 7653 static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) 7654 { 7655 IRTemp aa = newTemp(Ity_I64); 7656 IRTemp zero = newTemp(Ity_I64); 7657 IRTemp aaNeg = newTemp(Ity_I64); 7658 IRTemp negMask = newTemp(Ity_I64); 7659 IRTemp posMask = newTemp(Ity_I64); 7660 IROp opSub = Iop_INVALID; 7661 IROp opSarN = Iop_INVALID; 7662 7663 switch (laneszB) { 7664 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 7665 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 7666 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 7667 default: vassert(0); 7668 } 7669 7670 assign( aa, aax ); 7671 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 7672 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 7673 assign( zero, mkU64(0) ); 7674 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 7675 return 7676 binop(Iop_Or64, 7677 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 7678 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); 7679 } 7680 7681 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 7682 IRTemp lo64, Int byteShift ) 7683 { 7684 vassert(byteShift >= 1 && byteShift <= 7); 7685 return 7686 binop(Iop_Or64, 7687 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 7688 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 7689 ); 7690 } 7691 7692 /* Generate a SIGSEGV followed by a restart of the current instruction 7693 if effective_addr is not 16-aligned. This is required behaviour 7694 for some SSE3 instructions and all 128-bit SSSE3 instructions. 7695 This assumes that guest_RIP_curr_instr is set correctly! */ 7696 /* TODO(glider): we've replaced the 0xF mask with 0x0, effectively disabling 7697 * the check. Need to enable it once TSan stops generating unaligned 7698 * accesses in the wrappers. 
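 * With the mask at 0x0 the CmpNE32 below can never fire, so the
 * SIGSEGV side-exit is dead code until the mask goes back to 0xF.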
* See http://code.google.com/p/data-race-test/issues/detail?id=49 */
7700 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
7701 {
7702    stmt(
7703       IRStmt_Exit(
7704          binop(Iop_CmpNE32,
7705                binop(Iop_And32,mkexpr(effective_addr),mkU32(0x0)),
7706                mkU32(0)),
7707          Ijk_SigSEGV,
7708          IRConst_U32(guest_EIP_curr_instr)
7709       )
7710    );
7711 }
7712
7713
7714 /* Helper for deciding whether a given insn (starting at the opcode
7715    byte) may validly be used with a LOCK prefix.  The following insns
7716    may be used with LOCK when their destination operand is in memory.
7717    AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7718
7719    ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
7720    OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
7721    ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
7722    SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
7723    AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
7724    SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
7725    XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31
7726
7727    DEC        FE /1,  FF /1
7728    INC        FE /0,  FF /0
7729
7730    NEG        F6 /3,  F7 /3
7731    NOT        F6 /2,  F7 /2
7732
7733    XCHG       86, 87
7734
7735    BTC        0F BB,  0F BA /7
7736    BTR        0F B3,  0F BA /6
7737    BTS        0F AB,  0F BA /5
7738
7739    CMPXCHG    0F B0,  0F B1
7740    CMPXCHG8B  0F C7 /1
7741
7742    XADD       0F C0,  0F C1
7743
7744    ------------------------------
7745
7746    80 /0  =  addb $imm8,  rm8
7747    81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
7748    82 /0  =  addb $imm8,  rm8
7749    83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16
7750
7751    00     =  addb r8,  rm8
7752    01     =  addl r32, rm32  and  addw r16, rm16
7753
7754    Same for OR ADC SBB AND SUB XOR
7755
7756    FE /1  =  dec rm8
7757    FF /1  =  dec rm32  and  dec rm16
7758
7759    FE /0  =  inc rm8
7760    FF /0  =  inc rm32  and  inc rm16
7761
7762    F6 /3  =  neg rm8
7763    F7 /3  =  neg rm32  and  neg rm16
7764
7765    F6 /2  =  not rm8
7766    F7 /2  =  not rm32  and  not rm16
7767
7768    0F BB     =  btcw r16, rm16    and  btcl r32, rm32
7769    0F BA /7  =  btcw $imm8, rm16  and  btcl $imm8, rm32
7770
7771    Same for BTS, BTR
7772 */
7773 static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
7774 {
7775    switch (opc[0]) {
7776       case 0x00: case 0x01: case 0x08: case 0x09:
7777       case 0x10: case 0x11: case 0x18: case 0x19:
7778       case 0x20: case 0x21: case 0x28: case 0x29:
7779       case 0x30: case 0x31:
7780          if (!epartIsReg(opc[1]))
7781             return True;
7782          break;
7783
7784       case 0x80: case 0x81: case 0x82: case 0x83:
7785          if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
7786              && !epartIsReg(opc[1]))
7787             return True;
7788          break;
7789
7790       case 0xFE: case 0xFF:
7791          if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
7792              && !epartIsReg(opc[1]))
7793             return True;
7794          break;
7795
7796       case 0xF6: case 0xF7:
7797          if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
7798              && !epartIsReg(opc[1]))
7799             return True;
7800          break;
7801
7802       case 0x86: case 0x87:
7803          if (!epartIsReg(opc[1]))
7804             return True;
7805          break;
7806
7807       case 0x0F: {
7808          switch (opc[1]) {
7809             case 0xBB: case 0xB3: case 0xAB:
7810                if (!epartIsReg(opc[2]))
7811                   return True;
7812                break;
7813             case 0xBA:
7814                if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
7815                    && !epartIsReg(opc[2]))
7816                   return True;
7817                break;
7818             case 0xB0: case 0xB1:
7819                if (!epartIsReg(opc[2]))
7820                   return True;
7821                break;
7822             case 0xC7:
7823                if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
7824                   return True;
7825                break;
7826             case 0xC0: case 0xC1:
7827                if (!epartIsReg(opc[2]))
7828                   return True;
7829                break;
7830             default:
7831                break;
7832          } /* switch (opc[1]) */
7833          break;
7834       }
7835
7836       default:
7837          break;
7838    } /* switch (opc[0]) */
7839
7840    return False;
7841
} 7842 7843 7844 /*------------------------------------------------------------*/ 7845 /*--- Disassemble a single instruction ---*/ 7846 /*------------------------------------------------------------*/ 7847 7848 /* Disassemble a single instruction into IR. The instruction is 7849 located in host memory at &guest_code[delta]. *expect_CAS is set 7850 to True if the resulting IR is expected to contain an IRCAS 7851 statement, and False if it's not expected to. This makes it 7852 possible for the caller of disInstr_X86_WRK to check that 7853 LOCK-prefixed instructions are at least plausibly translated, in 7854 that it becomes possible to check that a (validly) LOCK-prefixed 7855 instruction generates a translation containing an IRCAS, and 7856 instructions without LOCK prefixes don't generate translations 7857 containing an IRCAS. 7858 */ 7859 static 7860 DisResult disInstr_X86_WRK ( 7861 /*OUT*/Bool* expect_CAS, 7862 Bool put_IP, 7863 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 7864 Bool resteerCisOk, 7865 void* callback_opaque, 7866 Long delta64, 7867 VexArchInfo* archinfo, 7868 VexAbiInfo* vbi 7869 ) 7870 { 7871 IRType ty; 7872 IRTemp addr, t0, t1, t2, t3, t4, t5, t6; 7873 Int alen; 7874 UChar opc, modrm, abyte, pre; 7875 UInt d32; 7876 HChar dis_buf[50]; 7877 Int am_sz, d_sz, n_prefixes; 7878 DisResult dres; 7879 UChar* insn; /* used in SSE decoders */ 7880 7881 /* The running delta */ 7882 Int delta = (Int)delta64; 7883 7884 /* Holds eip at the start of the insn, so that we can print 7885 consistent error messages for unimplemented insns. */ 7886 Int delta_start = delta; 7887 7888 /* sz denotes the nominal data-op size of the insn; we change it to 7889 2 if an 0x66 prefix is seen */ 7890 Int sz = 4; 7891 7892 /* sorb holds the segment-override-prefix byte, if any. Zero if no 7893 prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65} 7894 indicating the prefix. */ 7895 UChar sorb = 0; 7896 7897 /* Gets set to True if a LOCK prefix is seen. */ 7898 Bool pfx_lock = False; 7899 7900 /* Set result defaults. */ 7901 dres.whatNext = Dis_Continue; 7902 dres.len = 0; 7903 dres.continueAt = 0; 7904 7905 *expect_CAS = False; 7906 7907 addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 7908 7909 vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr); 7910 DIP("\t0x%x: ", guest_EIP_bbstart+delta); 7911 7912 /* We may be asked to update the guest EIP before going further. */ 7913 if (put_IP) 7914 stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) ); 7915 7916 /* Spot "Special" instructions (see comment at top of file). */ 7917 { 7918 UChar* code = (UChar*)(guest_code + delta); 7919 /* Spot the 12-byte preamble: 7920 C1C703 roll $3, %edi 7921 C1C70D roll $13, %edi 7922 C1C71D roll $29, %edi 7923 C1C713 roll $19, %edi 7924 */ 7925 if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 && 7926 code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D && 7927 code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D && 7928 code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) { 7929 /* Got a "Special" instruction preamble. Which one is it? 
*/ 7930 if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) { 7931 /* %EDX = client_request ( %EAX ) */ 7932 DIP("%%edx = client_request ( %%eax )\n"); 7933 delta += 14; 7934 jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta); 7935 dres.whatNext = Dis_StopHere; 7936 goto decode_success; 7937 } 7938 else 7939 if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) { 7940 /* %EAX = guest_NRADDR */ 7941 DIP("%%eax = guest_NRADDR\n"); 7942 delta += 14; 7943 putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 )); 7944 goto decode_success; 7945 } 7946 else 7947 if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) { 7948 /* call-noredir *%EAX */ 7949 DIP("call-noredir *%%eax\n"); 7950 delta += 14; 7951 t1 = newTemp(Ity_I32); 7952 assign(t1, getIReg(4,R_EAX)); 7953 t2 = newTemp(Ity_I32); 7954 assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4))); 7955 putIReg(4, R_ESP, mkexpr(t2)); 7956 storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta)); 7957 jmp_treg(Ijk_NoRedir,t1); 7958 dres.whatNext = Dis_StopHere; 7959 goto decode_success; 7960 } 7961 /* We don't know what it is. */ 7962 goto decode_failure; 7963 /*NOTREACHED*/ 7964 } 7965 } 7966 7967 /* Handle a couple of weird-ass NOPs that have been observed in the 7968 wild. */ 7969 { 7970 UChar* code = (UChar*)(guest_code + delta); 7971 /* Sun's JVM 1.5.0 uses the following as a NOP: 7972 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */ 7973 if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64 7974 && code[3] == 0x65 && code[4] == 0x90) { 7975 DIP("%%es:%%cs:%%fs:%%gs:nop\n"); 7976 delta += 5; 7977 goto decode_success; 7978 } 7979 /* Don't barf on recent binutils padding, 7980 all variants of which are: nopw %cs:0x0(%eax,%eax,1) 7981 66 2e 0f 1f 84 00 00 00 00 00 7982 66 66 2e 0f 1f 84 00 00 00 00 00 7983 66 66 66 2e 0f 1f 84 00 00 00 00 00 7984 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7985 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7986 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 7987 */ 7988 if (code[0] == 0x66) { 7989 Int data16_cnt; 7990 for (data16_cnt = 1; data16_cnt < 6; data16_cnt++) 7991 if (code[data16_cnt] != 0x66) 7992 break; 7993 if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F 7994 && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84 7995 && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00 7996 && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00 7997 && code[data16_cnt + 8] == 0x00 ) { 7998 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n"); 7999 delta += 9 + data16_cnt; 8000 goto decode_success; 8001 } 8002 } 8003 } 8004 8005 /* Normal instruction handling starts here. */ 8006 8007 /* Deal with some but not all prefixes: 8008 66(oso) 8009 F0(lock) 8010 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:) 8011 Not dealt with (left in place): 8012 F2 F3 8013 */ 8014 n_prefixes = 0; 8015 while (True) { 8016 if (n_prefixes > 7) goto decode_failure; 8017 pre = getUChar(delta); 8018 switch (pre) { 8019 case 0x66: 8020 sz = 2; 8021 break; 8022 case 0xF0: 8023 pfx_lock = True; 8024 *expect_CAS = True; 8025 break; 8026 case 0x3E: /* %DS: */ 8027 case 0x26: /* %ES: */ 8028 case 0x64: /* %FS: */ 8029 case 0x65: /* %GS: */ 8030 if (sorb != 0) 8031 goto decode_failure; /* only one seg override allowed */ 8032 sorb = pre; 8033 break; 8034 case 0x2E: { /* %CS: */ 8035 /* 2E prefix on a conditional branch instruction is a 8036 branch-prediction hint, which can safely be ignored. 
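      (For instance, 2E 74 xx is jz with a "not taken" hint; since
      the hint has no architectural effect, we decode just the
      underlying jz.)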
*/ 8037 UChar op1 = getIByte(delta+1); 8038 UChar op2 = getIByte(delta+2); 8039 if ((op1 >= 0x70 && op1 <= 0x7F) 8040 || (op1 == 0xE3) 8041 || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) { 8042 if (0) vex_printf("vex x86->IR: ignoring branch hint\n"); 8043 } else { 8044 /* All other CS override cases are not handled */ 8045 goto decode_failure; 8046 } 8047 break; 8048 } 8049 case 0x36: /* %SS: */ 8050 /* SS override cases are not handled */ 8051 goto decode_failure; 8052 default: 8053 goto not_a_prefix; 8054 } 8055 n_prefixes++; 8056 delta++; 8057 } 8058 8059 not_a_prefix: 8060 8061 /* Now we should be looking at the primary opcode byte or the 8062 leading F2 or F3. Check that any LOCK prefix is actually 8063 allowed. */ 8064 8065 if (pfx_lock) { 8066 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 8067 DIP("lock "); 8068 } else { 8069 *expect_CAS = False; 8070 goto decode_failure; 8071 } 8072 } 8073 8074 8075 /* ---------------------------------------------------- */ 8076 /* --- The SSE decoder. --- */ 8077 /* ---------------------------------------------------- */ 8078 8079 /* What did I do to deserve SSE ? Perhaps I was really bad in a 8080 previous life? */ 8081 8082 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a 8083 later section, further on. */ 8084 8085 insn = (UChar*)&guest_code[delta]; 8086 8087 /* Treat fxsave specially. It should be doable even on an SSE0 8088 (Pentium-II class) CPU. Hence be prepared to handle it on 8089 any subarchitecture variant. 8090 */ 8091 8092 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 8093 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8094 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { 8095 IRDirty* d; 8096 modrm = getIByte(delta+2); 8097 vassert(sz == 4); 8098 vassert(!epartIsReg(modrm)); 8099 8100 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8101 delta += 2+alen; 8102 8103 DIP("fxsave %s\n", dis_buf); 8104 8105 /* Uses dirty helper: 8106 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ 8107 d = unsafeIRDirty_0_N ( 8108 0/*regparms*/, 8109 "x86g_dirtyhelper_FXSAVE", 8110 &x86g_dirtyhelper_FXSAVE, 8111 mkIRExprVec_1( mkexpr(addr) ) 8112 ); 8113 d->needsBBP = True; 8114 8115 /* declare we're writing memory */ 8116 d->mFx = Ifx_Write; 8117 d->mAddr = mkexpr(addr); 8118 d->mSize = 512; 8119 8120 /* declare we're reading guest state */ 8121 d->nFxState = 7; 8122 8123 d->fxState[0].fx = Ifx_Read; 8124 d->fxState[0].offset = OFFB_FTOP; 8125 d->fxState[0].size = sizeof(UInt); 8126 8127 d->fxState[1].fx = Ifx_Read; 8128 d->fxState[1].offset = OFFB_FPREGS; 8129 d->fxState[1].size = 8 * sizeof(ULong); 8130 8131 d->fxState[2].fx = Ifx_Read; 8132 d->fxState[2].offset = OFFB_FPTAGS; 8133 d->fxState[2].size = 8 * sizeof(UChar); 8134 8135 d->fxState[3].fx = Ifx_Read; 8136 d->fxState[3].offset = OFFB_FPROUND; 8137 d->fxState[3].size = sizeof(UInt); 8138 8139 d->fxState[4].fx = Ifx_Read; 8140 d->fxState[4].offset = OFFB_FC3210; 8141 d->fxState[4].size = sizeof(UInt); 8142 8143 d->fxState[5].fx = Ifx_Read; 8144 d->fxState[5].offset = OFFB_XMM0; 8145 d->fxState[5].size = 8 * sizeof(U128); 8146 8147 d->fxState[6].fx = Ifx_Read; 8148 d->fxState[6].offset = OFFB_SSEROUND; 8149 d->fxState[6].size = sizeof(UInt); 8150 8151 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8152 images are packed back-to-back. If not, the value of 8153 d->fxState[5].size is wrong. 
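      (Concretely: OFFB_XMM0 through OFFB_XMM7 must form one
      contiguous 8 * 16 = 128 byte block, which is what the
      8 * sizeof(U128) claim above asserts.)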
*/ 8154 vassert(16 == sizeof(U128)); 8155 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8156 8157 stmt( IRStmt_Dirty(d) ); 8158 8159 goto decode_success; 8160 } 8161 8162 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 8163 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE 8164 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) { 8165 IRDirty* d; 8166 modrm = getIByte(delta+2); 8167 vassert(sz == 4); 8168 vassert(!epartIsReg(modrm)); 8169 8170 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8171 delta += 2+alen; 8172 8173 DIP("fxrstor %s\n", dis_buf); 8174 8175 /* Uses dirty helper: 8176 void x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) */ 8177 d = unsafeIRDirty_0_N ( 8178 0/*regparms*/, 8179 "x86g_dirtyhelper_FXRSTOR", 8180 &x86g_dirtyhelper_FXRSTOR, 8181 mkIRExprVec_1( mkexpr(addr) ) 8182 ); 8183 d->needsBBP = True; 8184 8185 /* declare we're reading memory */ 8186 d->mFx = Ifx_Read; 8187 d->mAddr = mkexpr(addr); 8188 d->mSize = 512; 8189 8190 /* declare we're writing guest state */ 8191 d->nFxState = 7; 8192 8193 d->fxState[0].fx = Ifx_Write; 8194 d->fxState[0].offset = OFFB_FTOP; 8195 d->fxState[0].size = sizeof(UInt); 8196 8197 d->fxState[1].fx = Ifx_Write; 8198 d->fxState[1].offset = OFFB_FPREGS; 8199 d->fxState[1].size = 8 * sizeof(ULong); 8200 8201 d->fxState[2].fx = Ifx_Write; 8202 d->fxState[2].offset = OFFB_FPTAGS; 8203 d->fxState[2].size = 8 * sizeof(UChar); 8204 8205 d->fxState[3].fx = Ifx_Write; 8206 d->fxState[3].offset = OFFB_FPROUND; 8207 d->fxState[3].size = sizeof(UInt); 8208 8209 d->fxState[4].fx = Ifx_Write; 8210 d->fxState[4].offset = OFFB_FC3210; 8211 d->fxState[4].size = sizeof(UInt); 8212 8213 d->fxState[5].fx = Ifx_Write; 8214 d->fxState[5].offset = OFFB_XMM0; 8215 d->fxState[5].size = 8 * sizeof(U128); 8216 8217 d->fxState[6].fx = Ifx_Write; 8218 d->fxState[6].offset = OFFB_SSEROUND; 8219 d->fxState[6].size = sizeof(UInt); 8220 8221 /* Be paranoid ... this assertion tries to ensure the 8 %xmm 8222 images are packed back-to-back. If not, the value of 8223 d->fxState[5].size is wrong. */ 8224 vassert(16 == sizeof(U128)); 8225 vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); 8226 8227 stmt( IRStmt_Dirty(d) ); 8228 8229 goto decode_success; 8230 } 8231 8232 /* ------ SSE decoder main ------ */ 8233 8234 /* Skip parts of the decoder which don't apply given the stated 8235 guest subarchitecture. */ 8236 if (archinfo->hwcaps == 0/*baseline, no sse at all*/) 8237 goto after_sse_decoders; 8238 8239 /* Otherwise we must be doing sse1 or sse2, so we can at least try 8240 for SSE1 here. 
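      (A sketch of the capability gating: hwcaps == 0 was already
      routed past all SSE handling just above, and the SSE2-only
      decoders further down are additionally gated on
      VEX_HWCAPS_X86_SSE2.)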
*/ 8241 8242 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 8243 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) { 8244 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 ); 8245 goto decode_success; 8246 } 8247 8248 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 8249 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) { 8250 vassert(sz == 4); 8251 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 ); 8252 goto decode_success; 8253 } 8254 8255 /* 0F 55 = ANDNPS -- G = (not G) and E */ 8256 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) { 8257 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 ); 8258 goto decode_success; 8259 } 8260 8261 /* 0F 54 = ANDPS -- G = G and E */ 8262 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) { 8263 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 ); 8264 goto decode_success; 8265 } 8266 8267 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 8268 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) { 8269 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 ); 8270 goto decode_success; 8271 } 8272 8273 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 8274 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) { 8275 vassert(sz == 4); 8276 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 ); 8277 goto decode_success; 8278 } 8279 8280 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 8281 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 8282 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 8283 IRTemp argL = newTemp(Ity_F32); 8284 IRTemp argR = newTemp(Ity_F32); 8285 modrm = getIByte(delta+2); 8286 if (epartIsReg(modrm)) { 8287 assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 8288 delta += 2+1; 8289 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8290 nameXMMReg(gregOfRM(modrm)) ); 8291 } else { 8292 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8293 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 8294 delta += 2+alen; 8295 DIP("[u]comiss %s,%s\n", dis_buf, 8296 nameXMMReg(gregOfRM(modrm)) ); 8297 } 8298 assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 8299 8300 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 8301 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 8302 stmt( IRStmt_Put( 8303 OFFB_CC_DEP1, 8304 binop( Iop_And32, 8305 binop(Iop_CmpF64, 8306 unop(Iop_F32toF64,mkexpr(argL)), 8307 unop(Iop_F32toF64,mkexpr(argR))), 8308 mkU32(0x45) 8309 ))); 8310 /* Set NDEP even though it isn't used. This makes redundant-PUT 8311 elimination of previous stores to this field work better. 
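      (As for the 0x45 mask above: assuming the usual IRCmpF64Result
      encoding, it keeps exactly the bits that model Z, C and P --
      EQ yields 0x40, LT 0x01, GT 0x00 and unordered 0x45.)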
*/ 8312 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 8313 goto decode_success; 8314 } 8315 8316 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 8317 half xmm */ 8318 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) { 8319 IRTemp arg64 = newTemp(Ity_I64); 8320 IRTemp rmode = newTemp(Ity_I32); 8321 vassert(sz == 4); 8322 8323 modrm = getIByte(delta+2); 8324 do_MMX_preamble(); 8325 if (epartIsReg(modrm)) { 8326 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 8327 delta += 2+1; 8328 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)), 8329 nameXMMReg(gregOfRM(modrm))); 8330 } else { 8331 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8332 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 8333 delta += 2+alen; 8334 DIP("cvtpi2ps %s,%s\n", dis_buf, 8335 nameXMMReg(gregOfRM(modrm)) ); 8336 } 8337 8338 assign( rmode, get_sse_roundingmode() ); 8339 8340 putXMMRegLane32F( 8341 gregOfRM(modrm), 0, 8342 binop(Iop_F64toF32, 8343 mkexpr(rmode), 8344 unop(Iop_I32StoF64, 8345 unop(Iop_64to32, mkexpr(arg64)) )) ); 8346 8347 putXMMRegLane32F( 8348 gregOfRM(modrm), 1, 8349 binop(Iop_F64toF32, 8350 mkexpr(rmode), 8351 unop(Iop_I32StoF64, 8352 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 8353 8354 goto decode_success; 8355 } 8356 8357 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low 8358 quarter xmm */ 8359 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) { 8360 IRTemp arg32 = newTemp(Ity_I32); 8361 IRTemp rmode = newTemp(Ity_I32); 8362 vassert(sz == 4); 8363 8364 modrm = getIByte(delta+3); 8365 if (epartIsReg(modrm)) { 8366 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 8367 delta += 3+1; 8368 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)), 8369 nameXMMReg(gregOfRM(modrm))); 8370 } else { 8371 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8372 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 8373 delta += 3+alen; 8374 DIP("cvtsi2ss %s,%s\n", dis_buf, 8375 nameXMMReg(gregOfRM(modrm)) ); 8376 } 8377 8378 assign( rmode, get_sse_roundingmode() ); 8379 8380 putXMMRegLane32F( 8381 gregOfRM(modrm), 0, 8382 binop(Iop_F64toF32, 8383 mkexpr(rmode), 8384 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 8385 8386 goto decode_success; 8387 } 8388 8389 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8390 I32 in mmx, according to prevailing SSE rounding mode */ 8391 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 8392 I32 in mmx, rounding towards zero */ 8393 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) { 8394 IRTemp dst64 = newTemp(Ity_I64); 8395 IRTemp rmode = newTemp(Ity_I32); 8396 IRTemp f32lo = newTemp(Ity_F32); 8397 IRTemp f32hi = newTemp(Ity_F32); 8398 Bool r2zero = toBool(insn[1] == 0x2C); 8399 8400 do_MMX_preamble(); 8401 modrm = getIByte(delta+2); 8402 8403 if (epartIsReg(modrm)) { 8404 delta += 2+1; 8405 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8406 assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1)); 8407 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 8408 nameXMMReg(eregOfRM(modrm)), 8409 nameMMXReg(gregOfRM(modrm))); 8410 } else { 8411 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8412 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8413 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32, 8414 mkexpr(addr), 8415 mkU32(4) ))); 8416 delta += 2+alen; 8417 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 8418 dis_buf, 8419 nameMMXReg(gregOfRM(modrm))); 8420 } 8421 8422 if (r2zero) { 8423 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 8424 } else { 8425 assign( rmode, get_sse_roundingmode() ); 8426 } 8427 8428 assign( 8429 dst64, 8430 binop( Iop_32HLto64, 8431 binop( Iop_F64toI32S, 8432 mkexpr(rmode), 8433 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 8434 binop( Iop_F64toI32S, 8435 mkexpr(rmode), 8436 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8437 ) 8438 ); 8439 8440 putMMXReg(gregOfRM(modrm), mkexpr(dst64)); 8441 goto decode_success; 8442 } 8443 8444 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to 8445 I32 in ireg, according to prevailing SSE rounding mode */ 8446 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to 8447 I32 in ireg, rounding towards zero */ 8448 if (insn[0] == 0xF3 && insn[1] == 0x0F 8449 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 8450 IRTemp rmode = newTemp(Ity_I32); 8451 IRTemp f32lo = newTemp(Ity_F32); 8452 Bool r2zero = toBool(insn[2] == 0x2C); 8453 vassert(sz == 4); 8454 8455 modrm = getIByte(delta+3); 8456 if (epartIsReg(modrm)) { 8457 delta += 3+1; 8458 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 8459 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8460 nameXMMReg(eregOfRM(modrm)), 8461 nameIReg(4, gregOfRM(modrm))); 8462 } else { 8463 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 8464 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 8465 delta += 3+alen; 8466 DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "", 8467 dis_buf, 8468 nameIReg(4, gregOfRM(modrm))); 8469 } 8470 8471 if (r2zero) { 8472 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 8473 } else { 8474 assign( rmode, get_sse_roundingmode() ); 8475 } 8476 8477 putIReg(4, gregOfRM(modrm), 8478 binop( Iop_F64toI32S, 8479 mkexpr(rmode), 8480 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 8481 ); 8482 8483 goto decode_success; 8484 } 8485 8486 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 8487 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) { 8488 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 ); 8489 goto decode_success; 8490 } 8491 8492 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 8493 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) { 8494 vassert(sz == 4); 8495 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 ); 8496 goto decode_success; 8497 } 8498 8499 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 8500 if (insn[0] == 0x0F && insn[1] == 0xAE 8501 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) { 8502 8503 IRTemp t64 = newTemp(Ity_I64); 8504 IRTemp ew = newTemp(Ity_I32); 8505 8506 modrm = getIByte(delta+2); 8507 vassert(!epartIsReg(modrm)); 8508 vassert(sz == 4); 8509 8510 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8511 delta += 2+alen; 8512 DIP("ldmxcsr %s\n", dis_buf); 8513 8514 /* The only thing we observe in %mxcsr is the rounding mode. 8515 Therefore, pass the 32-bit value (SSE native-format control 8516 word) to a clean helper, getting back a 64-bit value, the 8517 lower half of which is the SSEROUND value to store, and the 8518 upper half of which is the emulation-warning token which may 8519 be generated. 
8520 */ 8521 /* ULong x86h_check_ldmxcsr ( UInt ); */ 8522 assign( t64, mkIRExprCCall( 8523 Ity_I64, 0/*regparms*/, 8524 "x86g_check_ldmxcsr", 8525 &x86g_check_ldmxcsr, 8526 mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) ) 8527 ) 8528 ); 8529 8530 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 8531 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 8532 put_emwarn( mkexpr(ew) ); 8533 /* Finally, if an emulation warning was reported, side-exit to 8534 the next insn, reporting the warning, so that Valgrind's 8535 dispatcher sees the warning. */ 8536 stmt( 8537 IRStmt_Exit( 8538 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 8539 Ijk_EmWarn, 8540 IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta) 8541 ) 8542 ); 8543 goto decode_success; 8544 } 8545 8546 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8547 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 8548 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { 8549 Bool ok = False; 8550 delta = dis_MMX( &ok, sorb, sz, delta+1 ); 8551 if (!ok) 8552 goto decode_failure; 8553 goto decode_success; 8554 } 8555 8556 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 8557 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { 8558 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); 8559 goto decode_success; 8560 } 8561 8562 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 8563 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { 8564 vassert(sz == 4); 8565 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); 8566 goto decode_success; 8567 } 8568 8569 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 8570 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { 8571 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); 8572 goto decode_success; 8573 } 8574 8575 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 8576 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { 8577 vassert(sz == 4); 8578 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); 8579 goto decode_success; 8580 } 8581 8582 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 8583 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 8584 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { 8585 modrm = getIByte(delta+2); 8586 if (epartIsReg(modrm)) { 8587 putXMMReg( gregOfRM(modrm), 8588 getXMMReg( eregOfRM(modrm) )); 8589 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 8590 nameXMMReg(gregOfRM(modrm))); 8591 delta += 2+1; 8592 } else { 8593 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8594 if (insn[1] == 0x28/*movaps*/) 8595 gen_SEGV_if_not_16_aligned( addr ); 8596 putXMMReg( gregOfRM(modrm), 8597 loadLE(Ity_V128, mkexpr(addr)) ); 8598 DIP("mov[ua]ps %s,%s\n", dis_buf, 8599 nameXMMReg(gregOfRM(modrm))); 8600 delta += 2+alen; 8601 } 8602 goto decode_success; 8603 } 8604 8605 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ 8606 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). 
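      Only the store (to-memory) forms are handled below; the
      register-to-register forms fall through, awaiting a test case.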
   */
   if (sz == 4 && insn[0] == 0x0F
       && (insn[1] == 0x29 || insn[1] == 0x11)) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; awaiting test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x29/*movaps*/)
            gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                                  dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }

   /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
   /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 0 ) );
         DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhps %s,%s\n", dis_buf,
                               nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }

   /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }

   /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm),
                          0/*lower lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 1 ));
         DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm),  0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlps %s, %s\n",
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }

   /* 0F 13 = MOVLPS -- move from low half of XMM to mem.
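      Note how the 0F 12 decoder above splits on the modrm byte: a
      register E operand (mod == 11) can only be MOVHLPS, whereas a
      memory E operand means MOVLPS.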
*/ 8684 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { 8685 if (!epartIsReg(insn[2])) { 8686 delta += 2; 8687 addr = disAMode ( &alen, sorb, delta, dis_buf ); 8688 delta += alen; 8689 storeLE( mkexpr(addr), 8690 getXMMRegLane64( gregOfRM(insn[2]), 8691 0/*lower lane*/ ) ); 8692 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 8693 dis_buf); 8694 goto decode_success; 8695 } 8696 /* else fall through */ 8697 } 8698 8699 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 8700 to 4 lowest bits of ireg(G) */ 8701 if (insn[0] == 0x0F && insn[1] == 0x50) { 8702 modrm = getIByte(delta+2); 8703 if (sz == 4 && epartIsReg(modrm)) { 8704 Int src; 8705 t0 = newTemp(Ity_I32); 8706 t1 = newTemp(Ity_I32); 8707 t2 = newTemp(Ity_I32); 8708 t3 = newTemp(Ity_I32); 8709 delta += 2+1; 8710 src = eregOfRM(modrm); 8711 assign( t0, binop( Iop_And32, 8712 binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), 8713 mkU32(1) )); 8714 assign( t1, binop( Iop_And32, 8715 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), 8716 mkU32(2) )); 8717 assign( t2, binop( Iop_And32, 8718 binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), 8719 mkU32(4) )); 8720 assign( t3, binop( Iop_And32, 8721 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), 8722 mkU32(8) )); 8723 putIReg(4, gregOfRM(modrm), 8724 binop(Iop_Or32, 8725 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 8726 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) 8727 ) 8728 ); 8729 DIP("movmskps %s,%s\n", nameXMMReg(src), 8730 nameIReg(4, gregOfRM(modrm))); 8731 goto decode_success; 8732 } 8733 /* else fall through */ 8734 } 8735 8736 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 8737 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 8738 if (insn[0] == 0x0F && insn[1] == 0x2B) { 8739 modrm = getIByte(delta+2); 8740 if (!epartIsReg(modrm)) { 8741 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8742 gen_SEGV_if_not_16_aligned( addr ); 8743 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 8744 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 8745 dis_buf, 8746 nameXMMReg(gregOfRM(modrm))); 8747 delta += 2+alen; 8748 goto decode_success; 8749 } 8750 /* else fall through */ 8751 } 8752 8753 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8754 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 8755 Intel manual does not say anything about the usual business of 8756 the FP reg tags getting trashed whenever an MMX insn happens. 8757 So we just leave them alone. 8758 */ 8759 if (insn[0] == 0x0F && insn[1] == 0xE7) { 8760 modrm = getIByte(delta+2); 8761 if (sz == 4 && !epartIsReg(modrm)) { 8762 /* do_MMX_preamble(); Intel docs don't specify this */ 8763 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8764 storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) ); 8765 DIP("movntq %s,%s\n", dis_buf, 8766 nameMMXReg(gregOfRM(modrm))); 8767 delta += 2+alen; 8768 goto decode_success; 8769 } 8770 /* else fall through */ 8771 } 8772 8773 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 8774 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. 
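      E.g. movss (%eax),%xmm0 writes xmm0[31:0] and clears
      xmm0[127:32], whereas movss %xmm1,%xmm0 leaves xmm0[127:32]
      unchanged.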
   */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane32( gregOfRM(modrm), 0,
                          getXMMRegLane32( eregOfRM(modrm), 0 ));
         DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* zero bits 63:32 */
         putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
         /* write bits 31:0 */
         putXMMRegLane32( gregOfRM(modrm), 0,
                          loadLE(Ity_I32, mkexpr(addr)) );
         DIP("movss %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
      or lo 1/4 xmm). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* fall through, we don't yet have a test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
         goto decode_success;
      }
   }

   /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
      goto decode_success;
   }

   /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
      goto decode_success;
   }

   /* 0F 56 = ORPS -- G = G or E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgb", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgw", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
      zero-extend of it in ireg(G).
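      For example, pextrw $2,%mm1,%eax copies bits 47:32 of %mm1
      into %eax[15:0] and clears %eax[31:16].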
*/ 8858 if (insn[0] == 0x0F && insn[1] == 0xC5) { 8859 modrm = insn[2]; 8860 if (sz == 4 && epartIsReg(modrm)) { 8861 IRTemp sV = newTemp(Ity_I64); 8862 t5 = newTemp(Ity_I16); 8863 do_MMX_preamble(); 8864 assign(sV, getMMXReg(eregOfRM(modrm))); 8865 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 8866 switch (insn[3] & 3) { 8867 case 0: assign(t5, mkexpr(t0)); break; 8868 case 1: assign(t5, mkexpr(t1)); break; 8869 case 2: assign(t5, mkexpr(t2)); break; 8870 case 3: assign(t5, mkexpr(t3)); break; 8871 default: vassert(0); /*NOTREACHED*/ 8872 } 8873 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5))); 8874 DIP("pextrw $%d,%s,%s\n", 8875 (Int)insn[3], nameMMXReg(eregOfRM(modrm)), 8876 nameIReg(4,gregOfRM(modrm))); 8877 delta += 4; 8878 goto decode_success; 8879 } 8880 /* else fall through */ 8881 } 8882 8883 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8884 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 8885 put it into the specified lane of mmx(G). */ 8886 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) { 8887 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 8888 mmx reg. t4 is the new lane value. t5 is the original 8889 mmx value. t6 is the new mmx value. */ 8890 Int lane; 8891 t4 = newTemp(Ity_I16); 8892 t5 = newTemp(Ity_I64); 8893 t6 = newTemp(Ity_I64); 8894 modrm = insn[2]; 8895 do_MMX_preamble(); 8896 8897 assign(t5, getMMXReg(gregOfRM(modrm))); 8898 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 8899 8900 if (epartIsReg(modrm)) { 8901 assign(t4, getIReg(2, eregOfRM(modrm))); 8902 delta += 3+1; 8903 lane = insn[3+1-1]; 8904 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8905 nameIReg(2,eregOfRM(modrm)), 8906 nameMMXReg(gregOfRM(modrm))); 8907 } else { 8908 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 8909 delta += 3+alen; 8910 lane = insn[3+alen-1]; 8911 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 8912 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 8913 dis_buf, 8914 nameMMXReg(gregOfRM(modrm))); 8915 } 8916 8917 switch (lane & 3) { 8918 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 8919 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 8920 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 8921 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 8922 default: vassert(0); /*NOTREACHED*/ 8923 } 8924 putMMXReg(gregOfRM(modrm), mkexpr(t6)); 8925 goto decode_success; 8926 } 8927 8928 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8929 /* 0F EE = PMAXSW -- 16x4 signed max */ 8930 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) { 8931 do_MMX_preamble(); 8932 delta = dis_MMXop_regmem_to_reg ( 8933 sorb, delta+2, insn[1], "pmaxsw", False ); 8934 goto decode_success; 8935 } 8936 8937 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8938 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 8939 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) { 8940 do_MMX_preamble(); 8941 delta = dis_MMXop_regmem_to_reg ( 8942 sorb, delta+2, insn[1], "pmaxub", False ); 8943 goto decode_success; 8944 } 8945 8946 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8947 /* 0F EA = PMINSW -- 16x4 signed min */ 8948 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) { 8949 do_MMX_preamble(); 8950 delta = dis_MMXop_regmem_to_reg ( 8951 sorb, delta+2, insn[1], "pminsw", False ); 8952 goto decode_success; 8953 } 8954 8955 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8956 /* 0F DA = PMINUB -- 8x8 unsigned min */ 8957 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) { 8958 do_MMX_preamble(); 8959 delta = 
dis_MMXop_regmem_to_reg ( 8960 sorb, delta+2, insn[1], "pminub", False ); 8961 goto decode_success; 8962 } 8963 8964 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8965 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 8966 mmx(G), turn them into a byte, and put zero-extend of it in 8967 ireg(G). */ 8968 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) { 8969 modrm = insn[2]; 8970 if (epartIsReg(modrm)) { 8971 do_MMX_preamble(); 8972 t0 = newTemp(Ity_I64); 8973 t1 = newTemp(Ity_I32); 8974 assign(t0, getMMXReg(eregOfRM(modrm))); 8975 assign(t1, mkIRExprCCall( 8976 Ity_I32, 0/*regparms*/, 8977 "x86g_calculate_mmx_pmovmskb", 8978 &x86g_calculate_mmx_pmovmskb, 8979 mkIRExprVec_1(mkexpr(t0)))); 8980 putIReg(4, gregOfRM(modrm), mkexpr(t1)); 8981 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)), 8982 nameIReg(4,gregOfRM(modrm))); 8983 delta += 3; 8984 goto decode_success; 8985 } 8986 /* else fall through */ 8987 } 8988 8989 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 8990 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 8991 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) { 8992 do_MMX_preamble(); 8993 delta = dis_MMXop_regmem_to_reg ( 8994 sorb, delta+2, insn[1], "pmuluh", False ); 8995 goto decode_success; 8996 } 8997 8998 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 8999 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 9000 /* 0F 18 /2 = PREFETCH1 */ 9001 /* 0F 18 /3 = PREFETCH2 */ 9002 if (insn[0] == 0x0F && insn[1] == 0x18 9003 && !epartIsReg(insn[2]) 9004 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) { 9005 HChar* hintstr = "??"; 9006 9007 modrm = getIByte(delta+2); 9008 vassert(!epartIsReg(modrm)); 9009 9010 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9011 delta += 2+alen; 9012 9013 switch (gregOfRM(modrm)) { 9014 case 0: hintstr = "nta"; break; 9015 case 1: hintstr = "t0"; break; 9016 case 2: hintstr = "t1"; break; 9017 case 3: hintstr = "t2"; break; 9018 default: vassert(0); /*NOTREACHED*/ 9019 } 9020 9021 DIP("prefetch%s %s\n", hintstr, dis_buf); 9022 goto decode_success; 9023 } 9024 9025 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! 
prefetch */ 9026 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */ 9027 if (insn[0] == 0x0F && insn[1] == 0x0D 9028 && !epartIsReg(insn[2]) 9029 && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) { 9030 HChar* hintstr = "??"; 9031 9032 modrm = getIByte(delta+2); 9033 vassert(!epartIsReg(modrm)); 9034 9035 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9036 delta += 2+alen; 9037 9038 switch (gregOfRM(modrm)) { 9039 case 0: hintstr = ""; break; 9040 case 1: hintstr = "w"; break; 9041 default: vassert(0); /*NOTREACHED*/ 9042 } 9043 9044 DIP("prefetch%s %s\n", hintstr, dis_buf); 9045 goto decode_success; 9046 } 9047 9048 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9049 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 9050 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) { 9051 do_MMX_preamble(); 9052 delta = dis_MMXop_regmem_to_reg ( 9053 sorb, delta+2, insn[1], "psadbw", False ); 9054 goto decode_success; 9055 } 9056 9057 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 9058 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 9059 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) { 9060 Int order; 9061 IRTemp sV, dV, s3, s2, s1, s0; 9062 s3 = s2 = s1 = s0 = IRTemp_INVALID; 9063 sV = newTemp(Ity_I64); 9064 dV = newTemp(Ity_I64); 9065 do_MMX_preamble(); 9066 modrm = insn[2]; 9067 if (epartIsReg(modrm)) { 9068 assign( sV, getMMXReg(eregOfRM(modrm)) ); 9069 order = (Int)insn[3]; 9070 delta += 2+2; 9071 DIP("pshufw $%d,%s,%s\n", order, 9072 nameMMXReg(eregOfRM(modrm)), 9073 nameMMXReg(gregOfRM(modrm))); 9074 } else { 9075 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9076 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 9077 order = (Int)insn[2+alen]; 9078 delta += 3+alen; 9079 DIP("pshufw $%d,%s,%s\n", order, 9080 dis_buf, 9081 nameMMXReg(gregOfRM(modrm))); 9082 } 9083 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 9084 9085 # define SEL(n) \ 9086 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 9087 assign(dV, 9088 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 9089 SEL((order>>2)&3), SEL((order>>0)&3) ) 9090 ); 9091 putMMXReg(gregOfRM(modrm), mkexpr(dV)); 9092 # undef SEL 9093 goto decode_success; 9094 } 9095 9096 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 9097 if (insn[0] == 0x0F && insn[1] == 0x53) { 9098 vassert(sz == 4); 9099 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9100 "rcpps", Iop_Recip32Fx4 ); 9101 goto decode_success; 9102 } 9103 9104 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 9105 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) { 9106 vassert(sz == 4); 9107 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9108 "rcpss", Iop_Recip32F0x4 ); 9109 goto decode_success; 9110 } 9111 9112 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 9113 if (insn[0] == 0x0F && insn[1] == 0x52) { 9114 vassert(sz == 4); 9115 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9116 "rsqrtps", Iop_RSqrt32Fx4 ); 9117 goto decode_success; 9118 } 9119 9120 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 9121 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) { 9122 vassert(sz == 4); 9123 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9124 "rsqrtss", Iop_RSqrt32F0x4 ); 9125 goto decode_success; 9126 } 9127 9128 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 9129 if (insn[0] == 0x0F && insn[1] == 0xAE 9130 && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { 9131 vassert(sz == 4); 9132 delta += 3; 9133 /* Insert a memory fence. It's sometimes important that these 9134 are carried through to the generated code. */ 9135 stmt( IRStmt_MBE(Imbe_Fence) ); 9136 DIP("sfence\n"); 9137 goto decode_success; 9138 } 9139 9140 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 9141 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) { 9142 Int select; 9143 IRTemp sV, dV; 9144 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9145 sV = newTemp(Ity_V128); 9146 dV = newTemp(Ity_V128); 9147 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9148 modrm = insn[2]; 9149 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9150 9151 if (epartIsReg(modrm)) { 9152 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9153 select = (Int)insn[3]; 9154 delta += 2+2; 9155 DIP("shufps $%d,%s,%s\n", select, 9156 nameXMMReg(eregOfRM(modrm)), 9157 nameXMMReg(gregOfRM(modrm))); 9158 } else { 9159 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9160 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9161 select = (Int)insn[2+alen]; 9162 delta += 3+alen; 9163 DIP("shufps $%d,%s,%s\n", select, 9164 dis_buf, 9165 nameXMMReg(gregOfRM(modrm))); 9166 } 9167 9168 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9169 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9170 9171 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 9172 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 9173 9174 putXMMReg( 9175 gregOfRM(modrm), 9176 mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 9177 SELD((select>>2)&3), SELD((select>>0)&3) ) 9178 ); 9179 9180 # undef SELD 9181 # undef SELS 9182 9183 goto decode_success; 9184 } 9185 9186 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 9187 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) { 9188 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 9189 "sqrtps", Iop_Sqrt32Fx4 ); 9190 goto decode_success; 9191 } 9192 9193 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 9194 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) { 9195 vassert(sz == 4); 9196 delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 9197 "sqrtss", Iop_Sqrt32F0x4 ); 9198 goto decode_success; 9199 } 9200 9201 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 9202 if (insn[0] == 0x0F && insn[1] == 0xAE 9203 && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) { 9204 modrm = getIByte(delta+2); 9205 vassert(sz == 4); 9206 vassert(!epartIsReg(modrm)); 9207 9208 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9209 delta += 2+alen; 9210 9211 /* Fake up a native SSE mxcsr word. The only thing it depends 9212 on is SSEROUND[1:0], so call a clean helper to cook it up. 9213 */ 9214 /* UInt x86h_create_mxcsr ( UInt sseround ) */ 9215 DIP("stmxcsr %s\n", dis_buf); 9216 storeLE( mkexpr(addr), 9217 mkIRExprCCall( 9218 Ity_I32, 0/*regp*/, 9219 "x86g_create_mxcsr", &x86g_create_mxcsr, 9220 mkIRExprVec_1( get_sse_roundingmode() ) 9221 ) 9222 ); 9223 goto decode_success; 9224 } 9225 9226 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 9227 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) { 9228 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 ); 9229 goto decode_success; 9230 } 9231 9232 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 9233 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) { 9234 vassert(sz == 4); 9235 delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 ); 9236 goto decode_success; 9237 } 9238 9239 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 9240 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 9241 /* These just appear to be special cases of SHUFPS */ 9242 if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 9243 IRTemp sV, dV; 9244 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 9245 Bool hi = toBool(insn[1] == 0x15); 9246 sV = newTemp(Ity_V128); 9247 dV = newTemp(Ity_V128); 9248 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 9249 modrm = insn[2]; 9250 assign( dV, getXMMReg(gregOfRM(modrm)) ); 9251 9252 if (epartIsReg(modrm)) { 9253 assign( sV, getXMMReg(eregOfRM(modrm)) ); 9254 delta += 2+1; 9255 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 9256 nameXMMReg(eregOfRM(modrm)), 9257 nameXMMReg(gregOfRM(modrm))); 9258 } else { 9259 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9260 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 9261 delta += 2+alen; 9262 DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", 9263 dis_buf, 9264 nameXMMReg(gregOfRM(modrm))); 9265 } 9266 9267 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 9268 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 9269 9270 if (hi) { 9271 putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) ); 9272 } else { 9273 putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) ); 9274 } 9275 9276 goto decode_success; 9277 } 9278 9279 /* 0F 57 = XORPS -- G = G and E */ 9280 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) { 9281 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 ); 9282 goto decode_success; 9283 } 9284 9285 /* ---------------------------------------------------- */ 9286 /* --- end of the SSE decoder. --- */ 9287 /* ---------------------------------------------------- */ 9288 9289 /* ---------------------------------------------------- */ 9290 /* --- start of the SSE2 decoder. --- */ 9291 /* ---------------------------------------------------- */ 9292 9293 /* Skip parts of the decoder which don't apply given the stated 9294 guest subarchitecture. */ 9295 if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) 9296 goto after_sse_decoders; /* no SSE2 capabilities */ 9297 9298 insn = (UChar*)&guest_code[delta]; 9299 9300 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 9301 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) { 9302 delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 ); 9303 goto decode_success; 9304 } 9305 9306 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 9307 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) { 9308 vassert(sz == 4); 9309 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 ); 9310 goto decode_success; 9311 } 9312 9313 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 9314 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) { 9315 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 ); 9316 goto decode_success; 9317 } 9318 9319 /* 66 0F 54 = ANDPD -- G = G and E */ 9320 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) { 9321 delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 ); 9322 goto decode_success; 9323 } 9324 9325 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 9326 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) { 9327 delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 ); 9328 goto decode_success; 9329 } 9330 9331 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 9332 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) { 9333 vassert(sz == 4); 9334 delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 ); 9335 goto decode_success; 9336 } 9337 9338 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 9339 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 9340 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) { 9341 IRTemp argL = newTemp(Ity_F64); 9342 IRTemp argR = newTemp(Ity_F64); 9343 modrm = getIByte(delta+2); 9344 if (epartIsReg(modrm)) { 9345 assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) ); 9346 delta += 2+1; 9347 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9348 nameXMMReg(gregOfRM(modrm)) ); 9349 } else { 9350 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9351 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9352 delta += 2+alen; 9353 DIP("[u]comisd %s,%s\n", dis_buf, 9354 nameXMMReg(gregOfRM(modrm)) ); 9355 } 9356 assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) ); 9357 9358 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 9359 stmt( IRStmt_Put( 
OFFB_CC_DEP2, mkU32(0) )); 9360 stmt( IRStmt_Put( 9361 OFFB_CC_DEP1, 9362 binop( Iop_And32, 9363 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)), 9364 mkU32(0x45) 9365 ))); 9366 /* Set NDEP even though it isn't used. This makes redundant-PUT 9367 elimination of previous stores to this field work better. */ 9368 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 9369 goto decode_success; 9370 } 9371 9372 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 9373 F64 in xmm(G) */ 9374 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) { 9375 IRTemp arg64 = newTemp(Ity_I64); 9376 vassert(sz == 4); 9377 9378 modrm = getIByte(delta+3); 9379 if (epartIsReg(modrm)) { 9380 assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) ); 9381 delta += 3+1; 9382 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9383 nameXMMReg(gregOfRM(modrm))); 9384 } else { 9385 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9386 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9387 delta += 3+alen; 9388 DIP("cvtdq2pd %s,%s\n", dis_buf, 9389 nameXMMReg(gregOfRM(modrm)) ); 9390 } 9391 9392 putXMMRegLane64F( 9393 gregOfRM(modrm), 0, 9394 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 9395 ); 9396 9397 putXMMRegLane64F( 9398 gregOfRM(modrm), 1, 9399 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 9400 ); 9401 9402 goto decode_success; 9403 } 9404 9405 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 9406 xmm(G) */ 9407 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) { 9408 IRTemp argV = newTemp(Ity_V128); 9409 IRTemp rmode = newTemp(Ity_I32); 9410 9411 modrm = getIByte(delta+2); 9412 if (epartIsReg(modrm)) { 9413 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9414 delta += 2+1; 9415 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9416 nameXMMReg(gregOfRM(modrm))); 9417 } else { 9418 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9419 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9420 delta += 2+alen; 9421 DIP("cvtdq2ps %s,%s\n", dis_buf, 9422 nameXMMReg(gregOfRM(modrm)) ); 9423 } 9424 9425 assign( rmode, get_sse_roundingmode() ); 9426 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9427 9428 # define CVT(_t) binop( Iop_F64toF32, \ 9429 mkexpr(rmode), \ 9430 unop(Iop_I32StoF64,mkexpr(_t))) 9431 9432 putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) ); 9433 putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) ); 9434 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9435 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9436 9437 # undef CVT 9438 9439 goto decode_success; 9440 } 9441 9442 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9443 lo half xmm(G), and zero upper half */ 9444 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) { 9445 IRTemp argV = newTemp(Ity_V128); 9446 IRTemp rmode = newTemp(Ity_I32); 9447 vassert(sz == 4); 9448 9449 modrm = getIByte(delta+3); 9450 if (epartIsReg(modrm)) { 9451 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9452 delta += 3+1; 9453 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9454 nameXMMReg(gregOfRM(modrm))); 9455 } else { 9456 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9457 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9458 delta += 3+alen; 9459 DIP("cvtpd2dq %s,%s\n", dis_buf, 9460 nameXMMReg(gregOfRM(modrm)) ); 9461 } 9462 9463 assign( rmode, get_sse_roundingmode() ); 9464 t0 = newTemp(Ity_F64); 9465 t1 = newTemp(Ity_F64); 9466 assign( t0, unop(Iop_ReinterpI64asF64, 9467 unop(Iop_V128to64, mkexpr(argV))) ); 9468 assign( t1, unop(Iop_ReinterpI64asF64, 9469 unop(Iop_V128HIto64, mkexpr(argV))) ); 9470 
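      /* At this point t0 and t1 hold the low and high F64 lanes of
         the source.  CVT below narrows each to I32 using the
         rounding mode fetched above; the two upper result lanes are
         then zeroed, as the CVTPD2DQ comment requires. */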
#     define CVT(_t)  binop( Iop_F64toI32S,                   \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }

   /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      IRTemp f64hi  = newTemp(Ity_F64);
      Bool   r2zero = toBool(insn[1] == 0x2C);

      do_MMX_preamble();
      modrm = getIByte(delta+2);

      if (epartIsReg(modrm)) {
         delta += 2+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
                                              mkexpr(addr),
                                              mkU32(8) )));
         delta += 2+alen;
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign(
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
         )
      );

      putMMXReg(gregOfRM(modrm), mkexpr(dst64));
      goto decode_success;
   }

   /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
      lo half xmm(G), and zero upper half */
   /* Note, this is practically identical to CVTPD2DQ.  It would have
      been nicer to merge them together, but the insn[] offsets differ
      by one.
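      (CVTPD2DQ is F2 0F E6, so its opcode byte sits at insn[2];
      here the 66 has already been consumed as the operand-size
      prefix, leaving 0F 5A at insn[0] and insn[1].)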
*/ 9541 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) { 9542 IRTemp argV = newTemp(Ity_V128); 9543 IRTemp rmode = newTemp(Ity_I32); 9544 9545 modrm = getIByte(delta+2); 9546 if (epartIsReg(modrm)) { 9547 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9548 delta += 2+1; 9549 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9550 nameXMMReg(gregOfRM(modrm))); 9551 } else { 9552 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9553 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9554 delta += 2+alen; 9555 DIP("cvtpd2ps %s,%s\n", dis_buf, 9556 nameXMMReg(gregOfRM(modrm)) ); 9557 } 9558 9559 assign( rmode, get_sse_roundingmode() ); 9560 t0 = newTemp(Ity_F64); 9561 t1 = newTemp(Ity_F64); 9562 assign( t0, unop(Iop_ReinterpI64asF64, 9563 unop(Iop_V128to64, mkexpr(argV))) ); 9564 assign( t1, unop(Iop_ReinterpI64asF64, 9565 unop(Iop_V128HIto64, mkexpr(argV))) ); 9566 9567 # define CVT(_t) binop( Iop_F64toF32, \ 9568 mkexpr(rmode), \ 9569 mkexpr(_t) ) 9570 9571 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9572 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9573 putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) ); 9574 putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) ); 9575 9576 # undef CVT 9577 9578 goto decode_success; 9579 } 9580 9581 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 9582 xmm(G) */ 9583 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) { 9584 IRTemp arg64 = newTemp(Ity_I64); 9585 9586 modrm = getIByte(delta+2); 9587 if (epartIsReg(modrm)) { 9588 /* Only switch to MMX mode if the source is a MMX register. 9589 This is inconsistent with all other instructions which 9590 convert between XMM and (M64 or MMX), which always switch 9591 to MMX mode even if 64-bit operand is M64 and not MMX. At 9592 least, that's what the Intel docs seem to me to say. 9593 Fixes #210264. */ 9594 do_MMX_preamble(); 9595 assign( arg64, getMMXReg(eregOfRM(modrm)) ); 9596 delta += 2+1; 9597 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)), 9598 nameXMMReg(gregOfRM(modrm))); 9599 } else { 9600 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9601 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 9602 delta += 2+alen; 9603 DIP("cvtpi2pd %s,%s\n", dis_buf, 9604 nameXMMReg(gregOfRM(modrm)) ); 9605 } 9606 9607 putXMMRegLane64F( 9608 gregOfRM(modrm), 0, 9609 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 9610 ); 9611 9612 putXMMRegLane64F( 9613 gregOfRM(modrm), 1, 9614 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 9615 ); 9616 9617 goto decode_success; 9618 } 9619 9620 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9621 xmm(G) */ 9622 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) { 9623 IRTemp argV = newTemp(Ity_V128); 9624 IRTemp rmode = newTemp(Ity_I32); 9625 9626 modrm = getIByte(delta+2); 9627 if (epartIsReg(modrm)) { 9628 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9629 delta += 2+1; 9630 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9631 nameXMMReg(gregOfRM(modrm))); 9632 } else { 9633 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9634 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9635 delta += 2+alen; 9636 DIP("cvtps2dq %s,%s\n", dis_buf, 9637 nameXMMReg(gregOfRM(modrm)) ); 9638 } 9639 9640 assign( rmode, get_sse_roundingmode() ); 9641 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9642 9643 /* This is less than ideal. If it turns out to be a performance 9644 bottleneck it can be improved. 
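      (Each lane below makes an F32 -> F64 -> I32 round trip; a
      single 4-lane float-to-int IROp, if one were available, would
      do the job directly.)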
*/ 9645 # define CVT(_t) \ 9646 binop( Iop_F64toI32S, \ 9647 mkexpr(rmode), \ 9648 unop( Iop_F32toF64, \ 9649 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9650 9651 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9652 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9653 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9654 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9655 9656 # undef CVT 9657 9658 goto decode_success; 9659 } 9660 9661 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 9662 F64 in xmm(G). */ 9663 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) { 9664 IRTemp f32lo = newTemp(Ity_F32); 9665 IRTemp f32hi = newTemp(Ity_F32); 9666 9667 modrm = getIByte(delta+2); 9668 if (epartIsReg(modrm)) { 9669 assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) ); 9670 assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) ); 9671 delta += 2+1; 9672 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9673 nameXMMReg(gregOfRM(modrm))); 9674 } else { 9675 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9676 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 9677 assign( f32hi, loadLE(Ity_F32, 9678 binop(Iop_Add32,mkexpr(addr),mkU32(4))) ); 9679 delta += 2+alen; 9680 DIP("cvtps2pd %s,%s\n", dis_buf, 9681 nameXMMReg(gregOfRM(modrm)) ); 9682 } 9683 9684 putXMMRegLane64F( gregOfRM(modrm), 1, 9685 unop(Iop_F32toF64, mkexpr(f32hi)) ); 9686 putXMMRegLane64F( gregOfRM(modrm), 0, 9687 unop(Iop_F32toF64, mkexpr(f32lo)) ); 9688 9689 goto decode_success; 9690 } 9691 9692 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to 9693 I32 in ireg, according to prevailing SSE rounding mode */ 9694 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to 9695 I32 in ireg, rounding towards zero */ 9696 if (insn[0] == 0xF2 && insn[1] == 0x0F 9697 && (insn[2] == 0x2D || insn[2] == 0x2C)) { 9698 IRTemp rmode = newTemp(Ity_I32); 9699 IRTemp f64lo = newTemp(Ity_F64); 9700 Bool r2zero = toBool(insn[2] == 0x2C); 9701 vassert(sz == 4); 9702 9703 modrm = getIByte(delta+3); 9704 if (epartIsReg(modrm)) { 9705 delta += 3+1; 9706 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9707 DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "", 9708 nameXMMReg(eregOfRM(modrm)), 9709 nameIReg(4, gregOfRM(modrm))); 9710 } else { 9711 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9712 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9713 delta += 3+alen; 9714 DIP("cvt%ssd2si %s,%s\n", r2zero ? 
"t" : "", 9715 dis_buf, 9716 nameIReg(4, gregOfRM(modrm))); 9717 } 9718 9719 if (r2zero) { 9720 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9721 } else { 9722 assign( rmode, get_sse_roundingmode() ); 9723 } 9724 9725 putIReg(4, gregOfRM(modrm), 9726 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 9727 9728 goto decode_success; 9729 } 9730 9731 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 9732 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 9733 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) { 9734 IRTemp rmode = newTemp(Ity_I32); 9735 IRTemp f64lo = newTemp(Ity_F64); 9736 vassert(sz == 4); 9737 9738 modrm = getIByte(delta+3); 9739 if (epartIsReg(modrm)) { 9740 delta += 3+1; 9741 assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0)); 9742 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9743 nameXMMReg(gregOfRM(modrm))); 9744 } else { 9745 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9746 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 9747 delta += 3+alen; 9748 DIP("cvtsd2ss %s,%s\n", dis_buf, 9749 nameXMMReg(gregOfRM(modrm))); 9750 } 9751 9752 assign( rmode, get_sse_roundingmode() ); 9753 putXMMRegLane32F( 9754 gregOfRM(modrm), 0, 9755 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 9756 ); 9757 9758 goto decode_success; 9759 } 9760 9761 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low 9762 half xmm */ 9763 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) { 9764 IRTemp arg32 = newTemp(Ity_I32); 9765 vassert(sz == 4); 9766 9767 modrm = getIByte(delta+3); 9768 if (epartIsReg(modrm)) { 9769 assign( arg32, getIReg(4, eregOfRM(modrm)) ); 9770 delta += 3+1; 9771 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)), 9772 nameXMMReg(gregOfRM(modrm))); 9773 } else { 9774 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9775 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 9776 delta += 3+alen; 9777 DIP("cvtsi2sd %s,%s\n", dis_buf, 9778 nameXMMReg(gregOfRM(modrm)) ); 9779 } 9780 9781 putXMMRegLane64F( 9782 gregOfRM(modrm), 0, 9783 unop(Iop_I32StoF64, mkexpr(arg32)) ); 9784 9785 goto decode_success; 9786 } 9787 9788 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 9789 low half xmm(G) */ 9790 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) { 9791 IRTemp f32lo = newTemp(Ity_F32); 9792 vassert(sz == 4); 9793 9794 modrm = getIByte(delta+3); 9795 if (epartIsReg(modrm)) { 9796 delta += 3+1; 9797 assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0)); 9798 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9799 nameXMMReg(gregOfRM(modrm))); 9800 } else { 9801 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9802 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 9803 delta += 3+alen; 9804 DIP("cvtss2sd %s,%s\n", dis_buf, 9805 nameXMMReg(gregOfRM(modrm))); 9806 } 9807 9808 putXMMRegLane64F( gregOfRM(modrm), 0, 9809 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 9810 9811 goto decode_success; 9812 } 9813 9814 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 9815 lo half xmm(G), and zero upper half, rounding towards zero */ 9816 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) { 9817 IRTemp argV = newTemp(Ity_V128); 9818 IRTemp rmode = newTemp(Ity_I32); 9819 9820 modrm = getIByte(delta+2); 9821 if (epartIsReg(modrm)) { 9822 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9823 delta += 2+1; 9824 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9825 nameXMMReg(gregOfRM(modrm))); 9826 } else { 9827 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9828 assign( argV, loadLE(Ity_V128, 
mkexpr(addr)) ); 9829 delta += 2+alen; 9830 DIP("cvttpd2dq %s,%s\n", dis_buf, 9831 nameXMMReg(gregOfRM(modrm)) ); 9832 } 9833 9834 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9835 9836 t0 = newTemp(Ity_F64); 9837 t1 = newTemp(Ity_F64); 9838 assign( t0, unop(Iop_ReinterpI64asF64, 9839 unop(Iop_V128to64, mkexpr(argV))) ); 9840 assign( t1, unop(Iop_ReinterpI64asF64, 9841 unop(Iop_V128HIto64, mkexpr(argV))) ); 9842 9843 # define CVT(_t) binop( Iop_F64toI32S, \ 9844 mkexpr(rmode), \ 9845 mkexpr(_t) ) 9846 9847 putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) ); 9848 putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) ); 9849 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9850 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9851 9852 # undef CVT 9853 9854 goto decode_success; 9855 } 9856 9857 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 9858 xmm(G), rounding towards zero */ 9859 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) { 9860 IRTemp argV = newTemp(Ity_V128); 9861 IRTemp rmode = newTemp(Ity_I32); 9862 vassert(sz == 4); 9863 9864 modrm = getIByte(delta+3); 9865 if (epartIsReg(modrm)) { 9866 assign( argV, getXMMReg(eregOfRM(modrm)) ); 9867 delta += 3+1; 9868 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 9869 nameXMMReg(gregOfRM(modrm))); 9870 } else { 9871 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 9872 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 9873 delta += 3+alen; 9874 DIP("cvttps2dq %s,%s\n", dis_buf, 9875 nameXMMReg(gregOfRM(modrm)) ); 9876 } 9877 9878 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 9879 breakup128to32s( argV, &t3, &t2, &t1, &t0 ); 9880 9881 /* This is less than ideal. If it turns out to be a performance 9882 bottleneck it can be improved. */ 9883 # define CVT(_t) \ 9884 binop( Iop_F64toI32S, \ 9885 mkexpr(rmode), \ 9886 unop( Iop_F32toF64, \ 9887 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 9888 9889 putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) ); 9890 putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) ); 9891 putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) ); 9892 putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) ); 9893 9894 # undef CVT 9895 9896 goto decode_success; 9897 } 9898 9899 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 9900 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) { 9901 delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 ); 9902 goto decode_success; 9903 } 9904 9905 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 9906 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) { 9907 vassert(sz == 4); 9908 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 ); 9909 goto decode_success; 9910 } 9911 9912 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 9913 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 9914 if (insn[0] == 0x0F && insn[1] == 0xAE 9915 && epartIsReg(insn[2]) 9916 && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) { 9917 vassert(sz == 4); 9918 delta += 3; 9919 /* Insert a memory fence. It's sometimes important that these 9920 are carried through to the generated code. */ 9921 stmt( IRStmt_MBE(Imbe_Fence) ); 9922 DIP("%sfence\n", gregOfRM(insn[2])==5 ? 
"l" : "m"); 9923 goto decode_success; 9924 } 9925 9926 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 9927 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) { 9928 delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 ); 9929 goto decode_success; 9930 } 9931 9932 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 9933 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) { 9934 vassert(sz == 4); 9935 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 ); 9936 goto decode_success; 9937 } 9938 9939 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 9940 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) { 9941 delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 ); 9942 goto decode_success; 9943 } 9944 9945 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 9946 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) { 9947 vassert(sz == 4); 9948 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 ); 9949 goto decode_success; 9950 } 9951 9952 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 9953 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 9954 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 9955 if (sz == 2 && insn[0] == 0x0F 9956 && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) { 9957 HChar* wot = insn[1]==0x28 ? "apd" : 9958 insn[1]==0x10 ? "upd" : "dqa"; 9959 modrm = getIByte(delta+2); 9960 if (epartIsReg(modrm)) { 9961 putXMMReg( gregOfRM(modrm), 9962 getXMMReg( eregOfRM(modrm) )); 9963 DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)), 9964 nameXMMReg(gregOfRM(modrm))); 9965 delta += 2+1; 9966 } else { 9967 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9968 if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/) 9969 gen_SEGV_if_not_16_aligned( addr ); 9970 putXMMReg( gregOfRM(modrm), 9971 loadLE(Ity_V128, mkexpr(addr)) ); 9972 DIP("mov%s %s,%s\n", wot, dis_buf, 9973 nameXMMReg(gregOfRM(modrm))); 9974 delta += 2+alen; 9975 } 9976 goto decode_success; 9977 } 9978 9979 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 9980 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 9981 if (sz == 2 && insn[0] == 0x0F 9982 && (insn[1] == 0x29 || insn[1] == 0x11)) { 9983 HChar* wot = insn[1]==0x29 ? "apd" : "upd"; 9984 modrm = getIByte(delta+2); 9985 if (epartIsReg(modrm)) { 9986 /* fall through; awaiting test case */ 9987 } else { 9988 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 9989 if (insn[1] == 0x29/*movapd*/) 9990 gen_SEGV_if_not_16_aligned( addr ); 9991 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 9992 DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)), 9993 dis_buf ); 9994 delta += 2+alen; 9995 goto decode_success; 9996 } 9997 } 9998 9999 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */ 10000 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) { 10001 modrm = getIByte(delta+2); 10002 if (epartIsReg(modrm)) { 10003 delta += 2+1; 10004 putXMMReg( 10005 gregOfRM(modrm), 10006 unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) 10007 ); 10008 DIP("movd %s, %s\n", 10009 nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm))); 10010 } else { 10011 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10012 delta += 2+alen; 10013 putXMMReg( 10014 gregOfRM(modrm), 10015 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 10016 ); 10017 DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm))); 10018 } 10019 goto decode_success; 10020 } 10021 10022 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. 
*/ 10023 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) { 10024 modrm = getIByte(delta+2); 10025 if (epartIsReg(modrm)) { 10026 delta += 2+1; 10027 putIReg( 4, eregOfRM(modrm), 10028 getXMMRegLane32(gregOfRM(modrm), 0) ); 10029 DIP("movd %s, %s\n", 10030 nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm))); 10031 } else { 10032 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10033 delta += 2+alen; 10034 storeLE( mkexpr(addr), 10035 getXMMRegLane32(gregOfRM(modrm), 0) ); 10036 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10037 } 10038 goto decode_success; 10039 } 10040 10041 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 10042 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) { 10043 modrm = getIByte(delta+2); 10044 if (epartIsReg(modrm)) { 10045 delta += 2+1; 10046 putXMMReg( eregOfRM(modrm), 10047 getXMMReg(gregOfRM(modrm)) ); 10048 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10049 nameXMMReg(eregOfRM(modrm))); 10050 } else { 10051 addr = disAMode( &alen, sorb, delta+2, dis_buf ); 10052 delta += 2+alen; 10053 gen_SEGV_if_not_16_aligned( addr ); 10054 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10055 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10056 } 10057 goto decode_success; 10058 } 10059 10060 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 10061 /* Unfortunately can't simply use the MOVDQA case since the 10062 prefix lengths are different (66 vs F3) */ 10063 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) { 10064 vassert(sz == 4); 10065 modrm = getIByte(delta+3); 10066 if (epartIsReg(modrm)) { 10067 putXMMReg( gregOfRM(modrm), 10068 getXMMReg( eregOfRM(modrm) )); 10069 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10070 nameXMMReg(gregOfRM(modrm))); 10071 delta += 3+1; 10072 } else { 10073 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10074 putXMMReg( gregOfRM(modrm), 10075 loadLE(Ity_V128, mkexpr(addr)) ); 10076 DIP("movdqu %s,%s\n", dis_buf, 10077 nameXMMReg(gregOfRM(modrm))); 10078 delta += 3+alen; 10079 } 10080 goto decode_success; 10081 } 10082 10083 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 10084 /* Unfortunately can't simply use the MOVDQA case since the 10085 prefix lengths are different (66 vs F3) */ 10086 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) { 10087 vassert(sz == 4); 10088 modrm = getIByte(delta+3); 10089 if (epartIsReg(modrm)) { 10090 delta += 3+1; 10091 putXMMReg( eregOfRM(modrm), 10092 getXMMReg(gregOfRM(modrm)) ); 10093 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), 10094 nameXMMReg(eregOfRM(modrm))); 10095 } else { 10096 addr = disAMode( &alen, sorb, delta+3, dis_buf ); 10097 delta += 3+alen; 10098 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10099 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf); 10100 } 10101 goto decode_success; 10102 } 10103 10104 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 10105 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) { 10106 vassert(sz == 4); 10107 modrm = getIByte(delta+3); 10108 if (epartIsReg(modrm)) { 10109 do_MMX_preamble(); 10110 putMMXReg( gregOfRM(modrm), 10111 getXMMRegLane64( eregOfRM(modrm), 0 )); 10112 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10113 nameMMXReg(gregOfRM(modrm))); 10114 delta += 3+1; 10115 goto decode_success; 10116 } else { 10117 /* fall through, apparently no mem case for this insn */ 10118 } 10119 } 10120 10121 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. 
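Only bits 127:64 of xmm(G) are written; the low half of the register is left unchanged.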
*/ 10122 /* This seems identical to MOVHPS. This instruction encoding is 10123 completely crazy. */ 10124 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) { 10125 modrm = getIByte(delta+2); 10126 if (epartIsReg(modrm)) { 10127 /* fall through; apparently reg-reg is not possible */ 10128 } else { 10129 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10130 delta += 2+alen; 10131 putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, 10132 loadLE(Ity_I64, mkexpr(addr)) ); 10133 DIP("movhpd %s,%s\n", dis_buf, 10134 nameXMMReg( gregOfRM(modrm) )); 10135 goto decode_success; 10136 } 10137 } 10138 10139 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 10140 /* Again, this seems identical to MOVHPS. */ 10141 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) { 10142 if (!epartIsReg(insn[2])) { 10143 delta += 2; 10144 addr = disAMode ( &alen, sorb, delta, dis_buf ); 10145 delta += alen; 10146 storeLE( mkexpr(addr), 10147 getXMMRegLane64( gregOfRM(insn[2]), 10148 1/*upper lane*/ ) ); 10149 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), 10150 dis_buf); 10151 goto decode_success; 10152 } 10153 /* else fall through */ 10154 } 10155 10156 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 10157 /* Identical to MOVLPS ? */ 10158 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) { 10159 modrm = getIByte(delta+2); 10160 if (epartIsReg(modrm)) { 10161 /* fall through; apparently reg-reg is not possible */ 10162 } else { 10163 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10164 delta += 2+alen; 10165 putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, 10166 loadLE(Ity_I64, mkexpr(addr)) ); 10167 DIP("movlpd %s, %s\n", 10168 dis_buf, nameXMMReg( gregOfRM(modrm) )); 10169 goto decode_success; 10170 } 10171 } 10172 10173 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 10174 /* Identical to MOVLPS ?
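Stores the low 64-bit lane of xmm(G) to memory; the register itself is unmodified.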
*/ 10175 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) { 10176 if (!epartIsReg(insn[2])) { 10177 delta += 2; 10178 addr = disAMode ( &alen, sorb, delta, dis_buf ); 10179 delta += alen; 10180 storeLE( mkexpr(addr), 10181 getXMMRegLane64( gregOfRM(insn[2]), 10182 0/*lower lane*/ ) ); 10183 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), 10184 dis_buf); 10185 goto decode_success; 10186 } 10187 /* else fall through */ 10188 } 10189 10190 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 10191 2 lowest bits of ireg(G) */ 10192 if (insn[0] == 0x0F && insn[1] == 0x50) { 10193 modrm = getIByte(delta+2); 10194 if (sz == 2 && epartIsReg(modrm)) { 10195 Int src; 10196 t0 = newTemp(Ity_I32); 10197 t1 = newTemp(Ity_I32); 10198 delta += 2+1; 10199 src = eregOfRM(modrm); 10200 assign( t0, binop( Iop_And32, 10201 binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)), 10202 mkU32(1) )); 10203 assign( t1, binop( Iop_And32, 10204 binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)), 10205 mkU32(2) )); 10206 putIReg(4, gregOfRM(modrm), 10207 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)) 10208 ); 10209 DIP("movmskpd %s,%s\n", nameXMMReg(src), 10210 nameIReg(4, gregOfRM(modrm))); 10211 goto decode_success; 10212 } 10213 /* else fall through */ 10214 } 10215 10216 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 10217 if (insn[0] == 0x0F && insn[1] == 0xF7) { 10218 modrm = getIByte(delta+2); 10219 if (sz == 2 && epartIsReg(modrm)) { 10220 IRTemp regD = newTemp(Ity_V128); 10221 IRTemp mask = newTemp(Ity_V128); 10222 IRTemp olddata = newTemp(Ity_V128); 10223 IRTemp newdata = newTemp(Ity_V128); 10224 addr = newTemp(Ity_I32); 10225 10226 assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); 10227 assign( regD, getXMMReg( gregOfRM(modrm) )); 10228 10229 /* Unfortunately can't do the obvious thing with SarN8x16 10230 here since that can't be re-emitted as SSE2 code - no such 10231 insn. */ 10232 assign( 10233 mask, 10234 binop(Iop_64HLtoV128, 10235 binop(Iop_SarN8x8, 10236 getXMMRegLane64( eregOfRM(modrm), 1 ), 10237 mkU8(7) ), 10238 binop(Iop_SarN8x8, 10239 getXMMRegLane64( eregOfRM(modrm), 0 ), 10240 mkU8(7) ) )); 10241 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 10242 assign( newdata, 10243 binop(Iop_OrV128, 10244 binop(Iop_AndV128, 10245 mkexpr(regD), 10246 mkexpr(mask) ), 10247 binop(Iop_AndV128, 10248 mkexpr(olddata), 10249 unop(Iop_NotV128, mkexpr(mask)))) ); 10250 storeLE( mkexpr(addr), mkexpr(newdata) ); 10251 10252 delta += 2+1; 10253 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ), 10254 nameXMMReg( gregOfRM(modrm) ) ); 10255 goto decode_success; 10256 } 10257 /* else fall through */ 10258 } 10259 10260 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 10261 if (insn[0] == 0x0F && insn[1] == 0xE7) { 10262 modrm = getIByte(delta+2); 10263 if (sz == 2 && !epartIsReg(modrm)) { 10264 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10265 gen_SEGV_if_not_16_aligned( addr ); 10266 storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); 10267 DIP("movntdq %s,%s\n", dis_buf, 10268 nameXMMReg(gregOfRM(modrm))); 10269 delta += 2+alen; 10270 goto decode_success; 10271 } 10272 /* else fall through */ 10273 } 10274 10275 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. 
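The non-temporal hint is simply dropped and an ordinary 32-bit store is generated; e.g. movnti %eax,(%edi) behaves here like mov %eax,(%edi).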
*/ 10276 if (insn[0] == 0x0F && insn[1] == 0xC3) { 10277 vassert(sz == 4); 10278 modrm = getIByte(delta+2); 10279 if (!epartIsReg(modrm)) { 10280 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10281 storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) ); 10282 DIP("movnti %s,%s\n", dis_buf, 10283 nameIReg(4, gregOfRM(modrm))); 10284 delta += 2+alen; 10285 goto decode_success; 10286 } 10287 /* else fall through */ 10288 } 10289 10290 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 10291 or lo half xmm). */ 10292 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) { 10293 modrm = getIByte(delta+2); 10294 if (epartIsReg(modrm)) { 10295 /* fall through, awaiting test case */ 10296 /* dst: lo half copied, hi half zeroed */ 10297 } else { 10298 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10299 storeLE( mkexpr(addr), 10300 getXMMRegLane64( gregOfRM(modrm), 0 )); 10301 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf ); 10302 delta += 2+alen; 10303 goto decode_success; 10304 } 10305 } 10306 10307 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 10308 hi half). */ 10309 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) { 10310 vassert(sz == 4); 10311 modrm = getIByte(delta+3); 10312 if (epartIsReg(modrm)) { 10313 do_MMX_preamble(); 10314 putXMMReg( gregOfRM(modrm), 10315 unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) ); 10316 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)), 10317 nameXMMReg(gregOfRM(modrm))); 10318 delta += 3+1; 10319 goto decode_success; 10320 } else { 10321 /* fall through, apparently no mem case for this insn */ 10322 } 10323 } 10324 10325 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 10326 G (lo half xmm). Upper half of G is zeroed out. */ 10327 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 10328 G (lo half xmm). If E is mem, upper half of G is zeroed out. 10329 If E is reg, upper half of G is unchanged. */ 10330 if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10) 10331 || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) { 10332 vassert(sz == 4); 10333 modrm = getIByte(delta+3); 10334 if (epartIsReg(modrm)) { 10335 putXMMRegLane64( gregOfRM(modrm), 0, 10336 getXMMRegLane64( eregOfRM(modrm), 0 )); 10337 if (insn[0] == 0xF3/*MOVQ*/) { 10338 /* zero bits 127:64 */ 10339 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10340 } 10341 DIP("%s %s,%s\n", insn[0]==0xF3 ? "movq" : "movsd", 10342 nameXMMReg(eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm))); 10343 delta += 3+1; 10344 } else { 10345 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10346 /* zero bits 127:64 */ 10347 putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); 10348 /* write bits 63:0 */ 10349 putXMMRegLane64( gregOfRM(modrm), 0, 10350 loadLE(Ity_I64, mkexpr(addr)) ); 10351 DIP("%s %s,%s\n", insn[0]==0xF3 ? "movq" : "movsd", dis_buf, 10352 nameXMMReg(gregOfRM(modrm))); 10353 delta += 3+alen; 10354 } 10355 goto decode_success; 10356 } 10357 10358 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 10359 or lo half xmm).
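In the reg-reg case only the low lane of E is written; bits 127:64 of E are unchanged.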
*/ 10360 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) { 10361 vassert(sz == 4); 10362 modrm = getIByte(delta+3); 10363 if (epartIsReg(modrm)) { 10364 putXMMRegLane64( eregOfRM(modrm), 0, 10365 getXMMRegLane64( gregOfRM(modrm), 0 )); 10366 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), 10367 nameXMMReg(eregOfRM(modrm))); 10368 delta += 3+1; 10369 } else { 10370 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 10371 storeLE( mkexpr(addr), 10372 getXMMRegLane64(gregOfRM(modrm), 0) ); 10373 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)), 10374 dis_buf); 10375 delta += 3+alen; 10376 } 10377 goto decode_success; 10378 } 10379 10380 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 10381 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) { 10382 delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 ); 10383 goto decode_success; 10384 } 10385 10386 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ 10387 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) { 10388 vassert(sz == 4); 10389 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 ); 10390 goto decode_success; 10391 } 10392 10393 /* 66 0F 56 = ORPD -- G = G or E */ 10394 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) { 10395 delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 ); 10396 goto decode_success; 10397 } 10398 10399 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 10400 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) { 10401 Int select; 10402 IRTemp sV = newTemp(Ity_V128); 10403 IRTemp dV = newTemp(Ity_V128); 10404 IRTemp s1 = newTemp(Ity_I64); 10405 IRTemp s0 = newTemp(Ity_I64); 10406 IRTemp d1 = newTemp(Ity_I64); 10407 IRTemp d0 = newTemp(Ity_I64); 10408 10409 modrm = insn[2]; 10410 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10411 10412 if (epartIsReg(modrm)) { 10413 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10414 select = (Int)insn[3]; 10415 delta += 2+2; 10416 DIP("shufpd $%d,%s,%s\n", select, 10417 nameXMMReg(eregOfRM(modrm)), 10418 nameXMMReg(gregOfRM(modrm))); 10419 } else { 10420 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10421 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10422 select = (Int)insn[2+alen]; 10423 delta += 3+alen; 10424 DIP("shufpd $%d,%s,%s\n", select, 10425 dis_buf, 10426 nameXMMReg(gregOfRM(modrm))); 10427 } 10428 10429 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10430 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10431 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10432 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10433 10434 # define SELD(n) mkexpr((n)==0 ? d0 : d1) 10435 # define SELS(n) mkexpr((n)==0 ?
s0 : s1) 10436 10437 putXMMReg( 10438 gregOfRM(modrm), 10439 binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) ) 10440 ); 10441 10442 # undef SELD 10443 # undef SELS 10444 10445 goto decode_success; 10446 } 10447 10448 /* 66 0F 51 = SQRTPD -- sqrt 64Fx2 from R/M to R */ 10449 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) { 10450 delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 10451 "sqrtpd", Iop_Sqrt64Fx2 ); 10452 goto decode_success; 10453 } 10454 10455 /* F2 0F 51 = SQRTSD -- sqrt 64F0x2 from R/M to R */ 10456 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) { 10457 vassert(sz == 4); 10458 delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3, 10459 "sqrtsd", Iop_Sqrt64F0x2 ); 10460 goto decode_success; 10461 } 10462 10463 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 10464 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) { 10465 delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 ); 10466 goto decode_success; 10467 } 10468 10469 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 10470 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) { 10471 vassert(sz == 4); 10472 delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 ); 10473 goto decode_success; 10474 } 10475 10476 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 10477 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 10478 /* These just appear to be special cases of SHUFPD */ 10479 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) { 10480 IRTemp s1 = newTemp(Ity_I64); 10481 IRTemp s0 = newTemp(Ity_I64); 10482 IRTemp d1 = newTemp(Ity_I64); 10483 IRTemp d0 = newTemp(Ity_I64); 10484 IRTemp sV = newTemp(Ity_V128); 10485 IRTemp dV = newTemp(Ity_V128); 10486 Bool hi = toBool(insn[1] == 0x15); 10487 10488 modrm = insn[2]; 10489 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10490 10491 if (epartIsReg(modrm)) { 10492 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10493 delta += 2+1; 10494 DIP("unpck%spd %s,%s\n", hi ? "h" : "l", 10495 nameXMMReg(eregOfRM(modrm)), 10496 nameXMMReg(gregOfRM(modrm))); 10497 } else { 10498 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10499 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10500 delta += 2+alen; 10501 DIP("unpck%spd %s,%s\n", hi ?
"h" : "l", 10502 dis_buf, 10503 nameXMMReg(gregOfRM(modrm))); 10504 } 10505 10506 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10507 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10508 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10509 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10510 10511 if (hi) { 10512 putXMMReg( gregOfRM(modrm), 10513 binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 10514 } else { 10515 putXMMReg( gregOfRM(modrm), 10516 binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 10517 } 10518 10519 goto decode_success; 10520 } 10521 10522 /* 66 0F 57 = XORPD -- G = G and E */ 10523 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) { 10524 delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 ); 10525 goto decode_success; 10526 } 10527 10528 /* 66 0F 6B = PACKSSDW */ 10529 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { 10530 delta = dis_SSEint_E_to_G( sorb, delta+2, 10531 "packssdw", Iop_QNarrow32Sx4, True ); 10532 goto decode_success; 10533 } 10534 10535 /* 66 0F 63 = PACKSSWB */ 10536 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { 10537 delta = dis_SSEint_E_to_G( sorb, delta+2, 10538 "packsswb", Iop_QNarrow16Sx8, True ); 10539 goto decode_success; 10540 } 10541 10542 /* 66 0F 67 = PACKUSWB */ 10543 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { 10544 delta = dis_SSEint_E_to_G( sorb, delta+2, 10545 "packuswb", Iop_QNarrow16Ux8, True ); 10546 goto decode_success; 10547 } 10548 10549 /* 66 0F FC = PADDB */ 10550 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) { 10551 delta = dis_SSEint_E_to_G( sorb, delta+2, 10552 "paddb", Iop_Add8x16, False ); 10553 goto decode_success; 10554 } 10555 10556 /* 66 0F FE = PADDD */ 10557 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) { 10558 delta = dis_SSEint_E_to_G( sorb, delta+2, 10559 "paddd", Iop_Add32x4, False ); 10560 goto decode_success; 10561 } 10562 10563 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 10564 /* 0F D4 = PADDQ -- add 64x1 */ 10565 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) { 10566 do_MMX_preamble(); 10567 delta = dis_MMXop_regmem_to_reg ( 10568 sorb, delta+2, insn[1], "paddq", False ); 10569 goto decode_success; 10570 } 10571 10572 /* 66 0F D4 = PADDQ */ 10573 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) { 10574 delta = dis_SSEint_E_to_G( sorb, delta+2, 10575 "paddq", Iop_Add64x2, False ); 10576 goto decode_success; 10577 } 10578 10579 /* 66 0F FD = PADDW */ 10580 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) { 10581 delta = dis_SSEint_E_to_G( sorb, delta+2, 10582 "paddw", Iop_Add16x8, False ); 10583 goto decode_success; 10584 } 10585 10586 /* 66 0F EC = PADDSB */ 10587 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) { 10588 delta = dis_SSEint_E_to_G( sorb, delta+2, 10589 "paddsb", Iop_QAdd8Sx16, False ); 10590 goto decode_success; 10591 } 10592 10593 /* 66 0F ED = PADDSW */ 10594 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) { 10595 delta = dis_SSEint_E_to_G( sorb, delta+2, 10596 "paddsw", Iop_QAdd16Sx8, False ); 10597 goto decode_success; 10598 } 10599 10600 /* 66 0F DC = PADDUSB */ 10601 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) { 10602 delta = dis_SSEint_E_to_G( sorb, delta+2, 10603 "paddusb", Iop_QAdd8Ux16, False ); 10604 goto decode_success; 10605 } 10606 10607 /* 66 0F DD = PADDUSW */ 10608 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) { 10609 delta = dis_SSEint_E_to_G( sorb, delta+2, 10610 "paddusw", Iop_QAdd16Ux8, False ); 10611 goto decode_success; 10612 } 10613 10614 /* 66 0F DB = PAND */ 10615 if (sz == 2 && 
insn[0] == 0x0F && insn[1] == 0xDB) { 10616 delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 ); 10617 goto decode_success; 10618 } 10619 10620 /* 66 0F DF = PANDN */ 10621 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) { 10622 delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 ); 10623 goto decode_success; 10624 } 10625 10626 /* 66 0F E0 = PAVGB */ 10627 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) { 10628 delta = dis_SSEint_E_to_G( sorb, delta+2, 10629 "pavgb", Iop_Avg8Ux16, False ); 10630 goto decode_success; 10631 } 10632 10633 /* 66 0F E3 = PAVGW */ 10634 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) { 10635 delta = dis_SSEint_E_to_G( sorb, delta+2, 10636 "pavgw", Iop_Avg16Ux8, False ); 10637 goto decode_success; 10638 } 10639 10640 /* 66 0F 74 = PCMPEQB */ 10641 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) { 10642 delta = dis_SSEint_E_to_G( sorb, delta+2, 10643 "pcmpeqb", Iop_CmpEQ8x16, False ); 10644 goto decode_success; 10645 } 10646 10647 /* 66 0F 76 = PCMPEQD */ 10648 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) { 10649 delta = dis_SSEint_E_to_G( sorb, delta+2, 10650 "pcmpeqd", Iop_CmpEQ32x4, False ); 10651 goto decode_success; 10652 } 10653 10654 /* 66 0F 75 = PCMPEQW */ 10655 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) { 10656 delta = dis_SSEint_E_to_G( sorb, delta+2, 10657 "pcmpeqw", Iop_CmpEQ16x8, False ); 10658 goto decode_success; 10659 } 10660 10661 /* 66 0F 64 = PCMPGTB */ 10662 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) { 10663 delta = dis_SSEint_E_to_G( sorb, delta+2, 10664 "pcmpgtb", Iop_CmpGT8Sx16, False ); 10665 goto decode_success; 10666 } 10667 10668 /* 66 0F 66 = PCMPGTD */ 10669 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) { 10670 delta = dis_SSEint_E_to_G( sorb, delta+2, 10671 "pcmpgtd", Iop_CmpGT32Sx4, False ); 10672 goto decode_success; 10673 } 10674 10675 /* 66 0F 65 = PCMPGTW */ 10676 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) { 10677 delta = dis_SSEint_E_to_G( sorb, delta+2, 10678 "pcmpgtw", Iop_CmpGT16Sx8, False ); 10679 goto decode_success; 10680 } 10681 10682 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 10683 zero-extend of it in ireg(G). */ 10684 if (insn[0] == 0x0F && insn[1] == 0xC5) { 10685 modrm = insn[2]; 10686 if (sz == 2 && epartIsReg(modrm)) { 10687 t5 = newTemp(Ity_V128); 10688 t4 = newTemp(Ity_I16); 10689 assign(t5, getXMMReg(eregOfRM(modrm))); 10690 breakup128to32s( t5, &t3, &t2, &t1, &t0 ); 10691 switch (insn[3] & 7) { 10692 case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break; 10693 case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break; 10694 case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break; 10695 case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break; 10696 case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break; 10697 case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break; 10698 case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break; 10699 case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break; 10700 default: vassert(0); /*NOTREACHED*/ 10701 } 10702 putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4))); 10703 DIP("pextrw $%d,%s,%s\n", 10704 (Int)insn[3], nameXMMReg(eregOfRM(modrm)), 10705 nameIReg(4,gregOfRM(modrm))); 10706 delta += 4; 10707 goto decode_success; 10708 } 10709 /* else fall through */ 10710 } 10711 10712 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 10713 put it into the specified lane of xmm(G). 
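Only the low 3 bits of the immediate select the lane, so e.g. pinsrw $3,%ax,%xmm2 replaces bits 63:48 of %xmm2 and leaves the other seven lanes unchanged.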
*/ 10714 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) { 10715 Int lane; 10716 t4 = newTemp(Ity_I16); 10717 modrm = insn[2]; 10718 10719 if (epartIsReg(modrm)) { 10720 assign(t4, getIReg(2, eregOfRM(modrm))); 10721 delta += 3+1; 10722 lane = insn[3+1-1]; 10723 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10724 nameIReg(2,eregOfRM(modrm)), 10725 nameXMMReg(gregOfRM(modrm))); 10726 } else { 10727 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10728 delta += 3+alen; 10729 lane = insn[3+alen-1]; 10730 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 10731 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 10732 dis_buf, 10733 nameXMMReg(gregOfRM(modrm))); 10734 } 10735 10736 putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) ); 10737 goto decode_success; 10738 } 10739 10740 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 10741 E(xmm or mem) to G(xmm) */ 10742 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) { 10743 IRTemp s1V = newTemp(Ity_V128); 10744 IRTemp s2V = newTemp(Ity_V128); 10745 IRTemp dV = newTemp(Ity_V128); 10746 IRTemp s1Hi = newTemp(Ity_I64); 10747 IRTemp s1Lo = newTemp(Ity_I64); 10748 IRTemp s2Hi = newTemp(Ity_I64); 10749 IRTemp s2Lo = newTemp(Ity_I64); 10750 IRTemp dHi = newTemp(Ity_I64); 10751 IRTemp dLo = newTemp(Ity_I64); 10752 modrm = insn[2]; 10753 if (epartIsReg(modrm)) { 10754 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 10755 delta += 2+1; 10756 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10757 nameXMMReg(gregOfRM(modrm))); 10758 } else { 10759 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10760 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 10761 delta += 2+alen; 10762 DIP("pmaddwd %s,%s\n", dis_buf, 10763 nameXMMReg(gregOfRM(modrm))); 10764 } 10765 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 10766 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 10767 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 10768 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 10769 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 10770 assign( dHi, mkIRExprCCall( 10771 Ity_I64, 0/*regparms*/, 10772 "x86g_calculate_mmx_pmaddwd", 10773 &x86g_calculate_mmx_pmaddwd, 10774 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 10775 )); 10776 assign( dLo, mkIRExprCCall( 10777 Ity_I64, 0/*regparms*/, 10778 "x86g_calculate_mmx_pmaddwd", 10779 &x86g_calculate_mmx_pmaddwd, 10780 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 10781 )); 10782 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 10783 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 10784 goto decode_success; 10785 } 10786 10787 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 10788 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) { 10789 delta = dis_SSEint_E_to_G( sorb, delta+2, 10790 "pmaxsw", Iop_Max16Sx8, False ); 10791 goto decode_success; 10792 } 10793 10794 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 10795 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) { 10796 delta = dis_SSEint_E_to_G( sorb, delta+2, 10797 "pmaxub", Iop_Max8Ux16, False ); 10798 goto decode_success; 10799 } 10800 10801 /* 66 0F EA = PMINSW -- 16x8 signed min */ 10802 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) { 10803 delta = dis_SSEint_E_to_G( sorb, delta+2, 10804 "pminsw", Iop_Min16Sx8, False ); 10805 goto decode_success; 10806 } 10807 10808 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 10809 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) { 10810 delta = dis_SSEint_E_to_G( sorb, delta+2, 10811 "pminub", Iop_Min8Ux16, False ); 10812 goto decode_success; 10813 } 10814 10815 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in 10816 
xmm(E), turn them into a 16-bit value, and put zero-extend of it in 10817 ireg(G). Doing this directly is just too cumbersome; give up 10818 therefore and call a helper. */ 10819 /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */ 10820 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) { 10821 modrm = insn[2]; 10822 if (epartIsReg(modrm)) { 10823 t0 = newTemp(Ity_I64); 10824 t1 = newTemp(Ity_I64); 10825 assign(t0, getXMMRegLane64(eregOfRM(modrm), 0)); 10826 assign(t1, getXMMRegLane64(eregOfRM(modrm), 1)); 10827 t5 = newTemp(Ity_I32); 10828 assign(t5, mkIRExprCCall( 10829 Ity_I32, 0/*regparms*/, 10830 "x86g_calculate_sse_pmovmskb", 10831 &x86g_calculate_sse_pmovmskb, 10832 mkIRExprVec_2( mkexpr(t1), mkexpr(t0) ))); 10833 putIReg(4, gregOfRM(modrm), mkexpr(t5)); 10834 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10835 nameIReg(4,gregOfRM(modrm))); 10836 delta += 3; 10837 goto decode_success; 10838 } 10839 /* else fall through */ 10840 } 10841 10842 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 10843 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) { 10844 delta = dis_SSEint_E_to_G( sorb, delta+2, 10845 "pmulhuw", Iop_MulHi16Ux8, False ); 10846 goto decode_success; 10847 } 10848 10849 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 10850 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) { 10851 delta = dis_SSEint_E_to_G( sorb, delta+2, 10852 "pmulhw", Iop_MulHi16Sx8, False ); 10853 goto decode_success; 10854 } 10855 10856 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 10857 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) { 10858 delta = dis_SSEint_E_to_G( sorb, delta+2, 10859 "pmullw", Iop_Mul16x8, False ); 10860 goto decode_success; 10861 } 10862 10863 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 10864 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 10865 0 to form 64-bit result */ 10866 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) { 10867 IRTemp sV = newTemp(Ity_I64); 10868 IRTemp dV = newTemp(Ity_I64); 10869 t1 = newTemp(Ity_I32); 10870 t0 = newTemp(Ity_I32); 10871 modrm = insn[2]; 10872 10873 do_MMX_preamble(); 10874 assign( dV, getMMXReg(gregOfRM(modrm)) ); 10875 10876 if (epartIsReg(modrm)) { 10877 assign( sV, getMMXReg(eregOfRM(modrm)) ); 10878 delta += 2+1; 10879 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)), 10880 nameMMXReg(gregOfRM(modrm))); 10881 } else { 10882 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10883 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 10884 delta += 2+alen; 10885 DIP("pmuludq %s,%s\n", dis_buf, 10886 nameMMXReg(gregOfRM(modrm))); 10887 } 10888 10889 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 10890 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 10891 putMMXReg( gregOfRM(modrm), 10892 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 10893 goto decode_success; 10894 } 10895 10896 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 10897 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 10898 half */ 10899 /* This is a really poor translation -- could be improved if 10900 performance critical */ 10901 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) { 10902 IRTemp sV, dV; 10903 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10904 sV = newTemp(Ity_V128); 10905 dV = newTemp(Ity_V128); 10906 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10907 t1 = newTemp(Ity_I64); 10908 t0 = newTemp(Ity_I64); 10909 modrm = insn[2]; 10910 assign( dV, getXMMReg(gregOfRM(modrm)) ); 10911 10912 if (epartIsReg(modrm)) { 10913 assign( sV,
getXMMReg(eregOfRM(modrm)) ); 10914 delta += 2+1; 10915 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10916 nameXMMReg(gregOfRM(modrm))); 10917 } else { 10918 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10919 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10920 delta += 2+alen; 10921 DIP("pmuludq %s,%s\n", dis_buf, 10922 nameXMMReg(gregOfRM(modrm))); 10923 } 10924 10925 breakup128to32s( dV, &d3, &d2, &d1, &d0 ); 10926 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 10927 10928 assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ); 10929 putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) ); 10930 assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) ); 10931 putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) ); 10932 goto decode_success; 10933 } 10934 10935 /* 66 0F EB = POR */ 10936 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) { 10937 delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 ); 10938 goto decode_success; 10939 } 10940 10941 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 10942 from E(xmm or mem) to G(xmm) */ 10943 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) { 10944 IRTemp s1V = newTemp(Ity_V128); 10945 IRTemp s2V = newTemp(Ity_V128); 10946 IRTemp dV = newTemp(Ity_V128); 10947 IRTemp s1Hi = newTemp(Ity_I64); 10948 IRTemp s1Lo = newTemp(Ity_I64); 10949 IRTemp s2Hi = newTemp(Ity_I64); 10950 IRTemp s2Lo = newTemp(Ity_I64); 10951 IRTemp dHi = newTemp(Ity_I64); 10952 IRTemp dLo = newTemp(Ity_I64); 10953 modrm = insn[2]; 10954 if (epartIsReg(modrm)) { 10955 assign( s1V, getXMMReg(eregOfRM(modrm)) ); 10956 delta += 2+1; 10957 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)), 10958 nameXMMReg(gregOfRM(modrm))); 10959 } else { 10960 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 10961 assign( s1V, loadLE(Ity_V128, mkexpr(addr)) ); 10962 delta += 2+alen; 10963 DIP("psadbw %s,%s\n", dis_buf, 10964 nameXMMReg(gregOfRM(modrm))); 10965 } 10966 assign( s2V, getXMMReg(gregOfRM(modrm)) ); 10967 assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) ); 10968 assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) ); 10969 assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) ); 10970 assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) ); 10971 assign( dHi, mkIRExprCCall( 10972 Ity_I64, 0/*regparms*/, 10973 "x86g_calculate_mmx_psadbw", 10974 &x86g_calculate_mmx_psadbw, 10975 mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi)) 10976 )); 10977 assign( dLo, mkIRExprCCall( 10978 Ity_I64, 0/*regparms*/, 10979 "x86g_calculate_mmx_psadbw", 10980 &x86g_calculate_mmx_psadbw, 10981 mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo)) 10982 )); 10983 assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ; 10984 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 10985 goto decode_success; 10986 } 10987 10988 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 10989 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) { 10990 Int order; 10991 IRTemp sV, dV, s3, s2, s1, s0; 10992 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10993 sV = newTemp(Ity_V128); 10994 dV = newTemp(Ity_V128); 10995 modrm = insn[2]; 10996 if (epartIsReg(modrm)) { 10997 assign( sV, getXMMReg(eregOfRM(modrm)) ); 10998 order = (Int)insn[3]; 10999 delta += 2+2; 11000 DIP("pshufd $%d,%s,%s\n", order, 11001 nameXMMReg(eregOfRM(modrm)), 11002 nameXMMReg(gregOfRM(modrm))); 11003 } else { 11004 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11005 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11006 order = (Int)insn[2+alen]; 11007 delta += 3+alen; 11008 DIP("pshufd $%d,%s,%s\n", order, 11009 dis_buf, 11010 nameXMMReg(gregOfRM(modrm))); 
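/* Each 2-bit field of 'order' selects the source lane for one destination lane, from bits 1:0 for dest lane 0 up to bits 7:6 for dest lane 3; e.g. order == 0x1B reverses the four lanes, and order == 0x00 broadcasts lane 0. */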
11011 } 11012 breakup128to32s( sV, &s3, &s2, &s1, &s0 ); 11013 11014 # define SEL(n) \ 11015 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11016 assign(dV, 11017 mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 11018 SEL((order>>2)&3), SEL((order>>0)&3) ) 11019 ); 11020 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11021 # undef SEL 11022 goto decode_success; 11023 } 11024 11025 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 11026 mem) to G(xmm), and copy lower half */ 11027 if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) { 11028 Int order; 11029 IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0; 11030 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11031 sV = newTemp(Ity_V128); 11032 dV = newTemp(Ity_V128); 11033 sVhi = newTemp(Ity_I64); 11034 dVhi = newTemp(Ity_I64); 11035 modrm = insn[3]; 11036 if (epartIsReg(modrm)) { 11037 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11038 order = (Int)insn[4]; 11039 delta += 4+1; 11040 DIP("pshufhw $%d,%s,%s\n", order, 11041 nameXMMReg(eregOfRM(modrm)), 11042 nameXMMReg(gregOfRM(modrm))); 11043 } else { 11044 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11045 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11046 order = (Int)insn[3+alen]; 11047 delta += 4+alen; 11048 DIP("pshufhw $%d,%s,%s\n", order, 11049 dis_buf, 11050 nameXMMReg(gregOfRM(modrm))); 11051 } 11052 assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) ); 11053 breakup64to16s( sVhi, &s3, &s2, &s1, &s0 ); 11054 11055 # define SEL(n) \ 11056 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11057 assign(dVhi, 11058 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11059 SEL((order>>2)&3), SEL((order>>0)&3) ) 11060 ); 11061 assign(dV, binop( Iop_64HLtoV128, 11062 mkexpr(dVhi), 11063 unop(Iop_V128to64, mkexpr(sV))) ); 11064 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11065 # undef SEL 11066 goto decode_success; 11067 } 11068 11069 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 11070 mem) to G(xmm), and copy upper half */ 11071 if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) { 11072 Int order; 11073 IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0; 11074 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11075 sV = newTemp(Ity_V128); 11076 dV = newTemp(Ity_V128); 11077 sVlo = newTemp(Ity_I64); 11078 dVlo = newTemp(Ity_I64); 11079 modrm = insn[3]; 11080 if (epartIsReg(modrm)) { 11081 assign( sV, getXMMReg(eregOfRM(modrm)) ); 11082 order = (Int)insn[4]; 11083 delta += 4+1; 11084 DIP("pshuflw $%d,%s,%s\n", order, 11085 nameXMMReg(eregOfRM(modrm)), 11086 nameXMMReg(gregOfRM(modrm))); 11087 } else { 11088 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11089 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11090 order = (Int)insn[3+alen]; 11091 delta += 4+alen; 11092 DIP("pshuflw $%d,%s,%s\n", order, 11093 dis_buf, 11094 nameXMMReg(gregOfRM(modrm))); 11095 } 11096 assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) ); 11097 breakup64to16s( sVlo, &s3, &s2, &s1, &s0 ); 11098 11099 # define SEL(n) \ 11100 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 11101 assign(dVlo, 11102 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 11103 SEL((order>>2)&3), SEL((order>>0)&3) ) 11104 ); 11105 assign(dV, binop( Iop_64HLtoV128, 11106 unop(Iop_V128HIto64, mkexpr(sV)), 11107 mkexpr(dVlo) ) ); 11108 putXMMReg(gregOfRM(modrm), mkexpr(dV)); 11109 # undef SEL 11110 goto decode_success; 11111 } 11112 11113 /* 66 0F 72 /6 ib = PSLLD by immediate */ 11114 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11115 && epartIsReg(insn[2]) 11116 && gregOfRM(insn[2]) == 6) { 11117 delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 ); 11118 goto decode_success; 11119 } 11120 11121 /* 66 0F F2 = PSLLD by E */ 11122 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) { 11123 delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 ); 11124 goto decode_success; 11125 } 11126 11127 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 11128 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11129 && epartIsReg(insn[2]) 11130 && gregOfRM(insn[2]) == 7) { 11131 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11132 Int imm = (Int)insn[3]; 11133 Int reg = eregOfRM(insn[2]); 11134 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 11135 vassert(imm >= 0 && imm <= 255); 11136 delta += 4; 11137 11138 sV = newTemp(Ity_V128); 11139 dV = newTemp(Ity_V128); 11140 hi64 = newTemp(Ity_I64); 11141 lo64 = newTemp(Ity_I64); 11142 hi64r = newTemp(Ity_I64); 11143 lo64r = newTemp(Ity_I64); 11144 11145 if (imm >= 16) { 11146 putXMMReg(reg, mkV128(0x0000)); 11147 goto decode_success; 11148 } 11149 11150 assign( sV, getXMMReg(reg) ); 11151 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11152 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11153 11154 if (imm == 0) { 11155 assign( lo64r, mkexpr(lo64) ); 11156 assign( hi64r, mkexpr(hi64) ); 11157 } 11158 else 11159 if (imm == 8) { 11160 assign( lo64r, mkU64(0) ); 11161 assign( hi64r, mkexpr(lo64) ); 11162 } 11163 else 11164 if (imm > 8) { 11165 assign( lo64r, mkU64(0) ); 11166 assign( hi64r, binop( Iop_Shl64, 11167 mkexpr(lo64), 11168 mkU8( 8*(imm-8) ) )); 11169 } else { 11170 assign( lo64r, binop( Iop_Shl64, 11171 mkexpr(lo64), 11172 mkU8(8 * imm) )); 11173 assign( hi64r, 11174 binop( Iop_Or64, 11175 binop(Iop_Shl64, mkexpr(hi64), 11176 mkU8(8 * imm)), 11177 binop(Iop_Shr64, mkexpr(lo64), 11178 mkU8(8 * (8 - imm)) ) 11179 ) 11180 ); 11181 } 11182 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11183 putXMMReg(reg, mkexpr(dV)); 11184 goto decode_success; 11185 } 11186 11187 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 11188 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11189 && epartIsReg(insn[2]) 11190 && gregOfRM(insn[2]) == 6) { 11191 delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 ); 11192 goto decode_success; 11193 } 11194 11195 /* 66 0F F3 = PSLLQ by E */ 11196 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) { 11197 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 ); 11198 goto decode_success; 11199 } 11200 11201 /* 66 0F 71 /6 ib = PSLLW by immediate */ 11202 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11203 && epartIsReg(insn[2]) 11204 && gregOfRM(insn[2]) == 6) { 11205 delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 ); 11206 goto decode_success; 11207 } 11208 11209 /* 66 0F F1 = PSLLW by E */ 11210 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) { 11211 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 ); 11212 goto decode_success; 11213 } 11214 11215 /* 66 0F 72 /4 ib = PSRAD by immediate */ 11216 if (sz == 2 && insn[0] == 0x0F && insn[1] == 
0x72 11217 && epartIsReg(insn[2]) 11218 && gregOfRM(insn[2]) == 4) { 11219 delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 ); 11220 goto decode_success; 11221 } 11222 11223 /* 66 0F E2 = PSRAD by E */ 11224 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) { 11225 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 ); 11226 goto decode_success; 11227 } 11228 11229 /* 66 0F 71 /4 ib = PSRAW by immediate */ 11230 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11231 && epartIsReg(insn[2]) 11232 && gregOfRM(insn[2]) == 4) { 11233 delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 ); 11234 goto decode_success; 11235 } 11236 11237 /* 66 0F E1 = PSRAW by E */ 11238 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) { 11239 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 ); 11240 goto decode_success; 11241 } 11242 11243 /* 66 0F 72 /2 ib = PSRLD by immediate */ 11244 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72 11245 && epartIsReg(insn[2]) 11246 && gregOfRM(insn[2]) == 2) { 11247 delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 ); 11248 goto decode_success; 11249 } 11250 11251 /* 66 0F D2 = PSRLD by E */ 11252 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) { 11253 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 ); 11254 goto decode_success; 11255 } 11256 11257 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 11258 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11259 && epartIsReg(insn[2]) 11260 && gregOfRM(insn[2]) == 3) { 11261 IRTemp sV, dV, hi64, lo64, hi64r, lo64r; 11262 Int imm = (Int)insn[3]; 11263 Int reg = eregOfRM(insn[2]); 11264 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 11265 vassert(imm >= 0 && imm <= 255); 11266 delta += 4; 11267 11268 sV = newTemp(Ity_V128); 11269 dV = newTemp(Ity_V128); 11270 hi64 = newTemp(Ity_I64); 11271 lo64 = newTemp(Ity_I64); 11272 hi64r = newTemp(Ity_I64); 11273 lo64r = newTemp(Ity_I64); 11274 11275 if (imm >= 16) { 11276 putXMMReg(reg, mkV128(0x0000)); 11277 goto decode_success; 11278 } 11279 11280 assign( sV, getXMMReg(reg) ); 11281 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 11282 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 11283 11284 if (imm == 0) { 11285 assign( lo64r, mkexpr(lo64) ); 11286 assign( hi64r, mkexpr(hi64) ); 11287 } 11288 else 11289 if (imm == 8) { 11290 assign( hi64r, mkU64(0) ); 11291 assign( lo64r, mkexpr(hi64) ); 11292 } 11293 else 11294 if (imm > 8) { 11295 assign( hi64r, mkU64(0) ); 11296 assign( lo64r, binop( Iop_Shr64, 11297 mkexpr(hi64), 11298 mkU8( 8*(imm-8) ) )); 11299 } else { 11300 assign( hi64r, binop( Iop_Shr64, 11301 mkexpr(hi64), 11302 mkU8(8 * imm) )); 11303 assign( lo64r, 11304 binop( Iop_Or64, 11305 binop(Iop_Shr64, mkexpr(lo64), 11306 mkU8(8 * imm)), 11307 binop(Iop_Shl64, mkexpr(hi64), 11308 mkU8(8 * (8 - imm)) ) 11309 ) 11310 ); 11311 } 11312 11313 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 11314 putXMMReg(reg, mkexpr(dV)); 11315 goto decode_success; 11316 } 11317 11318 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 11319 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73 11320 && epartIsReg(insn[2]) 11321 && gregOfRM(insn[2]) == 2) { 11322 delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 ); 11323 goto decode_success; 11324 } 11325 11326 /* 66 0F D3 = PSRLQ by E */ 11327 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) { 11328 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 ); 11329 goto decode_success; 11330 } 11331 11332 /* 66 0F 71 /2 ib = PSRLW by immediate */ 11333 if 
(sz == 2 && insn[0] == 0x0F && insn[1] == 0x71 11334 && epartIsReg(insn[2]) 11335 && gregOfRM(insn[2]) == 2) { 11336 delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 ); 11337 goto decode_success; 11338 } 11339 11340 /* 66 0F D1 = PSRLW by E */ 11341 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) { 11342 delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 ); 11343 goto decode_success; 11344 } 11345 11346 /* 66 0F F8 = PSUBB */ 11347 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) { 11348 delta = dis_SSEint_E_to_G( sorb, delta+2, 11349 "psubb", Iop_Sub8x16, False ); 11350 goto decode_success; 11351 } 11352 11353 /* 66 0F FA = PSUBD */ 11354 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) { 11355 delta = dis_SSEint_E_to_G( sorb, delta+2, 11356 "psubd", Iop_Sub32x4, False ); 11357 goto decode_success; 11358 } 11359 11360 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 11361 /* 0F FB = PSUBQ -- sub 64x1 */ 11362 if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) { 11363 do_MMX_preamble(); 11364 delta = dis_MMXop_regmem_to_reg ( 11365 sorb, delta+2, insn[1], "psubq", False ); 11366 goto decode_success; 11367 } 11368 11369 /* 66 0F FB = PSUBQ */ 11370 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) { 11371 delta = dis_SSEint_E_to_G( sorb, delta+2, 11372 "psubq", Iop_Sub64x2, False ); 11373 goto decode_success; 11374 } 11375 11376 /* 66 0F F9 = PSUBW */ 11377 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) { 11378 delta = dis_SSEint_E_to_G( sorb, delta+2, 11379 "psubw", Iop_Sub16x8, False ); 11380 goto decode_success; 11381 } 11382 11383 /* 66 0F E8 = PSUBSB */ 11384 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) { 11385 delta = dis_SSEint_E_to_G( sorb, delta+2, 11386 "psubsb", Iop_QSub8Sx16, False ); 11387 goto decode_success; 11388 } 11389 11390 /* 66 0F E9 = PSUBSW */ 11391 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) { 11392 delta = dis_SSEint_E_to_G( sorb, delta+2, 11393 "psubsw", Iop_QSub16Sx8, False ); 11394 goto decode_success; 11395 } 11396 11397 /* 66 0F D8 = PSUBUSB */ 11398 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) { 11399 delta = dis_SSEint_E_to_G( sorb, delta+2, 11400 "psubusb", Iop_QSub8Ux16, False ); 11401 goto decode_success; 11402 } 11403 11404 /* 66 0F D9 = PSUBUSW */ 11405 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) { 11406 delta = dis_SSEint_E_to_G( sorb, delta+2, 11407 "psubusw", Iop_QSub16Ux8, False ); 11408 goto decode_success; 11409 } 11410 11411 /* 66 0F 68 = PUNPCKHBW */ 11412 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) { 11413 delta = dis_SSEint_E_to_G( sorb, delta+2, 11414 "punpckhbw", 11415 Iop_InterleaveHI8x16, True ); 11416 goto decode_success; 11417 } 11418 11419 /* 66 0F 6A = PUNPCKHDQ */ 11420 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) { 11421 delta = dis_SSEint_E_to_G( sorb, delta+2, 11422 "punpckhdq", 11423 Iop_InterleaveHI32x4, True ); 11424 goto decode_success; 11425 } 11426 11427 /* 66 0F 6D = PUNPCKHQDQ */ 11428 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) { 11429 delta = dis_SSEint_E_to_G( sorb, delta+2, 11430 "punpckhqdq", 11431 Iop_InterleaveHI64x2, True ); 11432 goto decode_success; 11433 } 11434 11435 /* 66 0F 69 = PUNPCKHWD */ 11436 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) { 11437 delta = dis_SSEint_E_to_G( sorb, delta+2, 11438 "punpckhwd", 11439 Iop_InterleaveHI16x8, True ); 11440 goto decode_success; 11441 } 11442 11443 /* 66 0F 60 = PUNPCKLBW */ 11444 if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) { 11445 delta = dis_SSEint_E_to_G(
   /* 66 0F 60 = PUNPCKLBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }

   /* 66 0F 62 = PUNPCKLDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }

   /* 66 0F 6C = PUNPCKLQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }

   /* 66 0F 61 = PUNPCKLWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
      delta = dis_SSEint_E_to_G( sorb, delta+2,
                                 "punpcklwd",
                                 Iop_InterleaveLO16x8, True );
      goto decode_success;
   }

   /* 66 0F EF = PXOR */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
      goto decode_success;
   }

//--    /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
//--    if (insn[0] == 0x0F && insn[1] == 0xAE
//--        && (!epartIsReg(insn[2]))
//--        && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
//--       Bool store = gregOfRM(insn[2]) == 0;
//--       vg_assert(sz == 4);
//--       pair = disAMode ( cb, sorb, eip+2, dis_buf );
//--       t1   = LOW24(pair);
//--       eip += 2+HI8(pair);
//--       uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
//--                   Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
//--                   Lit16, (UShort)insn[2],
//--                   TempReg, t1 );
//--       DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
//--       goto decode_success;
//--    }

   /* 0F AE /7 = CLFLUSH -- flush cache line */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {

      /* This is something of a hack.  We need to know the size of the
         cache line containing addr.  Since we don't (easily), assume
         256 on the basis that no real cache would have a line that
         big.  It's safe to invalidate more stuff than we need, just
         inefficient. */
      UInt lineszB = 256;

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Round addr down to the start of the containing block. */
      stmt( IRStmt_Put(
               OFFB_TISTART,
               binop( Iop_And32,
                      mkexpr(addr),
                      mkU32( ~(lineszB-1) ))) );

      stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );

      irsb->jumpkind = Ijk_TInval;
      irsb->next     = mkU32(guest_EIP_bbstart+delta);
      dres.whatNext  = Dis_StopHere;

      DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSE2 decoder.                     --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE3 decoder.                   --- */
   /* ---------------------------------------------------- */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
   /* In fact this is highly bogus; we accept SSE3 insns even on a
      SSE2-only guest since they turn into IR which can be re-emitted
      successfully on an SSE2 host. */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
      goto after_sse_decoders; /* no SSE3 capabilities */

   insn = (UChar*)&guest_code[delta];

   /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (2:2:0:0). */
   /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (3:3:1:1). */
   if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
       && (insn[2] == 0x12 || insn[2] == 0x16)) {
      IRTemp s3, s2, s1, s0;
      IRTemp sV  = newTemp(Ity_V128);
      Bool   isH = insn[2] == 0x16;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
      putXMMReg( gregOfRM(modrm),
                 isH ? mk128from32s( s3, s3, s1, s1 )
                     : mk128from32s( s2, s2, s0, s0 ) );
      goto decode_success;
   }

   /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (0:1:0:1). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movddup %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
      goto decode_success;
   }
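   /* To see the lane routing above: with source 32-bit lanes
      s3:s2:s1:s0, movshdup produces s3:s3:s1:s1 and movsldup produces
      s2:s2:s0:s0, whilst movddup simply repeats the low 64-bit half
      of its source in both halves of the destination. */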
   /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
      IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
      assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }

   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp a1   = newTemp(Ity_I64);
      IRTemp s0   = newTemp(Ity_I64);

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
      assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

      putXMMReg( gregOfRM(modrm),
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }
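   /* In both addsub cases, odd-numbered lanes of the result take the
      add and even-numbered lanes the subtract: addsubps produces
      a3:s2:a1:s0 and addsubpd produces a1:s0, where aN and sN denote
      the lanes of gV+eV and gV-eV respectively. */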
"add" : "sub"; 11690 e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID; 11691 11692 modrm = insn[3]; 11693 if (epartIsReg(modrm)) { 11694 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11695 DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11696 nameXMMReg(gregOfRM(modrm))); 11697 delta += 3+1; 11698 } else { 11699 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11700 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11701 DIP("h%sps %s,%s\n", str, dis_buf, 11702 nameXMMReg(gregOfRM(modrm))); 11703 delta += 3+alen; 11704 } 11705 11706 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11707 11708 breakup128to32s( eV, &e3, &e2, &e1, &e0 ); 11709 breakup128to32s( gV, &g3, &g2, &g1, &g0 ); 11710 11711 assign( leftV, mk128from32s( e2, e0, g2, g0 ) ); 11712 assign( rightV, mk128from32s( e3, e1, g3, g1 ) ); 11713 11714 putXMMReg( gregOfRM(modrm), 11715 binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 11716 mkexpr(leftV), mkexpr(rightV) ) ); 11717 goto decode_success; 11718 } 11719 11720 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 11721 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 11722 if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) { 11723 IRTemp e1 = newTemp(Ity_I64); 11724 IRTemp e0 = newTemp(Ity_I64); 11725 IRTemp g1 = newTemp(Ity_I64); 11726 IRTemp g0 = newTemp(Ity_I64); 11727 IRTemp eV = newTemp(Ity_V128); 11728 IRTemp gV = newTemp(Ity_V128); 11729 IRTemp leftV = newTemp(Ity_V128); 11730 IRTemp rightV = newTemp(Ity_V128); 11731 Bool isAdd = insn[1] == 0x7C; 11732 HChar* str = isAdd ? "add" : "sub"; 11733 11734 modrm = insn[2]; 11735 if (epartIsReg(modrm)) { 11736 assign( eV, getXMMReg( eregOfRM(modrm)) ); 11737 DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), 11738 nameXMMReg(gregOfRM(modrm))); 11739 delta += 2+1; 11740 } else { 11741 addr = disAMode ( &alen, sorb, delta+2, dis_buf ); 11742 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 11743 DIP("h%spd %s,%s\n", str, dis_buf, 11744 nameXMMReg(gregOfRM(modrm))); 11745 delta += 2+alen; 11746 } 11747 11748 assign( gV, getXMMReg(gregOfRM(modrm)) ); 11749 11750 assign( e1, unop(Iop_V128HIto64, mkexpr(eV) )); 11751 assign( e0, unop(Iop_V128to64, mkexpr(eV) )); 11752 assign( g1, unop(Iop_V128HIto64, mkexpr(gV) )); 11753 assign( g0, unop(Iop_V128to64, mkexpr(gV) )); 11754 11755 assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) ); 11756 assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) ); 11757 11758 putXMMReg( gregOfRM(modrm), 11759 binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2, 11760 mkexpr(leftV), mkexpr(rightV) ) ); 11761 goto decode_success; 11762 } 11763 11764 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */ 11765 if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) { 11766 modrm = getIByte(delta+3); 11767 if (epartIsReg(modrm)) { 11768 goto decode_failure; 11769 } else { 11770 addr = disAMode ( &alen, sorb, delta+3, dis_buf ); 11771 putXMMReg( gregOfRM(modrm), 11772 loadLE(Ity_V128, mkexpr(addr)) ); 11773 DIP("lddqu %s,%s\n", dis_buf, 11774 nameXMMReg(gregOfRM(modrm))); 11775 delta += 3+alen; 11776 } 11777 goto decode_success; 11778 } 11779 11780 /* ---------------------------------------------------- */ 11781 /* --- end of the SSE3 decoder. --- */ 11782 /* ---------------------------------------------------- */ 11783 11784 /* ---------------------------------------------------- */ 11785 /* --- start of the SSSE3 decoder. 
   /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_I64);
      IRTemp dV        = newTemp(Ity_I64);
      IRTemp sVoddsSX  = newTemp(Ity_I64);
      IRTemp sVevensSX = newTemp(Ity_I64);
      IRTemp dVoddsZX  = newTemp(Ity_I64);
      IRTemp dVevensZX = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4,
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putMMXReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }

   /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_V128);
      IRTemp dV        = newTemp(Ity_V128);
      IRTemp sVoddsSX  = newTemp(Ity_V128);
      IRTemp sVevensSX = newTemp(Ity_V128);
      IRTemp dVoddsZX  = newTemp(Ity_V128);
      IRTemp dVevensZX = newTemp(Ity_V128);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x8,
                    binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x8,
                    binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx8,
               binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
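   /* Re the shift games in the two cases above: they do per-byte
      widening without needing any 8-to-16 extension IROps.  SarN16xN
      by 8 sign-extends the high (odd-numbered) byte of each 16-bit
      lane; ShlN16xN by 8 followed by SarN16xN by 8 sign-extends the
      low (even-numbered) byte; and using Shr in place of Sar gives
      the zero-extending variants.  The QAdd16SxN at the end supplies
      the signed saturation of the sum of products which pmaddubsw
      requires. */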
   /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
   /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
      to G (mmx). */

   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      HChar* str    = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_I64);
      IRTemp dV     = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(opV64,
               binop(opCatE,mkexpr(sV),mkexpr(dV)),
               binop(opCatO,mkexpr(sV),mkexpr(dV))
         )
      );
      goto decode_success;
   }
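   /* Worked example of the concat-lanes trick above, for phaddw with
      sources s = s3:s2:s1:s0 and d = d3:d2:d1:d0 (16-bit lanes):
         CatEvenLanes16x4(s,d) = s2:s0:d2:d0
         CatOddLanes16x4(s,d)  = s3:s1:d3:d1
      and adding the two lanewise gives s3+s2 : s1+s0 : d3+d2 : d1+d0,
      which is exactly the horizontal-add result. */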
   /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
      G to G (xmm). */

   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      HChar* str    = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_V128);
      IRTemp dV     = newTemp(Ity_V128);
      IRTemp sHi    = newTemp(Ity_I64);
      IRTemp sLo    = newTemp(Ity_I64);
      IRTemp dHi    = newTemp(Ity_I64);
      IRTemp dLo    = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends. */
      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }

   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }

   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }
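   /* Per 16-bit lane, pmulhrsw forms the full 32-bit signed product,
      shifts it right by 14, adds 1 and shifts right once more; that
      is, the high half of the product rounded to nearest.
      dis_PMULHRSW_helper builds that computation out of 64-bit vector
      IROps, which is why the XMM case above feeds it one half at a
      time. */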
   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_I64);
      IRTemp dV      = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                     nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }

   /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
   /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp dV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      IRTemp dHi     = newTemp(Ity_I64);
      IRTemp dLo     = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                     nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }

   /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
   /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
   /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV      = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PABS_helper( mkexpr(sV), laneszB )
      );
      goto decode_success;
   }
   /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV      = newTemp(Ity_V128);
      IRTemp sHi     = newTemp(Ity_I64);
      IRTemp sLo     = newTemp(Ity_I64);
      HChar* str     = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }

   /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_I64);
      IRTemp dV  = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      if (d32 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign(res,
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
                )));
      }
      else if (d32 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
      }
      else if (d32 >= 16 && d32 <= 255) {
         assign( res, mkU64(0) );
      }
      else
         vassert(0);

      putMMXReg( gregOfRM(modrm), mkexpr(res) );
      goto decode_success;
   }
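   /* palignr notionally concatenates d:s into a double-width value
      and extracts an operand-sized field starting imm bytes up from
      the bottom.  Hence, in the MMX case above, imm in 1..7 gives
      (s >> 8*imm) | (d << 8*(8-imm)), imm == 8 selects d exactly,
      larger values shift d itself down, and the result is zero once
      imm reaches 16.  The XMM case below plays the same game across
      the four 64-bit pieces of d:s. */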
   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%d,%s,%s\n", (Int)d32,
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      if (d32 == 0) {
         assign( rHi, mkexpr(sHi) );
         assign( rLo, mkexpr(sLo) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
         assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
      }
      else if (d32 == 8) {
         assign( rHi, mkexpr(dLo) );
         assign( rLo, mkexpr(sHi) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
         assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
      }
      else if (d32 == 16) {
         assign( rHi, mkexpr(dHi) );
         assign( rLo, mkexpr(dLo) );
      }
      else if (d32 >= 17 && d32 <= 23) {
         assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
         assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
      }
      else if (d32 == 24) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkexpr(dHi) );
      }
      else if (d32 >= 25 && d32 <= 31) {
         assign( rHi, mkU64(0) );
         assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
      }
      else if (d32 >= 32 && d32 <= 255) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkU64(0) );
      }
      else
         vassert(0);

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }

   /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                               nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(
            Iop_And64,
            /* permute the lanes */
            binop(
               Iop_Perm8x8,
               mkexpr(dV),
               binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
            ),
            /* mask off lanes which have (index & 0x80) == 0x80 */
            unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
         )
      );
      goto decode_success;
   }
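   /* In the 64-bit pshufb above, only bits 2:0 of each control byte
      can select a source lane, hence the And with 0x07s before the
      Iop_Perm8x8.  SarN8x8 by 7 copies bit 7 of each control byte
      across its whole lane, so the Not of that is a mask which zeroes
      exactly those result lanes whose control byte has its top bit
      set, as the instruction requires.  The 128-bit version below has
      to work harder: each control byte indexes 16 lanes (bit 3 picks
      the half), but Iop_Perm8x8 only permutes within 8 lanes, so both
      halves are permuted and bit 3 selects between the results. */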
   /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV         = newTemp(Ity_V128);
      IRTemp dV         = newTemp(Ity_V128);
      IRTemp sHi        = newTemp(Ity_I64);
      IRTemp sLo        = newTemp(Ity_I64);
      IRTemp dHi        = newTemp(Ity_I64);
      IRTemp dLo        = newTemp(Ity_I64);
      IRTemp rHi        = newTemp(Ity_I64);
      IRTemp rLo        = newTemp(Ity_I64);
      IRTemp sevens     = newTemp(Ity_I64);
      IRTemp mask0x80hi = newTemp(Ity_I64);
      IRTemp mask0x80lo = newTemp(Ity_I64);
      IRTemp maskBit3hi = newTemp(Ity_I64);
      IRTemp maskBit3lo = newTemp(Ity_I64);
      IRTemp sAnd7hi    = newTemp(Ity_I64);
      IRTemp sAnd7lo    = newTemp(Ity_I64);
      IRTemp permdHi    = newTemp(Ity_I64);
      IRTemp permdLo    = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      assign( sevens, mkU64(0x0707070707070707ULL) );

      /*
         mask0x80hi = Not(SarN8x8(sHi,7))
         maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
         sAnd7hi    = And(sHi,sevens)
         permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                          And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
         rHi        = And(permdHi,mask0x80hi)
      */
      assign(
         mask0x80hi,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

      assign(
         maskBit3hi,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
               mkU8(7)));

      assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

      assign(
         permdHi,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
                  mkexpr(maskBit3hi)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
                  unop(Iop_Not64,mkexpr(maskBit3hi))) ));

      assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

      /* And the same for the lower half of the result.  What fun. */

      assign(
         mask0x80lo,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

      assign(
         maskBit3lo,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
               mkU8(7)));

      assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

      assign(
         permdLo,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
                  mkexpr(maskBit3lo)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
                  unop(Iop_Not64,mkexpr(maskBit3lo))) ));

      assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSSE3 decoder.                    --- */
   /* ---------------------------------------------------- */
   /* ---------------------------------------------------- */
   /* --- start of the SSE4 decoder                    --- */
   /* ---------------------------------------------------- */

   /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
      (Partial implementation only -- only deal with cases where
      the rounding mode is specified directly by the immediate byte.)
      66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
      (Limitations ditto)
   */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A
       && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) {

      Bool   isD = insn[2] == 0x0B;
      IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
      IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
      Int    imm = 0;

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( src,
                 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
                     : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
         imm = insn[3+1];
         if (imm & ~3) goto decode_failure;
         delta += 3+1+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, nameXMMReg( eregOfRM(modrm) ),
                   nameXMMReg( gregOfRM(modrm) ) );
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
         imm = insn[3+alen];
         if (imm & ~3) goto decode_failure;
         delta += 3+alen+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
      }

      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
         that encoding is the same as the encoding for IRRoundingMode,
         we can use that value directly in the IR as a rounding
         mode. */
      assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                        mkU32(imm & 3), mkexpr(src)) );

      if (isD)
         putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
      else
         putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );

      goto decode_success;
   }

   /* F3 0F BD = LZCNT (count leading zeroes).  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
      vassert(sz == 2 || sz == 4);
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign(src, getIReg(sz, eregOfRM(modrm)));
         delta += 3+1;
         DIP("lzcnt%c %s, %s\n", nameISize(sz),
             nameIReg(sz, eregOfRM(modrm)),
             nameIReg(sz, gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign(src, loadLE(ty, mkexpr(addr)));
         delta += 3+alen;
         DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIReg(sz, gregOfRM(modrm)));
      }

      IRTemp res = gen_LZCNT(ty, src);
      putIReg(sz, gregOfRM(modrm), mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
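      // Since CC_OP is set to X86G_CC_OP_COPY, the flags thunk takes
      // DEP1 as holding the literal OSZACP bits, so it suffices to
      // compute just the Z and C bits at their X86G_CC_SHIFT_*
      // positions and leave all the other flag bits zero.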
      IRTemp src32 = newTemp(Ity_I32);
      IRTemp res32 = newTemp(Ity_I32);
      assign(src32, widenUto32(mkexpr(src)));
      assign(res32, widenUto32(mkexpr(res)));

      IRTemp oszacp = newTemp(Ity_I32);
      assign(
         oszacp,
         binop(Iop_Or32,
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_Z)),
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_C))
         )
      );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSE4 decoder                      --- */
   /* ---------------------------------------------------- */

  after_sse_decoders:

   /* ---------------------------------------------------- */
   /* --- deal with misc 0x67 pfxs (addr size override) -- */
   /* ---------------------------------------------------- */

   /* 67 E3 = JCXZ (for JECXZ see below) */
   if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
      delta += 2;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
               Ijk_Boring,
               IRConst_U32(d32)
            ));
      DIP("jcxz 0x%x\n", d32);
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- start of the baseline insn decoder           -- */
   /* ---------------------------------------------------- */

   /* Get the primary opcode. */
   opc = getIByte(delta); delta++;

   /* We get here if the current insn isn't SSE, or this CPU doesn't
      support SSE. */

   switch (opc) {

   /* ------------------------ Control flow --------------- */

   case 0xC2: /* RET imm16 */
      d32 = getUDisp16(delta);
      delta += 2;
      dis_ret(d32);
      dres.whatNext = Dis_StopHere;
      DIP("ret %d\n", (Int)d32);
      break;
   case 0xC3: /* RET */
      dis_ret(0);
      dres.whatNext = Dis_StopHere;
      DIP("ret\n");
      break;

   case 0xCF: /* IRET */
      /* Note, this is an extremely kludgey and limited implementation
         of iret.  All it really does is:
            popl %EIP; popl %CS; popl %EFLAGS.
         %CS is set but ignored (as it is in (eg) popw %cs). */
      t1 = newTemp(Ity_I32); /* ESP */
      t2 = newTemp(Ity_I32); /* new EIP */
      t3 = newTemp(Ity_I32); /* new CS */
      t4 = newTemp(Ity_I32); /* new EFLAGS */
      assign(t1, getIReg(4,R_ESP));
      assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
      assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
      assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
      /* Get stuff off stack */
      putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
      /* set %CS (which is ignored anyway) */
      putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
      /* set %EFLAGS */
      set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
      /* goto new EIP value */
      jmp_treg(Ijk_Ret,t2);
      dres.whatNext = Dis_StopHere;
      DIP("iret (very kludgey)\n");
      break;

   case 0xE8: /* CALL J4 */
      d32 = getUDisp32(delta); delta += 4;
      d32 += (guest_EIP_bbstart+delta);
      /* (guest_EIP_bbstart+delta) == return-to addr, d32 == call-to addr */
      if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
          && getIByte(delta) <= 0x5F) {
         /* Specially treat the position-independent-code idiom
                 call X
              X: popl %reg
            as
                 movl %eip, %reg.
            since this generates better code, but for no other reason. */
         Int archReg = getIByte(delta) - 0x58;
         /* vex_printf("-- fPIC thingy\n"); */
         putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
         delta++; /* Step over the POP */
         DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
      } else {
         /* The normal sequence for a call. */
         t1 = newTemp(Ity_I32);
         assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
         putIReg(4, R_ESP, mkexpr(t1));
         storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
         if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) {
            /* follow into the call target. */
            dres.whatNext   = Dis_ResteerU;
            dres.continueAt = (Addr64)(Addr32)d32;
         } else {
            jmp_lit(Ijk_Call,d32);
            dres.whatNext = Dis_StopHere;
         }
         DIP("call 0x%x\n",d32);
      }
      break;

//--    case 0xC8: /* ENTER */
//--       d32 = getUDisp16(eip); eip += 2;
//--       abyte = getIByte(delta); delta++;
//--
//--       vg_assert(sz == 4);
//--       vg_assert(abyte == 0);
//--
//--       t1 = newTemp(cb); t2 = newTemp(cb);
//--       uInstr2(cb, GET,   sz, ArchReg, R_EBP, TempReg, t1);
//--       uInstr2(cb, GET,    4, ArchReg, R_ESP, TempReg, t2);
//--       uInstr2(cb, SUB,    4, Literal, 0,     TempReg, t2);
//--       uLiteral(cb, sz);
//--       uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_ESP);
//--       uInstr2(cb, STORE,  4, TempReg, t1,    TempReg, t2);
//--       uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_EBP);
//--       if (d32) {
//--          uInstr2(cb, SUB,    4, Literal, 0,     TempReg, t2);
//--          uLiteral(cb, d32);
//--          uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_ESP);
//--       }
//--       DIP("enter 0x%x, 0x%x", d32, abyte);
//--       break;

   case 0xC9: /* LEAVE */
      vassert(sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(4,R_EBP));
      /* First PUT ESP looks redundant, but need it because ESP must
         always be up-to-date for Memcheck to work... */
      putIReg(4, R_ESP, mkexpr(t1));
      assign(t2, loadLE(Ity_I32,mkexpr(t1)));
      putIReg(4, R_EBP, mkexpr(t2));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
      DIP("leave\n");
      break;

   /* ---------------- Misc weird-ass insns --------------- */

   case 0x27: /* DAA */
   case 0x2F: /* DAS */
   case 0x37: /* AAA */
   case 0x3F: /* AAS */
      /* An ugly implementation for some ugly instructions.  Oh
         well. */
      if (sz != 4) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 4 instructions it
         is doing the computation for. */
      vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
                &x86g_calculate_daa_das_aaa_aas,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
            ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
                      )
          );
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      switch (opc) {
         case 0x27: DIP("daa\n"); break;
         case 0x2F: DIP("das\n"); break;
         case 0x37: DIP("aaa\n"); break;
         case 0x3F: DIP("aas\n"); break;
         default: vassert(0);
      }
      break;

   case 0xD4: /* AAM */
   case 0xD5: /* AAD */
      d32 = getIByte(delta); delta++;
      if (sz != 4 || d32 != 10) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 2 instructions it
         is doing the computation for. */
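      /* For reference: aam divides %al by the immediate (only 10 is
         accepted here), leaving the quotient in %ah and the remainder
         in %al; aad is the inverse, setting %al = %ah*10 + %al and
         %ah = 0.  Both set SF/ZF/PF from the new %al, which is why
         the helper hands back the OSZACP bits as well. */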
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                &x86g_calculate_aad_aam,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
            ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
                      )
          );
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

      DIP(opc == 0xD4 ? "aam\n" : "aad\n");
      break;

   /* ------------------------ CWD/CDQ -------------------- */

   case 0x98: /* CBW */
      if (sz == 4) {
         putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
         DIP("cwde\n");
      } else {
         vassert(sz == 2);
         putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;

   case 0x99: /* CWD/CDQ */
      ty = szToITy(sz);
      putIReg(sz, R_EDX,
              binop(mkSizedOp(ty,Iop_Sar8),
                    getIReg(sz, R_EAX),
                    mkU8(sz == 2 ? 15 : 31)) );
      DIP(sz == 2 ? "cwd\n" : "cdq\n");
      break;

   /* ------------------------ FPU ops -------------------- */

   case 0x9E: /* SAHF */
      codegen_SAHF();
      DIP("sahf\n");
      break;

   case 0x9F: /* LAHF */
      codegen_LAHF();
      DIP("lahf\n");
      break;

   case 0x9B: /* FWAIT */
      /* ignore? */
      DIP("fwait\n");
      break;

   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }

   /* ------------------------ INC & DEC ------------------ */

   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;

   case 0x48: /* DEC eAX */
   case 0x49: /* DEC eCX */
   case 0x4A: /* DEC eDX */
   case 0x4B: /* DEC eBX */
   case 0x4C: /* DEC eSP */
   case 0x4D: /* DEC eBP */
   case 0x4E: /* DEC eSI */
   case 0x4F: /* DEC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
                        getIReg(sz, (UInt)(opc - 0x48)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( False, t1, ty );
      putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
      DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
      break;

   /* ------------------------ INT ------------------------ */
   case 0xCC: /* INT 3 */
      jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
      dres.whatNext = Dis_StopHere;
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x40 .. $0x43 by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction).  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do. */
      if (d32 >= 0x40 && d32 <= 0x43) {
         jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x%x\n", (Int)d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls).  As part of this, note where we are, so we
         can back up the guest to this point if the syscall needs to
         be restarted. */
      if (d32 == 0x80) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x80\n");
         break;
      }
      if (d32 == 0x81) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x81\n");
         break;
      }
      if (d32 == 0x82) {
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
         dres.whatNext = Dis_StopHere;
         DIP("int $0x82\n");
         break;
      }

      /* none of the above */
      goto decode_failure;

   /* ------------------------ Jcond, byte offset --------- */

   case 0xEB: /* Jb (jump, byte offset) */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(Ijk_Boring,d32);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      vassert(sz == 4); /* JRS added 2004 July 11 */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
         dres.whatNext   = Dis_ResteerU;
         dres.continueAt = (Addr64)(Addr32)d32;
      } else {
         jmp_lit(Ijk_Boring,d32);
         dres.whatNext = Dis_StopHere;
      }
      DIP("jmp 0x%x\n", d32);
      break;

   case 0x70:
   case 0x71:
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
even) */ 13158 case 0x7B: /* JNP/JPO (jump parity odd) */ 13159 case 0x7C: /* JLb/JNGEb (jump less) */ 13160 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 13161 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 13162 case 0x7F: /* JGb/JNLEb (jump greater) */ 13163 { Int jmpDelta; 13164 HChar* comment = ""; 13165 jmpDelta = (Int)getSDisp8(delta); 13166 vassert(-128 <= jmpDelta && jmpDelta < 128); 13167 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta; 13168 delta++; 13169 if (resteerCisOk 13170 && vex_control.guest_chase_cond 13171 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 13172 && jmpDelta < 0 13173 && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) { 13174 /* Speculation: assume this backward branch is taken. So we 13175 need to emit a side-exit to the insn following this one, 13176 on the negation of the condition, and continue at the 13177 branch target address (d32). If we wind up back at the 13178 first instruction of the trace, just stop; it's better to 13179 let the IR loop unroller handle that case. */ 13180 stmt( IRStmt_Exit( 13181 mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))), 13182 Ijk_Boring, 13183 IRConst_U32(guest_EIP_bbstart+delta) ) ); 13184 dres.whatNext = Dis_ResteerC; 13185 dres.continueAt = (Addr64)(Addr32)d32; 13186 comment = "(assumed taken)"; 13187 } 13188 else 13189 if (resteerCisOk 13190 && vex_control.guest_chase_cond 13191 && (Addr32)d32 != (Addr32)guest_EIP_bbstart 13192 && jmpDelta >= 0 13193 && resteerOkFn( callback_opaque, 13194 (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) { 13195 /* Speculation: assume this forward branch is not taken. So 13196 we need to emit a side-exit to d32 (the dest) and continue 13197 disassembling at the insn immediately following this 13198 one. */ 13199 stmt( IRStmt_Exit( 13200 mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)), 13201 Ijk_Boring, 13202 IRConst_U32(d32) ) ); 13203 dres.whatNext = Dis_ResteerC; 13204 dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta); 13205 comment = "(assumed not taken)"; 13206 } 13207 else { 13208 /* Conservative default translation - end the block at this 13209 point. */ 13210 jcc_01( (X86Condcode)(opc - 0x70), 13211 (Addr32)(guest_EIP_bbstart+delta), d32); 13212 dres.whatNext = Dis_StopHere; 13213 } 13214 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment); 13215 break; 13216 } 13217 13218 case 0xE3: /* JECXZ (for JCXZ see above) */ 13219 if (sz != 4) goto decode_failure; 13220 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13221 delta ++; 13222 stmt( IRStmt_Exit( 13223 binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)), 13224 Ijk_Boring, 13225 IRConst_U32(d32) 13226 )); 13227 DIP("jecxz 0x%x\n", d32); 13228 break; 13229 13230 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 13231 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 13232 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 13233 { /* Again, the docs say this uses ECX/CX as a count depending on 13234 the address size override, not the operand one. Since we 13235 don't handle address size overrides, I guess that means 13236 ECX. 
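(Illustration, not in the original text: for the two-byte insn
         "E2 FB" -- "loop" with disp8 = 0xFB = -5 -- the fall-through
         EIP is insn_addr+2, so d32 = insn_addr+2-5 = insn_addr-3.
         The IR below decrements ECX and then side-exits to d32 when
         the count, and for loope/loopne also the Z flag, says to keep
         going.)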
*/ 13237 IRExpr* zbit = NULL; 13238 IRExpr* count = NULL; 13239 IRExpr* cond = NULL; 13240 HChar* xtra = NULL; 13241 13242 if (sz != 4) goto decode_failure; 13243 d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta); 13244 delta++; 13245 putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1))); 13246 13247 count = getIReg(4,R_ECX); 13248 cond = binop(Iop_CmpNE32, count, mkU32(0)); 13249 switch (opc) { 13250 case 0xE2: 13251 xtra = ""; 13252 break; 13253 case 0xE1: 13254 xtra = "e"; 13255 zbit = mk_x86g_calculate_condition( X86CondZ ); 13256 cond = mkAnd1(cond, zbit); 13257 break; 13258 case 0xE0: 13259 xtra = "ne"; 13260 zbit = mk_x86g_calculate_condition( X86CondNZ ); 13261 cond = mkAnd1(cond, zbit); 13262 break; 13263 default: 13264 vassert(0); 13265 } 13266 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) ); 13267 13268 DIP("loop%s 0x%x\n", xtra, d32); 13269 break; 13270 } 13271 13272 /* ------------------------ IMUL ----------------------- */ 13273 13274 case 0x69: /* IMUL Iv, Ev, Gv */ 13275 delta = dis_imul_I_E_G ( sorb, sz, delta, sz ); 13276 break; 13277 case 0x6B: /* IMUL Ib, Ev, Gv */ 13278 delta = dis_imul_I_E_G ( sorb, sz, delta, 1 ); 13279 break; 13280 13281 /* ------------------------ MOV ------------------------ */ 13282 13283 case 0x88: /* MOV Gb,Eb */ 13284 delta = dis_mov_G_E(sorb, 1, delta); 13285 break; 13286 13287 case 0x89: /* MOV Gv,Ev */ 13288 delta = dis_mov_G_E(sorb, sz, delta); 13289 break; 13290 13291 case 0x8A: /* MOV Eb,Gb */ 13292 delta = dis_mov_E_G(sorb, 1, delta); 13293 break; 13294 13295 case 0x8B: /* MOV Ev,Gv */ 13296 delta = dis_mov_E_G(sorb, sz, delta); 13297 break; 13298 13299 case 0x8D: /* LEA M,Gv */ 13300 if (sz != 4) 13301 goto decode_failure; 13302 modrm = getIByte(delta); 13303 if (epartIsReg(modrm)) 13304 goto decode_failure; 13305 /* NOTE! this is the one place where a segment override prefix 13306 has no effect on the address calculation. Therefore we pass 13307 zero instead of sorb here. */ 13308 addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf ); 13309 delta += alen; 13310 putIReg(sz, gregOfRM(modrm), mkexpr(addr)); 13311 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 13312 nameIReg(sz,gregOfRM(modrm))); 13313 break; 13314 13315 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */ 13316 delta = dis_mov_Sw_Ew(sorb, sz, delta); 13317 break; 13318 13319 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */ 13320 delta = dis_mov_Ew_Sw(sorb, delta); 13321 break; 13322 13323 case 0xA0: /* MOV Ob,AL */ 13324 sz = 1; 13325 /* Fall through ... */ 13326 case 0xA1: /* MOV Ov,eAX */ 13327 d32 = getUDisp32(delta); delta += 4; 13328 ty = szToITy(sz); 13329 addr = newTemp(Ity_I32); 13330 assign( addr, handleSegOverride(sorb, mkU32(d32)) ); 13331 putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr))); 13332 DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb), 13333 d32, nameIReg(sz,R_EAX)); 13334 break; 13335 13336 case 0xA2: /* MOV Ob,AL */ 13337 sz = 1; 13338 /* Fall through ... 
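into the 0xA3 case below; sz has just been forced to 1, so
         only AL is stored.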
*/ 13339 case 0xA3: /* MOV eAX,Ov */ 13340 d32 = getUDisp32(delta); delta += 4; 13341 ty = szToITy(sz); 13342 addr = newTemp(Ity_I32); 13343 assign( addr, handleSegOverride(sorb, mkU32(d32)) ); 13344 storeLE( mkexpr(addr), getIReg(sz,R_EAX) ); 13345 DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX), 13346 sorbTxt(sorb), d32); 13347 break; 13348 13349 case 0xB0: /* MOV imm,AL */ 13350 case 0xB1: /* MOV imm,CL */ 13351 case 0xB2: /* MOV imm,DL */ 13352 case 0xB3: /* MOV imm,BL */ 13353 case 0xB4: /* MOV imm,AH */ 13354 case 0xB5: /* MOV imm,CH */ 13355 case 0xB6: /* MOV imm,DH */ 13356 case 0xB7: /* MOV imm,BH */ 13357 d32 = getIByte(delta); delta += 1; 13358 putIReg(1, opc-0xB0, mkU8(d32)); 13359 DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0)); 13360 break; 13361 13362 case 0xB8: /* MOV imm,eAX */ 13363 case 0xB9: /* MOV imm,eCX */ 13364 case 0xBA: /* MOV imm,eDX */ 13365 case 0xBB: /* MOV imm,eBX */ 13366 case 0xBC: /* MOV imm,eSP */ 13367 case 0xBD: /* MOV imm,eBP */ 13368 case 0xBE: /* MOV imm,eSI */ 13369 case 0xBF: /* MOV imm,eDI */ 13370 d32 = getUDisp(sz,delta); delta += sz; 13371 putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32)); 13372 DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8)); 13373 break; 13374 13375 case 0xC6: /* MOV Ib,Eb */ 13376 sz = 1; 13377 goto do_Mov_I_E; 13378 case 0xC7: /* MOV Iv,Ev */ 13379 goto do_Mov_I_E; 13380 13381 do_Mov_I_E: 13382 modrm = getIByte(delta); 13383 if (epartIsReg(modrm)) { 13384 delta++; /* mod/rm byte */ 13385 d32 = getUDisp(sz,delta); delta += sz; 13386 putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32)); 13387 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, 13388 nameIReg(sz,eregOfRM(modrm))); 13389 } else { 13390 addr = disAMode ( &alen, sorb, delta, dis_buf ); 13391 delta += alen; 13392 d32 = getUDisp(sz,delta); delta += sz; 13393 storeLE(mkexpr(addr), mkU(szToITy(sz), d32)); 13394 DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); 13395 } 13396 break; 13397 13398 /* ------------------------ opl imm, A ----------------- */ 13399 13400 case 0x04: /* ADD Ib, AL */ 13401 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 13402 break; 13403 case 0x05: /* ADD Iv, eAX */ 13404 delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" ); 13405 break; 13406 13407 case 0x0C: /* OR Ib, AL */ 13408 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 13409 break; 13410 case 0x0D: /* OR Iv, eAX */ 13411 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 13412 break; 13413 13414 case 0x14: /* ADC Ib, AL */ 13415 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 13416 break; 13417 case 0x15: /* ADC Iv, eAX */ 13418 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 13419 break; 13420 13421 case 0x1C: /* SBB Ib, AL */ 13422 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 13423 break; 13424 case 0x1D: /* SBB Iv, eAX */ 13425 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 13426 break; 13427 13428 case 0x24: /* AND Ib, AL */ 13429 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 13430 break; 13431 case 0x25: /* AND Iv, eAX */ 13432 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 13433 break; 13434 13435 case 0x2C: /* SUB Ib, AL */ 13436 delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" ); 13437 break; 13438 case 0x2D: /* SUB Iv, eAX */ 13439 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 13440 break; 13441 13442 case 0x34: /* XOR Ib, AL */ 13443 delta = dis_op_imm_A( 1, False, 
Iop_Xor8, True, delta, "xor" ); 13444 break; 13445 case 0x35: /* XOR Iv, eAX */ 13446 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 13447 break; 13448 13449 case 0x3C: /* CMP Ib, AL */ 13450 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 13451 break; 13452 case 0x3D: /* CMP Iv, eAX */ 13453 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 13454 break; 13455 13456 case 0xA8: /* TEST Ib, AL */ 13457 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 13458 break; 13459 case 0xA9: /* TEST Iv, eAX */ 13460 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 13461 break; 13462 13463 /* ------------------------ opl Ev, Gv ----------------- */ 13464 13465 case 0x02: /* ADD Eb,Gb */ 13466 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" ); 13467 break; 13468 case 0x03: /* ADD Ev,Gv */ 13469 delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" ); 13470 break; 13471 13472 case 0x0A: /* OR Eb,Gb */ 13473 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" ); 13474 break; 13475 case 0x0B: /* OR Ev,Gv */ 13476 delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" ); 13477 break; 13478 13479 case 0x12: /* ADC Eb,Gb */ 13480 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" ); 13481 break; 13482 case 0x13: /* ADC Ev,Gv */ 13483 delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" ); 13484 break; 13485 13486 case 0x1A: /* SBB Eb,Gb */ 13487 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" ); 13488 break; 13489 case 0x1B: /* SBB Ev,Gv */ 13490 delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" ); 13491 break; 13492 13493 case 0x22: /* AND Eb,Gb */ 13494 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" ); 13495 break; 13496 case 0x23: /* AND Ev,Gv */ 13497 delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" ); 13498 break; 13499 13500 case 0x2A: /* SUB Eb,Gb */ 13501 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" ); 13502 break; 13503 case 0x2B: /* SUB Ev,Gv */ 13504 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" ); 13505 break; 13506 13507 case 0x32: /* XOR Eb,Gb */ 13508 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" ); 13509 break; 13510 case 0x33: /* XOR Ev,Gv */ 13511 delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" ); 13512 break; 13513 13514 case 0x3A: /* CMP Eb,Gb */ 13515 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" ); 13516 break; 13517 case 0x3B: /* CMP Ev,Gv */ 13518 delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" ); 13519 break; 13520 13521 case 0x84: /* TEST Eb,Gb */ 13522 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" ); 13523 break; 13524 case 0x85: /* TEST Ev,Gv */ 13525 delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" ); 13526 break; 13527 13528 /* ------------------------ opl Gv, Ev ----------------- */ 13529 13530 case 0x00: /* ADD Gb,Eb */ 13531 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13532 Iop_Add8, True, 1, delta, "add" ); 13533 break; 13534 case 0x01: /* ADD Gv,Ev */ 13535 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13536 Iop_Add8, True, sz, delta, "add" ); 13537 break; 13538 13539 case 0x08: /* OR Gb,Eb */ 13540 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13541 Iop_Or8, True, 1, delta, "or" ); 13542 break; 13543 case 0x09: /* OR Gv,Ev */ 13544 delta = dis_op2_G_E ( sorb, 
pfx_lock, False, 13545 Iop_Or8, True, sz, delta, "or" ); 13546 break; 13547 13548 case 0x10: /* ADC Gb,Eb */ 13549 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13550 Iop_Add8, True, 1, delta, "adc" ); 13551 break; 13552 case 0x11: /* ADC Gv,Ev */ 13553 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13554 Iop_Add8, True, sz, delta, "adc" ); 13555 break; 13556 13557 case 0x18: /* SBB Gb,Eb */ 13558 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13559 Iop_Sub8, True, 1, delta, "sbb" ); 13560 break; 13561 case 0x19: /* SBB Gv,Ev */ 13562 delta = dis_op2_G_E ( sorb, pfx_lock, True, 13563 Iop_Sub8, True, sz, delta, "sbb" ); 13564 break; 13565 13566 case 0x20: /* AND Gb,Eb */ 13567 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13568 Iop_And8, True, 1, delta, "and" ); 13569 break; 13570 case 0x21: /* AND Gv,Ev */ 13571 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13572 Iop_And8, True, sz, delta, "and" ); 13573 break; 13574 13575 case 0x28: /* SUB Gb,Eb */ 13576 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13577 Iop_Sub8, True, 1, delta, "sub" ); 13578 break; 13579 case 0x29: /* SUB Gv,Ev */ 13580 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13581 Iop_Sub8, True, sz, delta, "sub" ); 13582 break; 13583 13584 case 0x30: /* XOR Gb,Eb */ 13585 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13586 Iop_Xor8, True, 1, delta, "xor" ); 13587 break; 13588 case 0x31: /* XOR Gv,Ev */ 13589 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13590 Iop_Xor8, True, sz, delta, "xor" ); 13591 break; 13592 13593 case 0x38: /* CMP Gb,Eb */ 13594 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13595 Iop_Sub8, False, 1, delta, "cmp" ); 13596 break; 13597 case 0x39: /* CMP Gv,Ev */ 13598 delta = dis_op2_G_E ( sorb, pfx_lock, False, 13599 Iop_Sub8, False, sz, delta, "cmp" ); 13600 break; 13601 13602 /* ------------------------ POP ------------------------ */ 13603 13604 case 0x58: /* POP eAX */ 13605 case 0x59: /* POP eCX */ 13606 case 0x5A: /* POP eDX */ 13607 case 0x5B: /* POP eBX */ 13608 case 0x5D: /* POP eBP */ 13609 case 0x5E: /* POP eSI */ 13610 case 0x5F: /* POP eDI */ 13611 case 0x5C: /* POP eSP */ 13612 vassert(sz == 2 || sz == 4); 13613 t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32); 13614 assign(t2, getIReg(4, R_ESP)); 13615 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 13616 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13617 putIReg(sz, opc-0x58, mkexpr(t1)); 13618 DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); 13619 break; 13620 13621 case 0x9D: /* POPF */ 13622 vassert(sz == 2 || sz == 4); 13623 t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32); 13624 assign(t2, getIReg(4, R_ESP)); 13625 assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2)))); 13626 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz))); 13627 13628 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the 13629 value in t1. */ 13630 set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/, 13631 ((Addr32)guest_EIP_bbstart)+delta ); 13632 13633 DIP("popf%c\n", nameISize(sz)); 13634 break; 13635 13636 case 0x61: /* POPA */ 13637 /* This is almost certainly wrong for sz==2. So ... */ 13638 if (sz != 4) goto decode_failure; 13639 13640 /* t5 is the old %ESP value. */ 13641 t5 = newTemp(Ity_I32); 13642 assign( t5, getIReg(4, R_ESP) ); 13643 13644 /* Reload all the registers, except %esp. 
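The loads below walk the image popa expects, relative to
         the old %ESP (t5):
            +0 EDI, +4 ESI, +8 EBP, +12 saved ESP (ignored),
            +16 EBX, +20 EDX, +24 ECX, +28 EAX
         i.e. the mirror image of the layout PUSHA (opcode 0x60,
         below) creates.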
*/ 13645 putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) )); 13646 putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) )); 13647 putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) )); 13648 putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) )); 13649 /* ignore saved %ESP */ 13650 putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) )); 13651 putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) )); 13652 putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) )); 13653 13654 /* and move %ESP back up */ 13655 putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) ); 13656 13657 DIP("popa%c\n", nameISize(sz)); 13658 break; 13659 13660 case 0x8F: /* POPL/POPW m32 */ 13661 { Int len; 13662 UChar rm = getIByte(delta); 13663 13664 /* make sure this instruction is correct POP */ 13665 if (epartIsReg(rm) || gregOfRM(rm) != 0) 13666 goto decode_failure; 13667 /* and has correct size */ 13668 if (sz != 4 && sz != 2) 13669 goto decode_failure; 13670 ty = szToITy(sz); 13671 13672 t1 = newTemp(Ity_I32); /* stack address */ 13673 t3 = newTemp(ty); /* data */ 13674 /* set t1 to ESP: t1 = ESP */ 13675 assign( t1, getIReg(4, R_ESP) ); 13676 /* load M[ESP] to virtual register t3: t3 = M[t1] */ 13677 assign( t3, loadLE(ty, mkexpr(t1)) ); 13678 13679 /* increase ESP; must be done before the STORE. Intel manual says: 13680 If the ESP register is used as a base register for addressing 13681 a destination operand in memory, the POP instruction computes 13682 the effective address of the operand after it increments the 13683 ESP register. 13684 */ 13685 putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) ); 13686 13687 /* resolve MODR/M */ 13688 addr = disAMode ( &len, sorb, delta, dis_buf); 13689 storeLE( mkexpr(addr), mkexpr(t3) ); 13690 13691 DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf); 13692 13693 delta += len; 13694 break; 13695 } 13696 13697 case 0x1F: /* POP %DS */ 13698 dis_pop_segreg( R_DS, sz ); break; 13699 case 0x07: /* POP %ES */ 13700 dis_pop_segreg( R_ES, sz ); break; 13701 case 0x17: /* POP %SS */ 13702 dis_pop_segreg( R_SS, sz ); break; 13703 13704 /* ------------------------ PUSH ----------------------- */ 13705 13706 case 0x50: /* PUSH eAX */ 13707 case 0x51: /* PUSH eCX */ 13708 case 0x52: /* PUSH eDX */ 13709 case 0x53: /* PUSH eBX */ 13710 case 0x55: /* PUSH eBP */ 13711 case 0x56: /* PUSH eSI */ 13712 case 0x57: /* PUSH eDI */ 13713 case 0x54: /* PUSH eSP */ 13714 /* This is the Right Way, in that the value to be pushed is 13715 established before %esp is changed, so that pushl %esp 13716 correctly pushes the old value. */ 13717 vassert(sz == 2 || sz == 4); 13718 ty = sz==2 ? 
Ity_I16 : Ity_I32; 13719 t1 = newTemp(ty); t2 = newTemp(Ity_I32); 13720 assign(t1, getIReg(sz, opc-0x50)); 13721 assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz))); 13722 putIReg(4, R_ESP, mkexpr(t2) ); 13723 storeLE(mkexpr(t2),mkexpr(t1)); 13724 DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); 13725 break; 13726 13727 13728 case 0x68: /* PUSH Iv */ 13729 d32 = getUDisp(sz,delta); delta += sz; 13730 goto do_push_I; 13731 case 0x6A: /* PUSH Ib, sign-extended to sz */ 13732 d32 = getSDisp8(delta); delta += 1; 13733 goto do_push_I; 13734 do_push_I: 13735 ty = szToITy(sz); 13736 t1 = newTemp(Ity_I32); t2 = newTemp(ty); 13737 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 13738 putIReg(4, R_ESP, mkexpr(t1) ); 13739 /* stop mkU16 asserting if d32 is a negative 16-bit number 13740 (bug #132813) */ 13741 if (ty == Ity_I16) 13742 d32 &= 0xFFFF; 13743 storeLE( mkexpr(t1), mkU(ty,d32) ); 13744 DIP("push%c $0x%x\n", nameISize(sz), d32); 13745 break; 13746 13747 case 0x9C: /* PUSHF */ { 13748 vassert(sz == 2 || sz == 4); 13749 13750 t1 = newTemp(Ity_I32); 13751 assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) ); 13752 putIReg(4, R_ESP, mkexpr(t1) ); 13753 13754 /* Calculate OSZACP, and patch in fixed fields as per 13755 Intel docs. 13756 - bit 1 is always 1 13757 - bit 9 is Interrupt Enable (should always be 1 in user mode?) 13758 */ 13759 t2 = newTemp(Ity_I32); 13760 assign( t2, binop(Iop_Or32, 13761 mk_x86g_calculate_eflags_all(), 13762 mkU32( (1<<1)|(1<<9) ) )); 13763 13764 /* Patch in the D flag. This can simply be a copy of bit 10 of 13765 baseBlock[OFFB_DFLAG]. */ 13766 t3 = newTemp(Ity_I32); 13767 assign( t3, binop(Iop_Or32, 13768 mkexpr(t2), 13769 binop(Iop_And32, 13770 IRExpr_Get(OFFB_DFLAG,Ity_I32), 13771 mkU32(1<<10))) 13772 ); 13773 13774 /* And patch in the ID flag. */ 13775 t4 = newTemp(Ity_I32); 13776 assign( t4, binop(Iop_Or32, 13777 mkexpr(t3), 13778 binop(Iop_And32, 13779 binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32), 13780 mkU8(21)), 13781 mkU32(1<<21))) 13782 ); 13783 13784 /* And patch in the AC flag. */ 13785 t5 = newTemp(Ity_I32); 13786 assign( t5, binop(Iop_Or32, 13787 mkexpr(t4), 13788 binop(Iop_And32, 13789 binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32), 13790 mkU8(18)), 13791 mkU32(1<<18))) 13792 ); 13793 13794 /* if sz==2, the stored value needs to be narrowed. */ 13795 if (sz == 2) 13796 storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) ); 13797 else 13798 storeLE( mkexpr(t1), mkexpr(t5) ); 13799 13800 DIP("pushf%c\n", nameISize(sz)); 13801 break; 13802 } 13803 13804 case 0x60: /* PUSHA */ 13805 /* This is almost certainly wrong for sz==2. So ... */ 13806 if (sz != 4) goto decode_failure; 13807 13808 /* This is the Right Way, in that the value to be pushed is 13809 established before %esp is changed, so that pusha 13810 correctly pushes the old %esp value. New value of %esp is 13811 pushed at start. */ 13812 /* t0 is the %ESP value we're going to push. */ 13813 t0 = newTemp(Ity_I32); 13814 assign( t0, getIReg(4, R_ESP) ); 13815 13816 /* t5 will be the new %ESP value. */ 13817 t5 = newTemp(Ity_I32); 13818 assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) ); 13819 13820 /* Update guest state before prodding memory. */ 13821 putIReg(4, R_ESP, mkexpr(t5)); 13822 13823 /* Dump all the registers. 
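The stores below create, relative to the new %ESP (t5):
            +0 EDI, +4 ESI, +8 EBP, +12 original ESP (t0),
            +16 EBX, +20 EDX, +24 ECX, +28 EAX
         Worked example (not from the original): if %ESP was 0x1000,
         it becomes 0xFE0 and the old value 0x1000 is stored at
         0xFEC.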
*/ 13824 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) ); 13825 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) ); 13826 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) ); 13827 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) ); 13828 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/); 13829 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) ); 13830 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) ); 13831 storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) ); 13832 13833 DIP("pusha%c\n", nameISize(sz)); 13834 break; 13835 13836 case 0x0E: /* PUSH %CS */ 13837 dis_push_segreg( R_CS, sz ); break; 13838 case 0x1E: /* PUSH %DS */ 13839 dis_push_segreg( R_DS, sz ); break; 13840 case 0x06: /* PUSH %ES */ 13841 dis_push_segreg( R_ES, sz ); break; 13842 case 0x16: /* PUSH %SS */ 13843 dis_push_segreg( R_SS, sz ); break; 13844 13845 /* ------------------------ SCAS et al ----------------- */ 13846 13847 case 0xA4: /* MOVS, no REP prefix */ 13848 case 0xA5: 13849 if (sorb != 0) 13850 goto decode_failure; /* else dis_string_op asserts */ 13851 dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb ); 13852 break; 13853 13854 case 0xA6: /* CMPSb, no REP prefix */ 13855 case 0xA7: 13856 if (sorb != 0) 13857 goto decode_failure; /* else dis_string_op asserts */ 13858 dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb ); 13859 break; 13860 13861 case 0xAA: /* STOS, no REP prefix */ 13862 case 0xAB: 13863 if (sorb != 0) 13864 goto decode_failure; /* else dis_string_op asserts */ 13865 dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb ); 13866 break; 13867 13868 case 0xAC: /* LODS, no REP prefix */ 13869 case 0xAD: 13870 if (sorb != 0) 13871 goto decode_failure; /* else dis_string_op asserts */ 13872 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb ); 13873 break; 13874 13875 case 0xAE: /* SCAS, no REP prefix */ 13876 case 0xAF: 13877 if (sorb != 0) 13878 goto decode_failure; /* else dis_string_op asserts */ 13879 dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb ); 13880 break; 13881 13882 13883 case 0xFC: /* CLD */ 13884 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) ); 13885 DIP("cld\n"); 13886 break; 13887 13888 case 0xFD: /* STD */ 13889 stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) ); 13890 DIP("std\n"); 13891 break; 13892 13893 case 0xF8: /* CLC */ 13894 case 0xF9: /* STC */ 13895 case 0xF5: /* CMC */ 13896 t0 = newTemp(Ity_I32); 13897 t1 = newTemp(Ity_I32); 13898 assign( t0, mk_x86g_calculate_eflags_all() ); 13899 switch (opc) { 13900 case 0xF8: 13901 assign( t1, binop(Iop_And32, mkexpr(t0), 13902 mkU32(~X86G_CC_MASK_C))); 13903 DIP("clc\n"); 13904 break; 13905 case 0xF9: 13906 assign( t1, binop(Iop_Or32, mkexpr(t0), 13907 mkU32(X86G_CC_MASK_C))); 13908 DIP("stc\n"); 13909 break; 13910 case 0xF5: 13911 assign( t1, binop(Iop_Xor32, mkexpr(t0), 13912 mkU32(X86G_CC_MASK_C))); 13913 DIP("cmc\n"); 13914 break; 13915 default: 13916 vpanic("disInstr(x86)(clc/stc/cmc)"); 13917 } 13918 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 13919 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 13920 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) )); 13921 /* Set NDEP even though it isn't used. This makes redundant-PUT 13922 elimination of previous stores to this field work better. 
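To make the thunk concrete (example not in the original):
         after "clc" it holds OP = X86G_CC_OP_COPY,
         DEP1 = old_eflags & ~X86G_CC_MASK_C, DEP2 = NDEP = 0, so a
         later flag read just extracts the wanted bits directly from
         DEP1.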
*/ 13923 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 13924 break; 13925 13926 case 0xD6: /* SALC */ 13927 t0 = newTemp(Ity_I32); 13928 t1 = newTemp(Ity_I32); 13929 assign( t0, binop(Iop_And32, 13930 mk_x86g_calculate_eflags_c(), 13931 mkU32(1)) ); 13932 assign( t1, binop(Iop_Sar32, 13933 binop(Iop_Shl32, mkexpr(t0), mkU8(31)), 13934 mkU8(31)) ); 13935 putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) ); 13936 DIP("salc\n"); 13937 break; 13938 13939 /* REPNE prefix insn */ 13940 case 0xF2: { 13941 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 13942 if (sorb != 0) goto decode_failure; 13943 abyte = getIByte(delta); delta++; 13944 13945 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 13946 dres.whatNext = Dis_StopHere; 13947 13948 switch (abyte) { 13949 /* According to the Intel manual, "repne movs" should never occur, but 13950 * in practice it has happened, so allow for it here... */ 13951 case 0xA4: sz = 1; /* REPNE MOVS<sz> */ 13952 case 0xA5: 13953 dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig, 13954 guest_EIP_bbstart+delta, "repne movs" ); 13955 break; 13956 13957 case 0xA6: sz = 1; /* REPNE CMP<sz> */ 13958 case 0xA7: 13959 dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig, 13960 guest_EIP_bbstart+delta, "repne cmps" ); 13961 break; 13962 13963 case 0xAA: sz = 1; /* REPNE STOS<sz> */ 13964 case 0xAB: 13965 dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig, 13966 guest_EIP_bbstart+delta, "repne stos" ); 13967 break; 13968 13969 case 0xAE: sz = 1; /* REPNE SCAS<sz> */ 13970 case 0xAF: 13971 dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig, 13972 guest_EIP_bbstart+delta, "repne scas" ); 13973 break; 13974 13975 default: 13976 goto decode_failure; 13977 } 13978 break; 13979 } 13980 13981 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE, 13982 for the rest, it means REP) */ 13983 case 0xF3: { 13984 Addr32 eip_orig = guest_EIP_bbstart + delta_start; 13985 if (sorb != 0) goto decode_failure; 13986 abyte = getIByte(delta); delta++; 13987 13988 if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; } 13989 dres.whatNext = Dis_StopHere; 13990 13991 switch (abyte) { 13992 case 0xA4: sz = 1; /* REP MOVS<sz> */ 13993 case 0xA5: 13994 dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig, 13995 guest_EIP_bbstart+delta, "rep movs" ); 13996 break; 13997 13998 case 0xA6: sz = 1; /* REPE CMP<sz> */ 13999 case 0xA7: 14000 dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig, 14001 guest_EIP_bbstart+delta, "repe cmps" ); 14002 break; 14003 14004 case 0xAA: sz = 1; /* REP STOS<sz> */ 14005 case 0xAB: 14006 dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig, 14007 guest_EIP_bbstart+delta, "rep stos" ); 14008 break; 14009 14010 case 0xAC: sz = 1; /* REP LODS<sz> */ 14011 case 0xAD: 14012 dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig, 14013 guest_EIP_bbstart+delta, "rep lods" ); 14014 break; 14015 14016 case 0xAE: sz = 1; /* REPE SCAS<sz> */ 14017 case 0xAF: 14018 dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig, 14019 guest_EIP_bbstart+delta, "repe scas" ); 14020 break; 14021 14022 case 0x90: /* REP NOP (PAUSE) */ 14023 /* a hint to the P4 re spin-wait loop */ 14024 DIP("rep nop (P4 pause)\n"); 14025 /* "observe" the hint. The Vex client needs to be careful not 14026 to cause very long delays as a result, though. */ 14027 jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta); 14028 dres.whatNext = Dis_StopHere; 14029 break; 14030 14031 case 0xC3: /* REP RET -- same as normal ret? 
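Apparently so: "rep ret" (F3 C3) is emitted by compilers
            purely to placate the branch predictors of some AMD
            cores, and the F3 prefix has no architectural effect on
            ret, so treating it as a plain ret is believed safe.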
*/ 14032 dis_ret(0); 14033 dres.whatNext = Dis_StopHere; 14034 DIP("rep ret\n"); 14035 break; 14036 14037 default: 14038 goto decode_failure; 14039 } 14040 break; 14041 } 14042 14043 /* ------------------------ XCHG ----------------------- */ 14044 14045 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 14046 prefix; hence it must be translated with an IRCAS (at least, the 14047 memory variant). */ 14048 case 0x86: /* XCHG Gb,Eb */ 14049 sz = 1; 14050 /* Fall through ... */ 14051 case 0x87: /* XCHG Gv,Ev */ 14052 modrm = getIByte(delta); 14053 ty = szToITy(sz); 14054 t1 = newTemp(ty); t2 = newTemp(ty); 14055 if (epartIsReg(modrm)) { 14056 assign(t1, getIReg(sz, eregOfRM(modrm))); 14057 assign(t2, getIReg(sz, gregOfRM(modrm))); 14058 putIReg(sz, gregOfRM(modrm), mkexpr(t1)); 14059 putIReg(sz, eregOfRM(modrm), mkexpr(t2)); 14060 delta++; 14061 DIP("xchg%c %s, %s\n", 14062 nameISize(sz), nameIReg(sz,gregOfRM(modrm)), 14063 nameIReg(sz,eregOfRM(modrm))); 14064 } else { 14065 *expect_CAS = True; 14066 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14067 assign( t1, loadLE(ty,mkexpr(addr)) ); 14068 assign( t2, getIReg(sz,gregOfRM(modrm)) ); 14069 casLE( mkexpr(addr), 14070 mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr ); 14071 putIReg( sz, gregOfRM(modrm), mkexpr(t1) ); 14072 delta += alen; 14073 DIP("xchg%c %s, %s\n", nameISize(sz), 14074 nameIReg(sz,gregOfRM(modrm)), dis_buf); 14075 } 14076 break; 14077 14078 case 0x90: /* XCHG eAX,eAX */ 14079 DIP("nop\n"); 14080 break; 14081 case 0x91: /* XCHG eAX,eCX */ 14082 case 0x92: /* XCHG eAX,eDX */ 14083 case 0x93: /* XCHG eAX,eBX */ 14084 case 0x94: /* XCHG eAX,eSP */ 14085 case 0x95: /* XCHG eAX,eBP */ 14086 case 0x96: /* XCHG eAX,eSI */ 14087 case 0x97: /* XCHG eAX,eDI */ 14088 codegen_xchg_eAX_Reg ( sz, opc - 0x90 ); 14089 break; 14090 14091 /* ------------------------ XLAT ----------------------- */ 14092 14093 case 0xD7: /* XLAT */ 14094 if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */ 14095 putIReg( 14096 1, 14097 R_EAX/*AL*/, 14098 loadLE(Ity_I8, 14099 handleSegOverride( 14100 sorb, 14101 binop(Iop_Add32, 14102 getIReg(4, R_EBX), 14103 unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/)))))); 14104 14105 DIP("xlat%c [ebx]\n", nameISize(sz)); 14106 break; 14107 14108 /* ------------------------ IN / OUT ----------------------- */ 14109 14110 case 0xE4: /* IN imm8, AL */ 14111 sz = 1; 14112 t1 = newTemp(Ity_I32); 14113 abyte = getIByte(delta); delta++; 14114 assign(t1, mkU32( abyte & 0xFF )); 14115 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); 14116 goto do_IN; 14117 case 0xE5: /* IN imm8, eAX */ 14118 vassert(sz == 2 || sz == 4); 14119 t1 = newTemp(Ity_I32); 14120 abyte = getIByte(delta); delta++; 14121 assign(t1, mkU32( abyte & 0xFF )); 14122 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX)); 14123 goto do_IN; 14124 case 0xEC: /* IN %DX, AL */ 14125 sz = 1; 14126 t1 = newTemp(Ity_I32); 14127 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14128 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14129 nameIReg(sz,R_EAX)); 14130 goto do_IN; 14131 case 0xED: /* IN %DX, eAX */ 14132 vassert(sz == 2 || sz == 4); 14133 t1 = newTemp(Ity_I32); 14134 assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX))); 14135 DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX), 14136 nameIReg(sz,R_EAX)); 14137 goto do_IN; 14138 do_IN: { 14139 /* At this point, sz indicates the width, and t1 is a 32-bit 14140 value giving port number. 
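The port read itself can't be expressed in IR, so it is done
         at run time by the dirty helper called below; the helper's
         32-bit result arrives in t2 and is narrowed to the operand
         size before being written to %EAX/%AX/%AL.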
*/ 14141 IRDirty* d; 14142 vassert(sz == 1 || sz == 2 || sz == 4); 14143 ty = szToITy(sz); 14144 t2 = newTemp(Ity_I32); 14145 d = unsafeIRDirty_1_N( 14146 t2, 14147 0/*regparms*/, 14148 "x86g_dirtyhelper_IN", 14149 &x86g_dirtyhelper_IN, 14150 mkIRExprVec_2( mkexpr(t1), mkU32(sz) ) 14151 ); 14152 /* do the call, dumping the result in t2. */ 14153 stmt( IRStmt_Dirty(d) ); 14154 putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) ); 14155 break; 14156 } 14157 14158 case 0xE6: /* OUT AL, imm8 */ 14159 sz = 1; 14160 t1 = newTemp(Ity_I32); 14161 abyte = getIByte(delta); delta++; 14162 assign( t1, mkU32( abyte & 0xFF ) ); 14163 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); 14164 goto do_OUT; 14165 case 0xE7: /* OUT eAX, imm8 */ 14166 vassert(sz == 2 || sz == 4); 14167 t1 = newTemp(Ity_I32); 14168 abyte = getIByte(delta); delta++; 14169 assign( t1, mkU32( abyte & 0xFF ) ); 14170 DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte); 14171 goto do_OUT; 14172 case 0xEE: /* OUT AL, %DX */ 14173 sz = 1; 14174 t1 = newTemp(Ity_I32); 14175 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14176 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14177 nameIReg(2,R_EDX)); 14178 goto do_OUT; 14179 case 0xEF: /* OUT eAX, %DX */ 14180 vassert(sz == 2 || sz == 4); 14181 t1 = newTemp(Ity_I32); 14182 assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) ); 14183 DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX), 14184 nameIReg(2,R_EDX)); 14185 goto do_OUT; 14186 do_OUT: { 14187 /* At this point, sz indicates the width, and t1 is a 32-bit 14188 value giving port number. */ 14189 IRDirty* d; 14190 vassert(sz == 1 || sz == 2 || sz == 4); 14191 ty = szToITy(sz); 14192 d = unsafeIRDirty_0_N( 14193 0/*regparms*/, 14194 "x86g_dirtyhelper_OUT", 14195 &x86g_dirtyhelper_OUT, 14196 mkIRExprVec_3( mkexpr(t1), 14197 widenUto32( getIReg(sz, R_EAX) ), 14198 mkU32(sz) ) 14199 ); 14200 stmt( IRStmt_Dirty(d) ); 14201 break; 14202 } 14203 14204 /* ------------------------ (Grp1 extensions) ---------- */ 14205 14206 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as 14207 case 0x80, but only in 32-bit mode. 
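(That is, 0x82 is an undocumented alias of the byte-sized
         Grp1 forms: for example "82 /0 ib" behaves like "80 /0 ib",
         i.e. ADD Ib,Eb.  In 64-bit mode the opcode is invalid, but
         that is of no concern here.)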
*/ 14208 /* fallthru */ 14209 case 0x80: /* Grp1 Ib,Eb */ 14210 modrm = getIByte(delta); 14211 am_sz = lengthAMode(delta); 14212 sz = 1; 14213 d_sz = 1; 14214 d32 = getUChar(delta + am_sz); 14215 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14216 break; 14217 14218 case 0x81: /* Grp1 Iv,Ev */ 14219 modrm = getIByte(delta); 14220 am_sz = lengthAMode(delta); 14221 d_sz = sz; 14222 d32 = getUDisp(d_sz, delta + am_sz); 14223 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14224 break; 14225 14226 case 0x83: /* Grp1 Ib,Ev */ 14227 modrm = getIByte(delta); 14228 am_sz = lengthAMode(delta); 14229 d_sz = 1; 14230 d32 = getSDisp8(delta + am_sz); 14231 delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 ); 14232 break; 14233 14234 /* ------------------------ (Grp2 extensions) ---------- */ 14235 14236 case 0xC0: { /* Grp2 Ib,Eb */ 14237 Bool decode_OK = True; 14238 modrm = getIByte(delta); 14239 am_sz = lengthAMode(delta); 14240 d_sz = 1; 14241 d32 = getUChar(delta + am_sz); 14242 sz = 1; 14243 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14244 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14245 if (!decode_OK) 14246 goto decode_failure; 14247 break; 14248 } 14249 case 0xC1: { /* Grp2 Ib,Ev */ 14250 Bool decode_OK = True; 14251 modrm = getIByte(delta); 14252 am_sz = lengthAMode(delta); 14253 d_sz = 1; 14254 d32 = getUChar(delta + am_sz); 14255 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14256 mkU8(d32 & 0xFF), NULL, &decode_OK ); 14257 if (!decode_OK) 14258 goto decode_failure; 14259 break; 14260 } 14261 case 0xD0: { /* Grp2 1,Eb */ 14262 Bool decode_OK = True; 14263 modrm = getIByte(delta); 14264 am_sz = lengthAMode(delta); 14265 d_sz = 0; 14266 d32 = 1; 14267 sz = 1; 14268 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14269 mkU8(d32), NULL, &decode_OK ); 14270 if (!decode_OK) 14271 goto decode_failure; 14272 break; 14273 } 14274 case 0xD1: { /* Grp2 1,Ev */ 14275 Bool decode_OK = True; 14276 modrm = getUChar(delta); 14277 am_sz = lengthAMode(delta); 14278 d_sz = 0; 14279 d32 = 1; 14280 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14281 mkU8(d32), NULL, &decode_OK ); 14282 if (!decode_OK) 14283 goto decode_failure; 14284 break; 14285 } 14286 case 0xD2: { /* Grp2 CL,Eb */ 14287 Bool decode_OK = True; 14288 modrm = getUChar(delta); 14289 am_sz = lengthAMode(delta); 14290 d_sz = 0; 14291 sz = 1; 14292 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14293 getIReg(1,R_ECX), "%cl", &decode_OK ); 14294 if (!decode_OK) 14295 goto decode_failure; 14296 break; 14297 } 14298 case 0xD3: { /* Grp2 CL,Ev */ 14299 Bool decode_OK = True; 14300 modrm = getIByte(delta); 14301 am_sz = lengthAMode(delta); 14302 d_sz = 0; 14303 delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz, 14304 getIReg(1,R_ECX), "%cl", &decode_OK ); 14305 if (!decode_OK) 14306 goto decode_failure; 14307 break; 14308 } 14309 14310 /* ------------------------ (Grp3 extensions) ---------- */ 14311 14312 case 0xF6: { /* Grp3 Eb */ 14313 Bool decode_OK = True; 14314 delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK ); 14315 if (!decode_OK) 14316 goto decode_failure; 14317 break; 14318 } 14319 case 0xF7: { /* Grp3 Ev */ 14320 Bool decode_OK = True; 14321 delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK ); 14322 if (!decode_OK) 14323 goto decode_failure; 14324 break; 14325 } 14326 14327 /* ------------------------ (Grp4 extensions) ---------- */ 14328 14329 case 0xFE: { /* Grp4 Eb */ 14330 Bool decode_OK = True; 14331 
delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp5 extensions) ---------- */

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ Escapes to 2-byte opcodes -- */

   case 0x0F: {
      opc = getIByte(delta); delta++;
      switch (opc) {

      /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0xBA: { /* Grp8 Ib,Ev */
         Bool decode_OK = False;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
                                am_sz, sz, d32, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */

      case 0xBC: /* BSF Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, True );
         break;
      case 0xBD: /* BSR Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, False );
         break;

      /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */

      case 0xC8: /* BSWAP %eax */
      case 0xC9:
      case 0xCA:
      case 0xCB:
      case 0xCC:
      case 0xCD:
      case 0xCE:
      case 0xCF: /* BSWAP %edi */
         /* AFAICS from the Intel docs, this only exists at size 4. */
         vassert(sz == 4);
         t1 = newTemp(Ity_I32);
         t2 = newTemp(Ity_I32);
         assign( t1, getIReg(4, opc-0xC8) );

         assign( t2,
            binop(Iop_Or32,
               binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
               binop(Iop_Or32,
                  binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                   mkU32(0x00FF0000)),
                  binop(Iop_Or32,
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                      mkU32(0x0000FF00)),
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                      mkU32(0x000000FF) )
                  )))
         );

         putIReg(4, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
         break;

      /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */

      case 0xA3: /* BT Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
         break;
      case 0xB3: /* BTR Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
         break;
      case 0xAB: /* BTS Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
         break;
      case 0xBB: /* BTC Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
         break;

      /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x40:
      case 0x41:
      case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
      case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
      case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
      case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
      case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
      case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
      case 0x48: /* CMOVSb (cmov negative) */
      case 0x49: /* CMOVNSb (cmov not negative) */
      case 0x4A: /* CMOVP (cmov parity even) */
      case 0x4B: /* CMOVNP (cmov parity odd) */
      case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
      case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
      case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14442 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 14443 delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta); 14444 break; 14445 14446 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ 14447 14448 case 0xB0: /* CMPXCHG Gb,Eb */ 14449 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta ); 14450 break; 14451 case 0xB1: /* CMPXCHG Gv,Ev */ 14452 delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta ); 14453 break; 14454 14455 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */ 14456 IRTemp expdHi = newTemp(Ity_I32); 14457 IRTemp expdLo = newTemp(Ity_I32); 14458 IRTemp dataHi = newTemp(Ity_I32); 14459 IRTemp dataLo = newTemp(Ity_I32); 14460 IRTemp oldHi = newTemp(Ity_I32); 14461 IRTemp oldLo = newTemp(Ity_I32); 14462 IRTemp flags_old = newTemp(Ity_I32); 14463 IRTemp flags_new = newTemp(Ity_I32); 14464 IRTemp success = newTemp(Ity_I1); 14465 14466 /* Translate this using a DCAS, even if there is no LOCK 14467 prefix. Life is too short to bother with generating two 14468 different translations for the with/without-LOCK-prefix 14469 cases. */ 14470 *expect_CAS = True; 14471 14472 /* Decode, and generate address. */ 14473 if (sz != 4) goto decode_failure; 14474 modrm = getIByte(delta); 14475 if (epartIsReg(modrm)) goto decode_failure; 14476 if (gregOfRM(modrm) != 1) goto decode_failure; 14477 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14478 delta += alen; 14479 14480 /* Get the expected and new values. */ 14481 assign( expdHi, getIReg(4,R_EDX) ); 14482 assign( expdLo, getIReg(4,R_EAX) ); 14483 assign( dataHi, getIReg(4,R_ECX) ); 14484 assign( dataLo, getIReg(4,R_EBX) ); 14485 14486 /* Do the DCAS */ 14487 stmt( IRStmt_CAS( 14488 mkIRCAS( oldHi, oldLo, 14489 Iend_LE, mkexpr(addr), 14490 mkexpr(expdHi), mkexpr(expdLo), 14491 mkexpr(dataHi), mkexpr(dataLo) 14492 ))); 14493 14494 /* success when oldHi:oldLo == expdHi:expdLo */ 14495 assign( success, 14496 binop(Iop_CasCmpEQ32, 14497 binop(Iop_Or32, 14498 binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)), 14499 binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo)) 14500 ), 14501 mkU32(0) 14502 )); 14503 14504 /* If the DCAS is successful, that is to say oldHi:oldLo == 14505 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX, 14506 which is where they came from originally. Both the actual 14507 contents of these two regs, and any shadow values, are 14508 unchanged. If the DCAS fails then we're putting into 14509 EDX:EAX the value seen in memory. */ 14510 putIReg(4, R_EDX, 14511 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 14512 mkexpr(oldHi), 14513 mkexpr(expdHi) 14514 )); 14515 putIReg(4, R_EAX, 14516 IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)), 14517 mkexpr(oldLo), 14518 mkexpr(expdLo) 14519 )); 14520 14521 /* Copy the success bit into the Z flag and leave the others 14522 unchanged */ 14523 assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all())); 14524 assign( 14525 flags_new, 14526 binop(Iop_Or32, 14527 binop(Iop_And32, mkexpr(flags_old), 14528 mkU32(~X86G_CC_MASK_Z)), 14529 binop(Iop_Shl32, 14530 binop(Iop_And32, 14531 unop(Iop_1Uto32, mkexpr(success)), mkU32(1)), 14532 mkU8(X86G_CC_SHIFT_Z)) )); 14533 14534 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) )); 14535 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 14536 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) )); 14537 /* Set NDEP even though it isn't used. This makes 14538 redundant-PUT elimination of previous stores to this field 14539 work better. */ 14540 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) )); 14541 14542 /* Sheesh. 
Aren't you glad it was me and not you that had to write
            and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }

      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax, wr ebx, ecx, edx
         */
         IRDirty* d     = NULL;
         HChar*   fName = NULL;
         void*    fAddr = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_0() );
         /* declare guest state effects */
         d->needsBBP = True;
         d->nFxState = 4;
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ...
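we conservatively plant an IR-level memory fence here,
            which the backends are expected to lower to a real
            barrier insn.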
*/ 14597 stmt( IRStmt_MBE(Imbe_Fence) ); 14598 DIP("cpuid\n"); 14599 break; 14600 } 14601 14602 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID)) 14603 //-- goto decode_failure; 14604 //-- 14605 //-- t1 = newTemp(cb); 14606 //-- t2 = newTemp(cb); 14607 //-- t3 = newTemp(cb); 14608 //-- t4 = newTemp(cb); 14609 //-- uInstr0(cb, CALLM_S, 0); 14610 //-- 14611 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); 14612 //-- uInstr1(cb, PUSH, 4, TempReg, t1); 14613 //-- 14614 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); 14615 //-- uLiteral(cb, 0); 14616 //-- uInstr1(cb, PUSH, 4, TempReg, t2); 14617 //-- 14618 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); 14619 //-- uLiteral(cb, 0); 14620 //-- uInstr1(cb, PUSH, 4, TempReg, t3); 14621 //-- 14622 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); 14623 //-- uLiteral(cb, 0); 14624 //-- uInstr1(cb, PUSH, 4, TempReg, t4); 14625 //-- 14626 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); 14627 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); 14628 //-- 14629 //-- uInstr1(cb, POP, 4, TempReg, t4); 14630 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); 14631 //-- 14632 //-- uInstr1(cb, POP, 4, TempReg, t3); 14633 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); 14634 //-- 14635 //-- uInstr1(cb, POP, 4, TempReg, t2); 14636 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); 14637 //-- 14638 //-- uInstr1(cb, POP, 4, TempReg, t1); 14639 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); 14640 //-- 14641 //-- uInstr0(cb, CALLM_E, 0); 14642 //-- DIP("cpuid\n"); 14643 //-- break; 14644 //-- 14645 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ 14646 14647 case 0xB6: /* MOVZXb Eb,Gv */ 14648 if (sz != 2 && sz != 4) 14649 goto decode_failure; 14650 delta = dis_movx_E_G ( sorb, delta, 1, sz, False ); 14651 break; 14652 14653 case 0xB7: /* MOVZXw Ew,Gv */ 14654 if (sz != 4) 14655 goto decode_failure; 14656 delta = dis_movx_E_G ( sorb, delta, 2, 4, False ); 14657 break; 14658 14659 case 0xBE: /* MOVSXb Eb,Gv */ 14660 if (sz != 2 && sz != 4) 14661 goto decode_failure; 14662 delta = dis_movx_E_G ( sorb, delta, 1, sz, True ); 14663 break; 14664 14665 case 0xBF: /* MOVSXw Ew,Gv */ 14666 if (sz != 4 && /* accept movsww, sigh, see #250799 */sz != 2) 14667 goto decode_failure; 14668 delta = dis_movx_E_G ( sorb, delta, 2, sz, True ); 14669 break; 14670 14671 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */ 14672 //-- 14673 //-- case 0xC3: /* MOVNTI Gv,Ev */ 14674 //-- vg_assert(sz == 4); 14675 //-- modrm = getUChar(eip); 14676 //-- vg_assert(!epartIsReg(modrm)); 14677 //-- t1 = newTemp(cb); 14678 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1); 14679 //-- pair = disAMode ( cb, sorb, eip, dis_buf ); 14680 //-- t2 = LOW24(pair); 14681 //-- eip += HI8(pair); 14682 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2); 14683 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf); 14684 //-- break; 14685 14686 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ 14687 14688 case 0xAF: /* IMUL Ev, Gv */ 14689 delta = dis_mul_E_G ( sorb, sz, delta ); 14690 break; 14691 14692 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ 14693 14694 case 0x1F: 14695 modrm = getUChar(delta); 14696 if (epartIsReg(modrm)) goto decode_failure; 14697 addr = disAMode ( &alen, sorb, delta, dis_buf ); 14698 delta += alen; 14699 DIP("nop%c %s\n", nameISize(sz), dis_buf); 14700 break; 14701 14702 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ 14703 case 0x80: 14704 case 0x81: 14705 case 0x82: /* JBb/JNAEb 
(jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JP (jump parity even) */
      case 0x8B: /* JNP/JPO (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
      { Int jmpDelta;
        HChar* comment = "";
        jmpDelta = (Int)getUDisp32(delta);
        d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
        delta += 4;
        if (resteerCisOk
            && vex_control.guest_chase_cond
            && (Addr32)d32 != (Addr32)guest_EIP_bbstart
            && jmpDelta < 0
            && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
           /* Speculation: assume this backward branch is taken.  So
              we need to emit a side-exit to the insn following this
              one, on the negation of the condition, and continue at
              the branch target address (d32).  If we wind up back at
              the first instruction of the trace, just stop; it's
              better to let the IR loop unroller handle that case. */
           stmt( IRStmt_Exit(
                    mk_x86g_calculate_condition((X86Condcode)
                                                   (1 ^ (opc - 0x80))),
                    Ijk_Boring,
                    IRConst_U32(guest_EIP_bbstart+delta) ) );
           dres.whatNext   = Dis_ResteerC;
           dres.continueAt = (Addr64)(Addr32)d32;
           comment = "(assumed taken)";
        }
        else
        if (resteerCisOk
            && vex_control.guest_chase_cond
            && (Addr32)d32 != (Addr32)guest_EIP_bbstart
            && jmpDelta >= 0
            && resteerOkFn( callback_opaque,
                            (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
           /* Speculation: assume this forward branch is not taken.
              So we need to emit a side-exit to d32 (the dest) and
              continue disassembling at the insn immediately
              following this one. */
           stmt( IRStmt_Exit(
                    mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
                    Ijk_Boring,
                    IRConst_U32(d32) ) );
           dres.whatNext   = Dis_ResteerC;
           dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
           comment = "(assumed not taken)";
        }
        else {
           /* Conservative default translation - end the block at
              this point. */
           jcc_01( (X86Condcode)(opc - 0x80),
                   (Addr32)(guest_EIP_bbstart+delta), d32);
           dres.whatNext = Dis_StopHere;
        }
        DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
        break;
      }

      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
      case 0x31: { /* RDTSC */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
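         /* Illustrative values (not from the original): if the helper
            returns val = 0x0000001234ABCDEFULL, the splits below leave
            EDX = 0x00000012 and EAX = 0x34ABCDEF. */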
         stmt( IRStmt_Dirty(d) );
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }

      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;

      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90:
      case 0x91:
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;

      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;

      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.

      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;

      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90:
      case 0x91:
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;

      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */

      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
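
      /* For reference, the double shifts above compute, for an
         operand width of w bits and a shift amount n (n < w):

            SHLD: dst = (dst << n) | (src >> (w-n))
            SHRD: dst = (dst >> n) | (src << (w-n))

         i.e. bits shifted out of dst are replaced by bits shifted in
         from src.  A sketch in plain C for the 32-bit SHLD case
         (names are illustrative only; the n==0 guard avoids the
         undefined 32-bit shift in C):

            UInt shld32 ( UInt dst, UInt src, UInt n ) {
               n &= 31;
               return n == 0 ? dst : (dst << n) | (src >> (32-n));
            }
      */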

      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         dres.whatNext = Dis_StopHere;
         DIP("sysenter");
         break;

      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume the SSE decoder has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }

      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("emms\n");
         break;

      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         modrm = getUChar(delta);
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                          0/*regparms*/,
                          "x86g_dirtyhelper_SxDT",
                          &x86g_dirtyhelper_SxDT,
                          mkIRExprVec_2( mkexpr(addr),
                                         mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
         stmt( IRStmt_Dirty(d) );
         break;
      }
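
      /* The 6 bytes written by SGDT/SIDT with a 32-bit operand size
         are the 16-bit table limit followed by the 32-bit linear
         base address, hence mSize = 6 above.  Roughly (a sketch of
         the layout; the actual bytes are produced by the helper):

            struct __attribute__((packed)) {
               UShort limit;   // bytes 0..1
               UInt   base;    // bytes 2..5
            };
      */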

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
   } /* switch (opc) for the 2-byte opcodes */
   goto decode_success;
   } /* case 0x0F: of primary opcode */

   /* ------------------------ ??? ------------------------ */

  default:
  decode_failure:
   /* All decode failures end up here. */
   vex_printf("vex x86->IR: unhandled instruction bytes: "
              "0x%x 0x%x 0x%x 0x%x\n",
              (Int)getIByte(delta_start+0),
              (Int)getIByte(delta_start+1),
              (Int)getIByte(delta_start+2),
              (Int)getIByte(delta_start+3) );

   /* Tell the dispatcher that this insn cannot be decoded, and so
      has not been executed, and (is currently) the next to be
      executed.  EIP should be up-to-date since it is made so at the
      start of each insn, but nevertheless be paranoid and update it
      again right now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
   dres.whatNext = Dis_StopHere;
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesise a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;

   } /* switch (opc) for the main (primary) opcode switch. */

  decode_success:
   /* All decode successes end up here. */
   DIP("\n");
   dres.len = delta - delta_start;
   return dres;
}

#undef DIP
#undef DIS


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_X86 ( IRSB*        irsb_IN,
                         Bool         put_IP,
                         Bool         (*resteerOkFn) ( void*, Addr64 ),
                         Bool         resteerCisOk,
                         void*        callback_opaque,
                         UChar*       guest_code_IN,
                         Long         delta,
                         Addr64       guest_IP,
                         VexArch      guest_arch,
                         VexArchInfo* archinfo,
                         VexAbiInfo*  abiinfo,
                         Bool         host_bigendian_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchX86);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_is_bigendian    = host_bigendian_IN;
   guest_EIP_curr_instr = (Addr32)guest_IP;
   guest_EIP_bbstart    = (Addr32)toUInt(guest_IP - delta);

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
                             resteerCisOk,
                             callback_opaque,
                             delta, archinfo, abiinfo );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* See comment at the top of disInstr_X86_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence
      of IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so
         as to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
                                resteerCisOk,
                                callback_opaque,
                                delta, archinfo, abiinfo );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/