1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_amd64_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2017 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 /* Translates AMD64 code to IR. */ 37 38 /* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 64-bit value is being written. 42 43 x87 FP Limitations: 44 45 * all arithmetic done at 64 bits 46 47 * no FP exceptions, except for handling stack over/underflow 48 49 * FP rounding mode observed only for float->int conversions and 50 int->float conversions which could lose accuracy, and for 51 float-to-float rounding. For all other operations, 52 round-to-nearest is used, regardless. 
53 54 * some of the FCOM cases could do with testing -- not convinced 55 that the args are the right way round. 56 57 * FSAVE does not re-initialise the FPU; it should do 58 59 * FINIT not only initialises the FPU environment, it also zeroes 60 all the FP registers. It should leave the registers unchanged. 61 62 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 63 per Intel docs this bit has no meaning anyway. Since PUSHF is the 64 only way to observe eflags[1], a proper fix would be to make that 65 bit be set by PUSHF. 66 67 This module uses global variables and so is not MT-safe (if that 68 should ever become relevant). 69 */ 70 71 /* Notes re address size overrides (0x67). 72 73 According to the AMD documentation (24594 Rev 3.09, Sept 2003, 74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose 75 and System Instructions"), Section 1.2.3 ("Address-Size Override 76 Prefix"): 77 78 0x67 applies to all explicit memory references, causing the top 79 32 bits of the effective address to become zero. 80 81 0x67 has no effect on stack references (push/pop); these always 82 use a 64-bit address. 83 84 0x67 changes the interpretation of instructions which implicitly 85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used 86 instead. These are: 87 88 cmp{s,sb,sw,sd,sq} 89 in{s,sb,sw,sd} 90 jcxz, jecxz, jrcxz 91 lod{s,sb,sw,sd,sq} 92 loop{,e,bz,be,z} 93 mov{s,sb,sw,sd,sq} 94 out{s,sb,sw,sd} 95 rep{,e,ne,nz} 96 sca{s,sb,sw,sd,sq} 97 sto{s,sb,sw,sd,sq} 98 xlat{,b} */ 99 100 /* "Special" instructions. 101 102 This instruction decoder can decode three special instructions 103 which mean nothing natively (are no-ops as far as regs/mem are 104 concerned) but have meaning for supporting Valgrind. A special 105 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D 106 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq 107 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi). 
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).
   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;


/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type, registered in the
   current IRSB's type environment. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e".
 */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Thin wrappers over the IRExpr constructors, to keep the decoder
   terse. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

/* Make integer constants of the stated width; the value must fit. */
static IRExpr* mkU8 ( ULong i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

/* Make a constant of the given integer type. */
static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

/* Little-endian store and load (amd64 is little-endian). */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Given the 8-bit variant of an IROp, produce the variant for the
   given type.  Relies on the IROp enum placing each operation's
   8/16/32/64-bit variants at consecutive values, so that adding
   0..3 to the 8-bit op selects the size; the vassert whitelists the
   op families for which that layout holds. */
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}

/* Widen src from szSmall bytes to szBig bytes, sign-extending iff
   signd.  Panics on any (szSmall, szBig, signd) combination not
   listed below; note 1->8 and 2->8 have both signed and unsigned
   forms, while 1->2, 1->4 and 2->4 pick the op via signd. */
static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}

/* Put value to the guest state at offset gstOffB, but only if guard
   is true; otherwise re-Put the existing value (an ITE between the
   new value and a Get of the old one). */
static
void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, value);
   stmt( IRStmt_Put(gstOffB,
                    IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
}


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( const HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}

/* Trace-printing macros, active only when front-end tracing
   (VEX_TRACE_FE) is enabled. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.
                                                          ---*/
/*------------------------------------------------------------*/

/* Byte offsets of the guest registers within VexGuestAMD64State.
   All guest-state accesses in this file go through these. */

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

/* Segment-base constants for %fs / %gs. */
#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

/* The lazily-evaluated condition-code "thunk". */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* x87 FPU state. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

/* SSE/AVX state. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the       ---*/
/*--- amd64 insn stream.                                  ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs.
 */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5


/* Various simple conversions */

/* Sign-extend the low 8/16/32 bits of x to 64 bits, done by shifting
   the value to the top of a 64-bit word and arithmetic-shifting it
   back down. */
static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((Long)(((ULong)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((Long)(((ULong)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((Long)(((ULong)x) << 32) >> 32);
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
inline
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
inline
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
inline
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}

/* Get a 8/16/32-bit unsigned value out of the insn stream.
   delta is an offset relative to guest_code (the start of the BB).
   All multi-byte reads assemble bytes little-endian, high byte
   first, shifting left as they go. */

static inline UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }


/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}

/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen.
 */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}

/* Return a mask covering the low sz bytes of a 64-bit value. */
static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSzMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

/* Map an operand size in bytes to the corresponding IR type. */
static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}


/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 8 bits of the prefix are 0x55, just as a hacky way to
   ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
/* NOTE(review): the descriptions of F2 and F3 were previously
   swapped; per the AMD/Intel manuals 0xF2 is REPNE/REPNZ and 0xF3 is
   REP/REPE/REPZ. */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions.
 */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


/* A prefix with no bits set; the 0x55 in the top byte is the sanity
   tag checked by IS_VALID_PFX. */
#define PFX_EMPTY 0x55000000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

/* Extract the individual REX bits as 0 or 1. */
static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2andF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}
static Bool haveLOCK ( Prefix pfx ) {
   return toBool((pfx & PFX_LOCK) > 0);
}

/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return
      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F3 set */
static Bool have66orF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F3)) > 0);
}

/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return
      p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}

/* Get the (inverted, hence back to "normal") VEX.vvvv field.
   Division by PFX_VEXnV0 shifts the 4-bit field down to bit 0;
   relies on PFX_VEXnV0..PFX_VEXnV3 occupying adjacent bits. */
static UInt getVexNvvvv ( Prefix pfx ) {
   UInt r = (UInt)pfx;
   r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
   return r & 0xF;
}

static Bool haveVEX ( Prefix pfx ) {
   return toBool(pfx & PFX_VEX);
}

static Int getVexL ( Prefix pfx ) {
   return (pfx & PFX_VEXL) ? 1 : 0;
}


/*------------------------------------------------------------*/
/*--- For dealing with escapes                             ---*/
/*------------------------------------------------------------*/


/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
813 The 0xF0000000 isn't significant, except so as to make it not 814 overlap valid Prefix values, for sanity checking. 815 */ 816 817 typedef 818 enum { 819 ESC_NONE=0xF0000000, // none 820 ESC_0F, // 0F 821 ESC_0F38, // 0F 38 822 ESC_0F3A // 0F 3A 823 } 824 Escape; 825 826 827 /*------------------------------------------------------------*/ 828 /*--- For dealing with integer registers ---*/ 829 /*------------------------------------------------------------*/ 830 831 /* This is somewhat complex. The rules are: 832 833 For 64, 32 and 16 bit register references, the e or g fields in the 834 modrm bytes supply the low 3 bits of the register number. The 835 fourth (most-significant) bit of the register number is supplied by 836 the REX byte, if it is present; else that bit is taken to be zero. 837 838 The REX.R bit supplies the high bit corresponding to the g register 839 field, and the REX.B bit supplies the high bit corresponding to the 840 e register field (when the mod part of modrm indicates that modrm's 841 e component refers to a register and not to memory). 842 843 The REX.X bit supplies a high register bit for certain registers 844 in SIB address modes, and is generally rarely used. 845 846 For 8 bit register references, the presence of the REX byte itself 847 has significance. If there is no REX present, then the 3-bit 848 number extracted from the modrm e or g field is treated as an index 849 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the 850 old x86 encoding scheme. 851 852 But if there is a REX present, the register reference is 853 interpreted in the same way as for 64/32/16-bit references: a high 854 bit is extracted from REX, giving a 4-bit number, and the denoted 855 register is the lowest 8 bits of the 16 integer registers denoted 856 by the number. In particular, values 3 through 7 of this sequence 857 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of 858 %rsp %rbp %rsi %rdi. 
859 860 The REX.W bit has no bearing at all on register numbers. Instead 861 its presence indicates that the operand size is to be overridden 862 from its default value (32 bits) to 64 bits instead. This is in 863 the same fashion that an 0x66 prefix indicates the operand size is 864 to be overridden from 32 bits down to 16 bits. When both REX.W and 865 0x66 are present there is a conflict, and REX.W takes precedence. 866 867 Rather than try to handle this complexity using a single huge 868 function, several smaller ones are provided. The aim is to make it 869 as difficult as possible to screw up register decoding in a subtle 870 and hard-to-track-down way. 871 872 Because these routines fish around in the host's memory (that is, 873 in the guest state area) for sub-parts of guest registers, their 874 correctness depends on the host's endianness. So far these 875 routines only work for little-endian hosts. Those for which 876 endianness is important have assertions to ensure sanity. 877 */ 878 879 880 /* About the simplest question you can ask: where do the 64-bit 881 integer registers live (in the guest state) ? */ 882 883 static Int integerGuestReg64Offset ( UInt reg ) 884 { 885 switch (reg) { 886 case R_RAX: return OFFB_RAX; 887 case R_RCX: return OFFB_RCX; 888 case R_RDX: return OFFB_RDX; 889 case R_RBX: return OFFB_RBX; 890 case R_RSP: return OFFB_RSP; 891 case R_RBP: return OFFB_RBP; 892 case R_RSI: return OFFB_RSI; 893 case R_RDI: return OFFB_RDI; 894 case R_R8: return OFFB_R8; 895 case R_R9: return OFFB_R9; 896 case R_R10: return OFFB_R10; 897 case R_R11: return OFFB_R11; 898 case R_R12: return OFFB_R12; 899 case R_R13: return OFFB_R13; 900 case R_R14: return OFFB_R14; 901 case R_R15: return OFFB_R15; 902 default: vpanic("integerGuestReg64Offset(amd64)"); 903 } 904 } 905 906 907 /* Produce the name of an integer register, for printing purposes. 908 reg is a number in the range 0 .. 
15 that has been generated from a 909 3-bit reg-field number and a REX extension bit. irregular denotes 910 the case where sz==1 and no REX byte is present. */ 911 912 static 913 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) 914 { 915 static const HChar* ireg64_names[16] 916 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 917 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 918 static const HChar* ireg32_names[16] 919 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 920 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; 921 static const HChar* ireg16_names[16] 922 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", 923 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; 924 static const HChar* ireg8_names[16] 925 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", 926 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; 927 static const HChar* ireg8_irregular[8] 928 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; 929 930 vassert(reg < 16); 931 if (sz == 1) { 932 if (irregular) 933 vassert(reg < 8); 934 } else { 935 vassert(irregular == False); 936 } 937 938 switch (sz) { 939 case 8: return ireg64_names[reg]; 940 case 4: return ireg32_names[reg]; 941 case 2: return ireg16_names[reg]; 942 case 1: if (irregular) { 943 return ireg8_irregular[reg]; 944 } else { 945 return ireg8_names[reg]; 946 } 947 default: vpanic("nameIReg(amd64)"); 948 } 949 } 950 951 /* Using the same argument conventions as nameIReg, produce the 952 guest state offset of an integer register. 
*/

static 
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present.  Registers
      4..7 then denote %ah/%ch/%dh/%bh, which on a little-endian host
      live one byte above the base of RAX/RCX/RDX/RBX respectively. */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}


/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(host_endness == VexEndnessLE);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register.  e must be :: Ity_I8; %ah is the second
   byte of RAX on a little-endian host, hence OFFB_RAX+1. */

static void putIRegAH ( IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}


/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      /* The 32-bit read fetches the whole 64-bit register and
         truncates, rather than doing a 32-bit Get. */
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      /* A 32-bit write is zero-extended to the full 64-bit register,
         clearing the upper half. */
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      /* 16- and 8-bit writes are partial: only the low bytes change. */
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses.
*/

static const HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      /* 32-bit read: fetch the whole register and truncate. */
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      /* 32-bit write zero-extends, clearing the upper half. */
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static const HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}


/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   /* 32-bit write zero-extends into the full 64-bit register. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static const HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}


/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   /* NOTE: unlike real hardware 16-bit writes, this zero-extends the
      value into the whole 64-bit register slot. */
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

static const HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}


/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   /* 64-bit read of the register selected by lo3bits extended with
      the REX.X bit. */
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3), 
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      /* Read the full 64-bit register and truncate, rather than
         doing a 32-bit Get. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 
                                     False/*!irregular*/ ),
                     szToITy(sz)
                 )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   /* 32-bit writes are zero-extended to 64 bits. */
   stmt( IRStmt_Put( 
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}


/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      /* 32-bit read: fetch the whole 64-bit register and truncate. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      /* 32-bit write zero-extends, clearing the upper half. */
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/* Access the register selected by the VEX.vvvv field (used by
   AVX-encoded instructions).  Same 32-bit read/write conventions as
   the G/E accessors above. */
static
IRExpr* getIRegV ( Int sz, Prefix pfx )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                         szToITy(sz) );
   }
}

static
void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
}

static
const HChar* nameIRegV ( Int sz, Prefix pfx )
{
   return nameIReg( sz, getVexNvvvv(pfx), False );
}



/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      /* 32-bit read: fetch the whole 64-bit register and truncate. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      /* 32-bit write zero-extends, clearing the upper half. */
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

/* Map a YMM register number (0..15) to its guest state offset. */
static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return
OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}

/* An XMM register is the low 128 bits of the corresponding YMM
   register, so (on a little-endian host) it lives at the same
   offset. */
static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   return ymmGuestRegOffset( xmmreg );
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}

/* Whole-register and per-lane XMM accessors.  The lane variants come
   in integer (Ity_Ixx) and float (Ity_Fxx) flavours at the same
   offsets. */

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
  return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
}

static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
}

static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}

static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64 ( UInt ymmreg, Int laneno,
IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

/* Make a 128-bit constant from a 16-bit bitmask (one mask bit per
   byte lane). */
static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

/* Write the low half of a YMM reg and zero out the upper half. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}

/* Logical AND of two Ity_I1 values, computed by widening to I64 and
   narrowing back. */
static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1, 
               binop(Iop_And64, 
                     unop(Iop_1Uto64,x), 
                     unop(Iop_1Uto64,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   /* If the old memory value differs from what we expected, exit the
      block back to restart_point (i.e. retry the instruction). */
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}


/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1.
*/
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 1/8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   /* Unsupported combination: dump the types and give up. */
   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op.
*/

static 
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   /* The size-adjustment constant: the thunk op enums are laid out
      B/W/L/Q consecutively, so add 0..3 to the base op. */
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static 
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value.
      Each field is only updated when the guard (shift amount != 0)
      holds; otherwise the old thunk value is kept. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU64(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, 
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ?
AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
   
   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }
   
   /* This has to come first, because calculating the C flag 
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
         break;
      case Ity_I64:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
         break;
      default:
         vpanic("setFlags_MUL(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the AMD encoding. */

static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}

/* Normalise a condition code to its positive (even-numbered) form.
   Odd-numbered codes are the negations of the preceding even ones, so
   return the even code and flag that the caller must invert. */
static 
AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
                                          /*OUT*/Bool*   needInvert )
{
   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

      if taddr is IRTemp_INVALID, then no store is generated.

      if taddr is not IRTemp_INVALID, then a store (using taddr as
      the address) is generated:

         if texpVal is IRTemp_INVALID then a normal store is
         generated, and restart_point must be zero (it is irrelevant).

         if texpVal is not IRTemp_INVALID then a cas-style store is
         generated.
     texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I64);
   IRTemp oldcn = newTemp(ty);
   IROp   plus  = mkSizedOp(ty, Iop_Add8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the flag-thunk operation matching the operand size. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Write the flag thunk: DEP1 = ta1, DEP2 = ta2 XOR old-carry,
      NDEP = old carry.  Order/encoding must match the helper that
      later recomputes rflags from the thunk. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I64);
   IRTemp oldcn = newTemp(ty);
   IROp   minus = mkSizedOp(ty, Iop_Sub8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the flag-thunk operation matching the operand size. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old carry (borrow) */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Thunk layout mirrors helper_ADC: DEP2 carries ta2 XOR old-carry,
      NDEP carries the old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
   and set flags appropriately.
*/
static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
                               IRTemp tres, IRTemp ta1, IRTemp ta2 )
{
   UInt   thunkOp;
   IRType ty       = szToITy(sz);
   IRTemp oldflags = newTemp(Ity_I64);
   IRTemp oldOC    = newTemp(Ity_I64); // old O or C flag
   IRTemp oldOCn   = newTemp(ty);      // old O or C flag, narrowed
   IROp   plus     = mkSizedOp(ty, Iop_Add8);
   IROp   xor      = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* ADCX/ADOX exist only in 32- and 64-bit operand sizes. */
   switch (sz) {
      case 8:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
                                : AMD64G_CC_OP_ADOX64; break;
      case 4:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
                                : AMD64G_CC_OP_ADOX32; break;
      default: vassert(0);
   }

   /* All flags are needed, since ADCX/ADOX leave the non-C/O flags
      unchanged; the whole old flag set is parked in NDEP below. */
   assign( oldflags, mk_amd64g_calculate_rflags_all() );

   /* oldOC = old overflow/carry flag, 0 or 1 */
   assign( oldOC, binop(Iop_And64,
                        binop(Iop_Shr64,
                              mkexpr(oldflags),
                              mkU8(isADCX ? AMD64G_CC_SHIFT_C
                                          : AMD64G_CC_SHIFT_O)),
                        mkU64(1)) );

   assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );

   /* tres = ta1 + ta2 + old C (ADCX) or old O (ADOX) */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldOCn)) );

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldOCn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

/* Name for a Grp1 (0x80..0x83) sub-opcode, 0 .. 7. */
static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

/* Name for a Grp2 (shift/rotate) sub-opcode, 0 .. 7.  Note /6 is an
   undocumented alias of /4 ("shl"). */
static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

/* Name for a Grp4 (0xFE) sub-opcode; only /0 and /1 are valid. */
static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

/* Name for a Grp5 (0xFF) sub-opcode; /0 .. /6 are valid. */
static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

/* Name for a Grp8 (0x0F BA, bit-test) sub-opcode; only /4 .. /7 valid. */
static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8]
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

/* Printable name of a segment register. */
static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(amd64)");
   }
}

/* Printable name of an MMX register, 0 .. 7. */
static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

/* Printable name of an XMM register, 0 .. 15. */
static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[16]
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

/* Size-suffix letter for an MMX granularity code (0=b .. 3=q). */
static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

/* AT&T-style operand-size suffix for a size in bytes. */
static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}

/* Printable name of a YMM register, 0 .. 15. */
static const HChar* nameYMMReg ( Int ymmreg )
{
   static const HChar* ymm_names[16]
     = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3",
         "%ymm4",  "%ymm5",  "%ymm6",  "%ymm7",
         "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
         "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   return ymm_names[ymmreg];
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

/* End the block with an unconditional jump to the literal guest
   address d64, of the given jump kind. */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr64 d64 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}

/* End the block with an unconditional jump to the guest address held
   in temporary t, of the given jump kind. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}

/* End the block with a conditional jump: go to d64_true if cond
   holds, else d64_false.  The condition is first converted to its
   positive form so the generated IRStmt_Exit tests a non-inverted
   condition. */
static
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      /* Exit to the false side when the (positive) condition holds;
         fall through to the true side. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
   }
}

/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static
void make_redzone_AbiHint ( const VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, const HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;
   vassert(szB >= 0);

   /* A bit of a kludge.  Currently the only AbI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);
   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
   if (szB > 0)
      stmt( IRStmt_AbiHint(
               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
               szB,
               mkexpr(nia)
            ));
}


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

/* Printable text of the segment-override prefix carried in pfx, or
   "" if there is none. */
static
const HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   if (pfx & PFX_SS) return "%ss:";
   return ""; /* no override */
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override
   present.
*/
static
IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* --- address size override --- */
   /* 0x67 prefix: truncate the effective address to 32 bits and
      zero-extend back to 64. */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   /* Note that the below are hacks that rely on the assumption
      that %fs or %gs are constant.
      Typically, %fs is always 0x63 on linux (in the main thread, it
      stays at value 0), %gs always 0x60 on Darwin, ... */
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_const) {
         /* return virtual + guest_FS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_FS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_const) {
         /* return virtual + guest_GS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_GS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   return virtual;
}

/* Commented-out legacy x86 segment-override translation, retained
   for reference only; not used on amd64. */
//.. {
//..    Int    sreg;
//..    IRType hWordTy;
//..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
//..
//..    if (sorb == 0)
//..       /* the common case - no override */
//..       return virtual;
//..
//..    switch (sorb) {
//..       case 0x3E: sreg = R_DS; break;
//..       case 0x26: sreg = R_ES; break;
//..       case 0x64: sreg = R_FS; break;
//..       case 0x65: sreg = R_GS; break;
//..       default: vpanic("handleAddrOverrides(x86,guest)");
//..    }
//..
//..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
//..
//..    seg_selector = newTemp(Ity_I32);
//..    ldt_ptr      = newTemp(hWordTy);
//..    gdt_ptr      = newTemp(hWordTy);
//..    r64          = newTemp(Ity_I64);
//..
//..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
//..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
//..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
//..
//..    /*
//..    Call this to do the translation and limit checks:
//..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
//..                                  UInt seg_selector, UInt virtual_addr )
//..    */
//..    assign(
//..       r64,
//..       mkIRExprCCall(
//..          Ity_I64,
//..          0/*regparms*/,
//..          "x86g_use_seg_selector",
//..          &x86g_use_seg_selector,
//..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
//..                         mkexpr(seg_selector), virtual)
//..       )
//..    );
//..
//..    /* If the high 32 of the result are non-zero, there was a
//..       failure in address translation.  In which case, make a
//..       quick exit.
//..    */
//..    stmt(
//..       IRStmt_Exit(
//..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
//..          Ijk_MapFail,
//..          IRConst_U32( guest_eip_curr_instr )
//..       )
//..    );
//..
//..    /* otherwise, here's the translated result. */
//..    return unop(Iop_64to32, mkexpr(r64));
//.. }


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
 */

/* Park 'addr64' in a fresh Ity_I64 temporary and return that temp. */
static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

static
IRTemp disAMode ( /*OUT*/Int* len,
                  const VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long d   = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed   = guest_RIP_bbstart
                                      + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
                 = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
                 = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                                         mkU8(scale)),
                        mkU64(d))));
         }

         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d8 + %base
            | %index != %ESP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp8(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d32 + %base
            | %index != %RSP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp32(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}


/* Similarly for VSIB addressing.  This returns just the addend,
   and fills in *rI and *vscale with the register number of the vector
   index and its multiplicand.
*/
static
IRTemp disAVSIBMode ( /*OUT*/Int* len,
                      const VexAbiInfo* vbi, Prefix pfx, Long delta,
                      /*OUT*/HChar* buf, /*OUT*/UInt* rI,
                      IRType ty, /*OUT*/Int* vscale )
{
   UChar mod_reg_rm = getUChar(delta);
   const HChar *vindex;

   *len = 0;
   *rI = 0;
   *vscale = 0;
   buf[0] = (UChar)0;
   /* VSIB requires a SIB byte (rm == 4) and a memory operand. */
   if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
      return IRTemp_INVALID;

   UChar sib     = getUChar(delta+1);
   UChar scale   = toUChar((sib >> 6) & 3);
   UChar index_r = toUChar((sib >> 3) & 7);
   UChar base_r  = toUChar(sib & 7);
   Long  d       = 0;
   /* correct since #(R13) == 8 + #(RBP) */
   Bool  base_is_BPor13 = toBool(base_r == R_RBP);
   delta += 2;
   *len = 2;

   /* Vector index register number, including the REX.X extension bit. */
   *rI = index_r | (getRexX(pfx) << 3);
   if (ty == Ity_V128)
      vindex = nameXMMReg(*rI);
   else
      vindex = nameYMMReg(*rI);
   *vscale = 1<<scale;

   switch (mod_reg_rm >> 6) {
   case 0:
      if (base_is_BPor13) {
         /* mod==0 with base RBP/R13: no base register, d32 literal. */
         d = getSDisp32(delta);
         *len += 4;
         if (scale == 0) {
            DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
         } else {
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
         }
         return disAMode_copy2tmp( mkU64(d) );
      } else {
         if (scale == 0) {
            DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                     nameIRegRexB(8,pfx,base_r), vindex);
         } else {
            DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                     nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
         }
      }
      break;
   case 1:
      d = getSDisp8(delta);
      *len += 1;
      goto have_disp;
   case 2:
      d = getSDisp32(delta);
      *len += 4;
   have_disp:
      if (scale == 0) {
         DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                  nameIRegRexB(8,pfx,base_r), vindex);
      } else {
         DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                  nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
      }
      break;
   }

   /* Zero displacement: the addend is just the base register. */
   if (!d)
      return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );

   return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
                                   mkU64(d)) );
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  */

static UInt lengthAMode ( Prefix pfx, Long delta )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         return 5;

      case 0x04: {
         /* SIB, with no displacement. */
         UChar sib    = getUChar(delta);
         UChar base_r = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         if (base_is_BPor13) {
            /* mod==0 with base RBP/R13 carries a d32 literal. */
            return 6;
         } else {
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Flag treatment for binop helpers: WithFlagNone starts at 2 so it
   cannot be confused with a Bool. */
typedef
   enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
   WithFlag;

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.
2958 2959 If E is reg, --> GET %G, tmp 2960 OP %E, tmp 2961 PUT tmp, %G 2962 2963 If E is mem and OP is not reversible, 2964 --> (getAddr E) -> tmpa 2965 LD (tmpa), tmpa 2966 GET %G, tmp2 2967 OP tmpa, tmp2 2968 PUT tmp2, %G 2969 2970 If E is mem and OP is reversible 2971 --> (getAddr E) -> tmpa 2972 LD (tmpa), tmpa 2973 OP %G, tmpa 2974 PUT tmpa, %G 2975 */ 2976 static 2977 ULong dis_op2_E_G ( const VexAbiInfo* vbi, 2978 Prefix pfx, 2979 IROp op8, 2980 WithFlag flag, 2981 Bool keep, 2982 Int size, 2983 Long delta0, 2984 const HChar* t_amd64opc ) 2985 { 2986 HChar dis_buf[50]; 2987 Int len; 2988 IRType ty = szToITy(size); 2989 IRTemp dst1 = newTemp(ty); 2990 IRTemp src = newTemp(ty); 2991 IRTemp dst0 = newTemp(ty); 2992 UChar rm = getUChar(delta0); 2993 IRTemp addr = IRTemp_INVALID; 2994 2995 /* Stay sane -- check for valid (op8, flag, keep) combinations. */ 2996 switch (op8) { 2997 case Iop_Add8: 2998 switch (flag) { 2999 case WithFlagNone: case WithFlagCarry: 3000 case WithFlagCarryX: case WithFlagOverX: 3001 vassert(keep); 3002 break; 3003 default: 3004 vassert(0); 3005 } 3006 break; 3007 case Iop_Sub8: 3008 vassert(flag == WithFlagNone || flag == WithFlagCarry); 3009 if (flag == WithFlagCarry) vassert(keep); 3010 break; 3011 case Iop_And8: 3012 vassert(flag == WithFlagNone); 3013 break; 3014 case Iop_Or8: case Iop_Xor8: 3015 vassert(flag == WithFlagNone); 3016 vassert(keep); 3017 break; 3018 default: 3019 vassert(0); 3020 } 3021 3022 if (epartIsReg(rm)) { 3023 /* Specially handle XOR reg,reg, because that doesn't really 3024 depend on reg, and doing the obvious thing potentially 3025 generates a spurious value check failure due to the bogus 3026 dependency. Ditto SUB/SBB reg,reg. 
*/ 3027 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep)) 3028 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 3029 putIRegG(size,pfx,rm, mkU(ty,0)); 3030 } 3031 3032 assign( dst0, getIRegG(size,pfx,rm) ); 3033 assign( src, getIRegE(size,pfx,rm) ); 3034 3035 if (op8 == Iop_Add8 && flag == WithFlagCarry) { 3036 helper_ADC( size, dst1, dst0, src, 3037 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3038 putIRegG(size, pfx, rm, mkexpr(dst1)); 3039 } else 3040 if (op8 == Iop_Sub8 && flag == WithFlagCarry) { 3041 helper_SBB( size, dst1, dst0, src, 3042 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3043 putIRegG(size, pfx, rm, mkexpr(dst1)); 3044 } else 3045 if (op8 == Iop_Add8 && flag == WithFlagCarryX) { 3046 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src ); 3047 putIRegG(size, pfx, rm, mkexpr(dst1)); 3048 } else 3049 if (op8 == Iop_Add8 && flag == WithFlagOverX) { 3050 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src ); 3051 putIRegG(size, pfx, rm, mkexpr(dst1)); 3052 } else { 3053 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3054 if (isAddSub(op8)) 3055 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3056 else 3057 setFlags_DEP1(op8, dst1, ty); 3058 if (keep) 3059 putIRegG(size, pfx, rm, mkexpr(dst1)); 3060 } 3061 3062 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3063 nameIRegE(size,pfx,rm), 3064 nameIRegG(size,pfx,rm)); 3065 return 1+delta0; 3066 } else { 3067 /* E refers to memory */ 3068 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3069 assign( dst0, getIRegG(size,pfx,rm) ); 3070 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 3071 3072 if (op8 == Iop_Add8 && flag == WithFlagCarry) { 3073 helper_ADC( size, dst1, dst0, src, 3074 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3075 putIRegG(size, pfx, rm, mkexpr(dst1)); 3076 } else 3077 if (op8 == Iop_Sub8 && flag == WithFlagCarry) { 3078 helper_SBB( size, dst1, dst0, src, 3079 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3080 putIRegG(size, 
pfx, rm, mkexpr(dst1)); 3081 } else 3082 if (op8 == Iop_Add8 && flag == WithFlagCarryX) { 3083 /* normal store */ 3084 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src ); 3085 } else 3086 if (op8 == Iop_Add8 && flag == WithFlagOverX) { 3087 /* normal store */ 3088 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src ); 3089 } else { 3090 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3091 if (isAddSub(op8)) 3092 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3093 else 3094 setFlags_DEP1(op8, dst1, ty); 3095 if (keep) 3096 putIRegG(size, pfx, rm, mkexpr(dst1)); 3097 } 3098 3099 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3100 dis_buf, nameIRegG(size, pfx, rm)); 3101 return len+delta0; 3102 } 3103 } 3104 3105 3106 3107 /* Handle binary integer instructions of the form 3108 op G, E meaning 3109 op reg, reg-or-mem 3110 Is passed the a ptr to the modRM byte, the actual operation, and the 3111 data size. Returns the address advanced completely over this 3112 instruction. 3113 3114 G(src) is reg. 3115 E(dst) is reg-or-mem 3116 3117 If E is reg, --> GET %E, tmp 3118 OP %G, tmp 3119 PUT tmp, %E 3120 3121 If E is mem, --> (getAddr E) -> tmpa 3122 LD (tmpa), tmpv 3123 OP %G, tmpv 3124 ST tmpv, (tmpa) 3125 */ 3126 static 3127 ULong dis_op2_G_E ( const VexAbiInfo* vbi, 3128 Prefix pfx, 3129 IROp op8, 3130 WithFlag flag, 3131 Bool keep, 3132 Int size, 3133 Long delta0, 3134 const HChar* t_amd64opc ) 3135 { 3136 HChar dis_buf[50]; 3137 Int len; 3138 IRType ty = szToITy(size); 3139 IRTemp dst1 = newTemp(ty); 3140 IRTemp src = newTemp(ty); 3141 IRTemp dst0 = newTemp(ty); 3142 UChar rm = getUChar(delta0); 3143 IRTemp addr = IRTemp_INVALID; 3144 3145 /* Stay sane -- check for valid (op8, flag, keep) combinations. 
   */
   switch (op8) {
      case Iop_Add8:
         vassert(flag == WithFlagNone || flag == WithFlagCarry);
         vassert(keep);
         break;
      case Iop_Sub8:
         vassert(flag == WithFlagNone || flag == WithFlagCarry);
         /* SUB with keep==False is CMP; CMP has no with-borrow form. */
         if (flag == WithFlagCarry) vassert(keep);
         break;
      case Iop_And8: case Iop_Or8: case Iop_Xor8:
         vassert(flag == WithFlagNone);
         vassert(keep);
         break;
      default:
         vassert(0);
   }

   /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
      intended operation is add-with-carry or subtract-with-borrow. */

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SUB/SBB reg,reg. */
      if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (op8 == Iop_Add8 && flag == WithFlagCarry) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* keep==False means flags-only (CMP/TEST style): no writeback. */
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory.  The destination is the memory operand, so a
      LOCK prefix makes the writeback go via casLE (compare-and-swap
      against the originally-loaded value) rather than a plain store. */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      if (op8 == Iop_Add8 && flag == WithFlagCarry) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (haveLOCK(pfx)) {
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm), dis_buf);
      return len+delta0;
   }
}


/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.
   If E is reg, -->    GET %E, tmpv
                       PUT tmpv, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpb
                       PUT tmpb, %G
*/
static
ULong dis_mov_E_G ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegE(size,pfx,rm),
                           nameIRegG(size,pfx,rm));
      /* reg-reg form: just the modrm byte to step over. */
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size),
                           dis_buf,
                           nameIRegG(size,pfx,rm));
      return delta0+len;
   }
}


/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.
   We have to decide here whether F2 or F3 are acceptable.  F2 never is.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G,  tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_mov_G_E ( const VexAbiInfo*  vbi,
                    Prefix       pfx,
                    Int          size,
                    Long         delta0,
                    /*OUT*/Bool* ok )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   /* *ok is cleared (decode failure) if a forbidden F2/F3 prefix is
      present; in that case delta0 is returned unadvanced. */
   *ok = True;

   if (epartIsReg(rm)) {
      if (haveF2orF3(pfx)) { *ok = False; return delta0; }
      putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      if (haveF2(pfx)) { *ok = False; return delta0; }
      /* F3(XRELEASE) is acceptable, though. */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           dis_buf);
      return len+delta0;
   }
}


/* op $immediate, AL/AX/EAX/RAX.
 */
static
ULong dis_op_imm_A ( Int    size,
                     Bool   carrying,
                     IROp   op8,
                     Bool   keep,
                     Long   delta,
                     const HChar* t_amd64opc )
{
   /* Immediate is at most 4 bytes even for 64-bit ops (it is
      sign-extended to 64 bits, per the usual amd64 encoding rule). */
   Int    size4 = imin(size,4);
   IRType ty    = szToITy(size);
   IRTemp dst0  = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   Long  lit    = getSDisp(size4,delta);
   assign(dst0, getIRegRAX(size));
   assign(src,  mkU(ty,lit & mkSizeMask(size)));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(amd64,guest)");

   /* keep==False is the CMP/TEST flags-only case. */
   if (keep)
      putIRegRAX(size, mkexpr(dst1));

   DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
                           lit, nameIRegRAX(size));
   return delta+size4;
}


/* Sign- and Zero-extending moves: movsx/movzx from an szs-sized E
   operand to an szd-sized G register. */
static
ULong dis_movx_E_G ( const VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, Int szs, Int szd, Bool sign_extend )
{
   UChar rm = getUChar(delta);
   if (epartIsReg(rm)) {
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       getIRegE(szs,pfx,rm)));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               nameIRegE(szs,pfx,rm),
                               nameIRegG(szd,pfx,rm));
      return 1+delta;
   }

   /* E refers to memory */
   {
      Int    len;
      HChar  dis_buf[50];
      IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       loadLE(szToITy(szs),mkexpr(addr))));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               dis_buf,
                               nameIRegG(szd,pfx,rm));
      return len+delta;
   }
}


/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case */
   if (sz == 8) {
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      /* DivMod ops yield quotient in the low half, remainder in the
         high half. */
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      /* Smaller sizes are all done by widening to a 64-by-32 divide. */
      IROp   op    = signed_divide ? Iop_DivModS64to32
                                   : Iop_DivModU64to32;
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp dst64 = newTemp(Ity_I64);
      switch (sz) {
         case 4:
            assign( src64,
                    binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
            assign( dst64,
                    binop(op, mkexpr(src64), mkexpr(t)) );
            putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
            putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
            break;
         case 2: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ?
Iop_16Sto32 : Iop_16Uto32;
            assign( src64, unop(widen3264,
                                binop(Iop_16HLto32,
                                      getIRegRDX(2),
                                      getIRegRAX(2))) );
            assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
            putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
            putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
            break;
         }
         case 1: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
            IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
            /* 8-bit divide: dividend is the whole of AX. */
            assign( src64, unop(widen3264,
                        unop(widen1632, getIRegRAX(2))) );
            assign( dst64,
                    binop(op, mkexpr(src64),
                              unop(widen1632, unop(widen816, mkexpr(t)))) );
            /* quotient -> AL, remainder -> AH */
            putIRegRAX( 1, unop(Iop_16to8,
                           unop(Iop_32to16,
                           unop(Iop_64to32,mkexpr(dst64)))) );
            putIRegAH( unop(Iop_16to8,
                       unop(Iop_32to16,
                       unop(Iop_64HIto32,mkexpr(dst64)))) );
            break;
         }
         default:
            vpanic("codegen_div(amd64)");
      }
   }
}

/* Group 1 extended opcodes: ADD/OR/ADC/SBB/AND/SUB/XOR/CMP with an
   immediate operand, selected by the reg field of the modrm byte.
   am_sz is the length of the address mode, d_sz the length of the
   immediate, d64 the (sign-extended) immediate itself. */
static
ULong dis_Grp1 ( const VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, Long d64 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   ULong   mask = mkSizeMask(sz);

   switch (gregLO3ofRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      /* ADC and SBB leave op8 as Iop_INVALID; they are dispatched to
         helper_ADC/helper_SBB below instead. */
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1(amd64): unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIRegE(sz,pfx,modrm));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* subopcode 7 is CMP: flags only, no destination write. */
      if (gregLO3ofRM(modrm) < 7)
         putIRegE(sz, pfx, modrm, mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
          nameIRegE(sz,pfx,modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (gregLO3ofRM(modrm) < 7) {
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
          d64, dis_buf);
   }
   return delta;
}


/* Group 2 extended opcodes (ROL/ROR/RCL/RCR/SHL/SHR/SAL/SAR,
   selected by the reg field of modrm).  shift_expr must be an 8-bit
   typed expression. */

static
ULong dis_Grp2 ( const VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                 const HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty    = szToITy(sz);
   IRTemp dst0  = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIRegE(sz, pfx, modrm));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the subopcode; exactly one of these becomes True. */
   isShift = False;
   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(amd64)");
   }

   if (isRotateC) {
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
         Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
      Bool     left = toBool(gregLO3ofRM(modrm) == 2);
      IRExpr** argsVALUE;
      IRExpr** argsRFLAGS;

      IRTemp new_value  = newTemp(Ity_I64);
      IRTemp new_rflags = newTemp(Ity_I64);
      IRTemp old_rflags = newTemp(Ity_I64);

      assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );

      /* positive sz -> helper returns the rotated value */
      argsVALUE
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(sz) );
      assign( new_value,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsVALUE
              )
            );

      /* negative sz -> same helper returns the new rflags instead */
      argsRFLAGS
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(-sz) );
      assign( new_rflags,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsRFLAGS
              )
            );

      assign( dst1, narrowTo(ty, mkexpr(new_value)) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   }

   else
   if (isShift) {

      IRTemp pre64     = newTemp(Ity_I64);
      IRTemp res64     = newTemp(Ity_I64);
      IRTemp res64ss   = newTemp(Ity_I64);
      IRTemp shift_amt = newTemp(Ity_I8);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);
      IROp   op64;

      switch (gregLO3ofRM(modrm)) {
         case 4: op64 = Iop_Shl64; break;
         case 5: op64 = Iop_Shr64; break;
         /* case 6 is SAL, an alias of SHL */
         case 6: op64 = Iop_Shl64; break;
         case 7: op64 = Iop_Sar64; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
      /* shift_amt = shift_expr & MASK, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );

      /* suitably widen the value to be shifted to 64 bits. */
      assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
                                     : widenUto64(mkexpr(dst0)) );

      /* res64 = pre64 `shift` shift_amt */
      assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );

      /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
      assign( res64ss,
              binop(op64,
                    mkexpr(pre64),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(mask))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res64)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
                                        : (ty==Ity_I32 ? 2 : 3));
      Bool   left      = toBool(gregLO3ofRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      IRTemp rot_amt64 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I64);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));

      if (ty == Ity_I64)
         assign(rot_amt, mkexpr(rot_amt64));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_amd64g_calculate_rflags_all());

      /* rot_amt64 :: Ity_I8.  We need to convert it to I1. */
      IRTemp rot_amt64b = newTemp(Ity_I1);
      assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before.
 */
      /* Each thunk field is updated conditionally via ITE: if the
         rotate count was zero, the pre-existing field value is kept. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkU64(ccOp),
                                    IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    widenUto64(mkexpr(dst1)),
                                    IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkU64(0),
                                    IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkexpr(oldFlags),
                                    IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}


/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, UChar modrm,
                     Int am_sz, Int sz, ULong src_val,
                     Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I64);
   IRTemp t2m    = newTemp(Ity_I64);
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   ULong  mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Check whether F2 or F3 are acceptable.
    */
   if (epartIsReg(modrm)) {
      /* F2 or F3 are not allowed in the register case. */
      if (haveF2orF3(pfx)) {
         *decode_OK = False;
         return delta;
      }
   } else {
      /* F2 or F3 (but not both) are allowable provided LOCK is also
         present. */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
            *decode_OK = False;
            return delta;
         }
      }
   }

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case  2: src_val &= 15; break;
      case  4: src_val &= 31; break;
      case  8: src_val &= 63; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */  mask = 0;                  break;
      case 5: /* BTS */ mask = 1ULL << src_val;    break;
      case 6: /* BTR */ mask = ~(1ULL << src_val); break;
      case 7: /* BTC */ mask = 1ULL << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      64-bits wide regardless of sz.
    */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, nameIRegE(sz,pfx,modrm));
   } else {
      Int len;
      /* the +1 accounts for the trailing immediate byte. */
      t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
      delta  += (len+1);
      assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT. */
   if (gregLO3ofRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
      } else {
         if (haveLOCK(pfx)) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_RIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
                  mkU64(1))
       ));
   /* Set NDEP even though it isn't used.
This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}


/* Signed/unsigned widening multiply.  Generate IR to multiply the
   value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
   RDX:RAX/EDX:EAX/DX:AX/AX.
*/
static void codegen_mulL_A_D ( Int sz, Bool syned,
                               IRTemp tmp, const HChar* tmp_txt )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);

   assign( t1, getIRegRAX(sz) );

   switch (ty) {
      case Ity_I64: {
         IRTemp res128  = newTemp(Ity_I128);
         IRTemp resHi   = newTemp(Ity_I64);
         IRTemp resLo   = newTemp(Ity_I64);
         IROp   mulOp   = syned ? Iop_MullS64 : Iop_MullU64;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
         assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
         assign( resLo, unop(Iop_128to64,mkexpr(res128)));
         putIReg64(R_RDX, mkexpr(resHi));
         putIReg64(R_RAX, mkexpr(resLo));
         break;
      }
      case Ity_I32: {
         IRTemp res64   = newTemp(Ity_I64);
         IRTemp resHi   = newTemp(Ity_I32);
         IRTemp resLo   = newTemp(Ity_I32);
         IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
         assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
         assign( resLo, unop(Iop_64to32,mkexpr(res64)));
         putIRegRDX(4, mkexpr(resHi));
         putIRegRAX(4, mkexpr(resLo));
         break;
      }
      case Ity_I16: {
         IRTemp res32   = newTemp(Ity_I32);
         IRTemp resHi   = newTemp(Ity_I16);
         IRTemp resLo   = newTemp(Ity_I16);
         IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
         assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
         assign( resLo, unop(Iop_32to16,mkexpr(res32)));
         putIRegRDX(2, mkexpr(resHi));
         putIRegRAX(2, mkexpr(resLo));
         break;
      }
      case Ity_I8: {
         IRTemp res16   = newTemp(Ity_I16);
         IRTemp resHi   = newTemp(Ity_I8);
         IRTemp resLo   = newTemp(Ity_I8);
         IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
         assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
         assign( resLo, unop(Iop_16to8,mkexpr(res16)));
         /* 8-bit widening multiply parks the whole 16-bit result in
            AX (not AL/AH separately), so resHi/resLo stay unused
            here. */
         putIRegRAX(2, mkexpr(res16));
         break;
      }
      default:
         ppIRType(ty);
         vpanic("codegen_mulL_A_D(amd64)");
   }
   DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
}


/* Group 3 extended opcodes.  We have to decide here whether F2 and F3
   might be valid.*/
static
ULong dis_Grp3 ( const VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
{
   Long    d64;
   UChar   modrm;
   HChar   dis_buf[50];
   Int     len;
   IRTemp  addr;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp dst1, src, dst0;
   *decode_OK = True;
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
*/ 4121 if (haveF2orF3(pfx)) goto unhandled; 4122 switch (gregLO3ofRM(modrm)) { 4123 case 0: { /* TEST */ 4124 delta++; 4125 d64 = getSDisp(imin(4,sz), delta); 4126 delta += imin(4,sz); 4127 dst1 = newTemp(ty); 4128 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4129 getIRegE(sz,pfx,modrm), 4130 mkU(ty, d64 & mkSizeMask(sz)))); 4131 setFlags_DEP1( Iop_And8, dst1, ty ); 4132 DIP("test%c $%lld, %s\n", 4133 nameISize(sz), d64, 4134 nameIRegE(sz, pfx, modrm)); 4135 break; 4136 } 4137 case 1: 4138 *decode_OK = False; 4139 return delta; 4140 case 2: /* NOT */ 4141 delta++; 4142 putIRegE(sz, pfx, modrm, 4143 unop(mkSizedOp(ty,Iop_Not8), 4144 getIRegE(sz, pfx, modrm))); 4145 DIP("not%c %s\n", nameISize(sz), 4146 nameIRegE(sz, pfx, modrm)); 4147 break; 4148 case 3: /* NEG */ 4149 delta++; 4150 dst0 = newTemp(ty); 4151 src = newTemp(ty); 4152 dst1 = newTemp(ty); 4153 assign(dst0, mkU(ty,0)); 4154 assign(src, getIRegE(sz, pfx, modrm)); 4155 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4156 mkexpr(src))); 4157 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4158 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 4159 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 4160 break; 4161 case 4: /* MUL (unsigned widening) */ 4162 delta++; 4163 src = newTemp(ty); 4164 assign(src, getIRegE(sz,pfx,modrm)); 4165 codegen_mulL_A_D ( sz, False, src, 4166 nameIRegE(sz,pfx,modrm) ); 4167 break; 4168 case 5: /* IMUL (signed widening) */ 4169 delta++; 4170 src = newTemp(ty); 4171 assign(src, getIRegE(sz,pfx,modrm)); 4172 codegen_mulL_A_D ( sz, True, src, 4173 nameIRegE(sz,pfx,modrm) ); 4174 break; 4175 case 6: /* DIV */ 4176 delta++; 4177 assign( t1, getIRegE(sz, pfx, modrm) ); 4178 codegen_div ( sz, t1, False ); 4179 DIP("div%c %s\n", nameISize(sz), 4180 nameIRegE(sz, pfx, modrm)); 4181 break; 4182 case 7: /* IDIV */ 4183 delta++; 4184 assign( t1, getIRegE(sz, pfx, modrm) ); 4185 codegen_div ( sz, t1, True ); 4186 DIP("idiv%c %s\n", nameISize(sz), 4187 nameIRegE(sz, pfx, modrm)); 4188 
break; 4189 default: 4190 /*NOTREACHED*/ 4191 vpanic("Grp3(amd64,R)"); 4192 } 4193 } else { 4194 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4195 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4196 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/) 4197 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4198 validF2orF3 = True; 4199 } 4200 if (!validF2orF3) goto unhandled; 4201 /* */ 4202 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 4203 /* we have to inform disAMode of any immediate 4204 bytes used */ 4205 gregLO3ofRM(modrm)==0/*TEST*/ 4206 ? imin(4,sz) 4207 : 0 4208 ); 4209 t1 = newTemp(ty); 4210 delta += len; 4211 assign(t1, loadLE(ty,mkexpr(addr))); 4212 switch (gregLO3ofRM(modrm)) { 4213 case 0: { /* TEST */ 4214 d64 = getSDisp(imin(4,sz), delta); 4215 delta += imin(4,sz); 4216 dst1 = newTemp(ty); 4217 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4218 mkexpr(t1), 4219 mkU(ty, d64 & mkSizeMask(sz)))); 4220 setFlags_DEP1( Iop_And8, dst1, ty ); 4221 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 4222 break; 4223 } 4224 case 1: 4225 *decode_OK = False; 4226 return delta; 4227 case 2: /* NOT */ 4228 dst1 = newTemp(ty); 4229 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 4230 if (haveLOCK(pfx)) { 4231 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4232 guest_RIP_curr_instr ); 4233 } else { 4234 storeLE( mkexpr(addr), mkexpr(dst1) ); 4235 } 4236 DIP("not%c %s\n", nameISize(sz), dis_buf); 4237 break; 4238 case 3: /* NEG */ 4239 dst0 = newTemp(ty); 4240 src = newTemp(ty); 4241 dst1 = newTemp(ty); 4242 assign(dst0, mkU(ty,0)); 4243 assign(src, mkexpr(t1)); 4244 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4245 mkexpr(src))); 4246 if (haveLOCK(pfx)) { 4247 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4248 guest_RIP_curr_instr ); 4249 } else { 4250 storeLE( mkexpr(addr), mkexpr(dst1) ); 4251 } 4252 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4253 DIP("neg%c %s\n", 
nameISize(sz), dis_buf); 4254 break; 4255 case 4: /* MUL (unsigned widening) */ 4256 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 4257 break; 4258 case 5: /* IMUL */ 4259 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 4260 break; 4261 case 6: /* DIV */ 4262 codegen_div ( sz, t1, False ); 4263 DIP("div%c %s\n", nameISize(sz), dis_buf); 4264 break; 4265 case 7: /* IDIV */ 4266 codegen_div ( sz, t1, True ); 4267 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 4268 break; 4269 default: 4270 /*NOTREACHED*/ 4271 vpanic("Grp3(amd64,M)"); 4272 } 4273 } 4274 return delta; 4275 unhandled: 4276 *decode_OK = False; 4277 return delta; 4278 } 4279 4280 4281 /* Group 4 extended opcodes. We have to decide here whether F2 and F3 4282 might be valid. */ 4283 static 4284 ULong dis_Grp4 ( const VexAbiInfo* vbi, 4285 Prefix pfx, Long delta, Bool* decode_OK ) 4286 { 4287 Int alen; 4288 UChar modrm; 4289 HChar dis_buf[50]; 4290 IRType ty = Ity_I8; 4291 IRTemp t1 = newTemp(ty); 4292 IRTemp t2 = newTemp(ty); 4293 4294 *decode_OK = True; 4295 4296 modrm = getUChar(delta); 4297 if (epartIsReg(modrm)) { 4298 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */ 4299 if (haveF2orF3(pfx)) goto unhandled; 4300 assign(t1, getIRegE(1, pfx, modrm)); 4301 switch (gregLO3ofRM(modrm)) { 4302 case 0: /* INC */ 4303 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4304 putIRegE(1, pfx, modrm, mkexpr(t2)); 4305 setFlags_INC_DEC( True, t2, ty ); 4306 break; 4307 case 1: /* DEC */ 4308 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4309 putIRegE(1, pfx, modrm, mkexpr(t2)); 4310 setFlags_INC_DEC( False, t2, ty ); 4311 break; 4312 default: 4313 *decode_OK = False; 4314 return delta; 4315 } 4316 delta++; 4317 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 4318 nameIRegE(1, pfx, modrm)); 4319 } else { 4320 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4321 Bool validF2orF3 = haveF2orF3(pfx) ? 
False : True; 4322 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4323 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4324 validF2orF3 = True; 4325 } 4326 if (!validF2orF3) goto unhandled; 4327 /* */ 4328 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 4329 assign( t1, loadLE(ty, mkexpr(addr)) ); 4330 switch (gregLO3ofRM(modrm)) { 4331 case 0: /* INC */ 4332 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4333 if (haveLOCK(pfx)) { 4334 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4335 guest_RIP_curr_instr ); 4336 } else { 4337 storeLE( mkexpr(addr), mkexpr(t2) ); 4338 } 4339 setFlags_INC_DEC( True, t2, ty ); 4340 break; 4341 case 1: /* DEC */ 4342 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4343 if (haveLOCK(pfx)) { 4344 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4345 guest_RIP_curr_instr ); 4346 } else { 4347 storeLE( mkexpr(addr), mkexpr(t2) ); 4348 } 4349 setFlags_INC_DEC( False, t2, ty ); 4350 break; 4351 default: 4352 *decode_OK = False; 4353 return delta; 4354 } 4355 delta += alen; 4356 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 4357 } 4358 return delta; 4359 unhandled: 4360 *decode_OK = False; 4361 return delta; 4362 } 4363 4364 4365 /* Group 5 extended opcodes. We have to decide here whether F2 and F3 4366 might be valid. */ 4367 static 4368 ULong dis_Grp5 ( const VexAbiInfo* vbi, 4369 Prefix pfx, Int sz, Long delta, 4370 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 4371 { 4372 Int len; 4373 UChar modrm; 4374 HChar dis_buf[50]; 4375 IRTemp addr = IRTemp_INVALID; 4376 IRType ty = szToITy(sz); 4377 IRTemp t1 = newTemp(ty); 4378 IRTemp t2 = IRTemp_INVALID; 4379 IRTemp t3 = IRTemp_INVALID; 4380 Bool showSz = True; 4381 4382 *decode_OK = True; 4383 4384 modrm = getUChar(delta); 4385 if (epartIsReg(modrm)) { 4386 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 4387 F2/CALL and F2/JMP may have bnd prefix. */ 4388 if (haveF2orF3(pfx) 4389 && ! 
(haveF2(pfx) 4390 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4))) 4391 goto unhandledR; 4392 assign(t1, getIRegE(sz,pfx,modrm)); 4393 switch (gregLO3ofRM(modrm)) { 4394 case 0: /* INC */ 4395 t2 = newTemp(ty); 4396 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4397 mkexpr(t1), mkU(ty,1))); 4398 setFlags_INC_DEC( True, t2, ty ); 4399 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4400 break; 4401 case 1: /* DEC */ 4402 t2 = newTemp(ty); 4403 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4404 mkexpr(t1), mkU(ty,1))); 4405 setFlags_INC_DEC( False, t2, ty ); 4406 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4407 break; 4408 case 2: /* call Ev */ 4409 /* Ignore any sz value and operate as if sz==8. */ 4410 if (!(sz == 4 || sz == 8)) goto unhandledR; 4411 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4412 sz = 8; 4413 t3 = newTemp(Ity_I64); 4414 assign(t3, getIRegE(sz,pfx,modrm)); 4415 t2 = newTemp(Ity_I64); 4416 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4417 putIReg64(R_RSP, mkexpr(t2)); 4418 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 4419 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 4420 jmp_treg(dres, Ijk_Call, t3); 4421 vassert(dres->whatNext == Dis_StopHere); 4422 showSz = False; 4423 break; 4424 case 4: /* jmp Ev */ 4425 /* Ignore any sz value and operate as if sz==8. */ 4426 if (!(sz == 4 || sz == 8)) goto unhandledR; 4427 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4428 sz = 8; 4429 t3 = newTemp(Ity_I64); 4430 assign(t3, getIRegE(sz,pfx,modrm)); 4431 jmp_treg(dres, Ijk_Boring, t3); 4432 vassert(dres->whatNext == Dis_StopHere); 4433 showSz = False; 4434 break; 4435 case 6: /* PUSH Ev */ 4436 /* There is no encoding for 32-bit operand size; hence ... 
*/ 4437 if (sz == 4) sz = 8; 4438 if (sz == 8 || sz == 2) { 4439 ty = szToITy(sz); /* redo it, since sz might have changed */ 4440 t3 = newTemp(ty); 4441 assign(t3, getIRegE(sz,pfx,modrm)); 4442 t2 = newTemp(Ity_I64); 4443 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4444 putIReg64(R_RSP, mkexpr(t2) ); 4445 storeLE( mkexpr(t2), mkexpr(t3) ); 4446 break; 4447 } else { 4448 goto unhandledR; /* awaiting test case */ 4449 } 4450 default: 4451 unhandledR: 4452 *decode_OK = False; 4453 return delta; 4454 } 4455 delta++; 4456 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4457 showSz ? nameISize(sz) : ' ', 4458 nameIRegE(sz, pfx, modrm)); 4459 } else { 4460 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */ 4461 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4462 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4463 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4464 validF2orF3 = True; 4465 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4) 4466 && (haveF2(pfx) && !haveF3(pfx))) { 4467 validF2orF3 = True; 4468 } 4469 if (!validF2orF3) goto unhandledM; 4470 /* */ 4471 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4472 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4473 && gregLO3ofRM(modrm) != 6) { 4474 assign(t1, loadLE(ty,mkexpr(addr))); 4475 } 4476 switch (gregLO3ofRM(modrm)) { 4477 case 0: /* INC */ 4478 t2 = newTemp(ty); 4479 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4480 mkexpr(t1), mkU(ty,1))); 4481 if (haveLOCK(pfx)) { 4482 casLE( mkexpr(addr), 4483 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4484 } else { 4485 storeLE(mkexpr(addr),mkexpr(t2)); 4486 } 4487 setFlags_INC_DEC( True, t2, ty ); 4488 break; 4489 case 1: /* DEC */ 4490 t2 = newTemp(ty); 4491 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4492 mkexpr(t1), mkU(ty,1))); 4493 if (haveLOCK(pfx)) { 4494 casLE( mkexpr(addr), 4495 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4496 } else { 4497 
storeLE(mkexpr(addr),mkexpr(t2)); 4498 } 4499 setFlags_INC_DEC( False, t2, ty ); 4500 break; 4501 case 2: /* call Ev */ 4502 /* Ignore any sz value and operate as if sz==8. */ 4503 if (!(sz == 4 || sz == 8)) goto unhandledM; 4504 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4505 sz = 8; 4506 t3 = newTemp(Ity_I64); 4507 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4508 t2 = newTemp(Ity_I64); 4509 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4510 putIReg64(R_RSP, mkexpr(t2)); 4511 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4512 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4513 jmp_treg(dres, Ijk_Call, t3); 4514 vassert(dres->whatNext == Dis_StopHere); 4515 showSz = False; 4516 break; 4517 case 4: /* JMP Ev */ 4518 /* Ignore any sz value and operate as if sz==8. */ 4519 if (!(sz == 4 || sz == 8)) goto unhandledM; 4520 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4521 sz = 8; 4522 t3 = newTemp(Ity_I64); 4523 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4524 jmp_treg(dres, Ijk_Boring, t3); 4525 vassert(dres->whatNext == Dis_StopHere); 4526 showSz = False; 4527 break; 4528 case 6: /* PUSH Ev */ 4529 /* There is no encoding for 32-bit operand size; hence ... */ 4530 if (sz == 4) sz = 8; 4531 if (sz == 8 || sz == 2) { 4532 ty = szToITy(sz); /* redo it, since sz might have changed */ 4533 t3 = newTemp(ty); 4534 assign(t3, loadLE(ty,mkexpr(addr))); 4535 t2 = newTemp(Ity_I64); 4536 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4537 putIReg64(R_RSP, mkexpr(t2) ); 4538 storeLE( mkexpr(t2), mkexpr(t3) ); 4539 break; 4540 } else { 4541 goto unhandledM; /* awaiting test case */ 4542 } 4543 default: 4544 unhandledM: 4545 *decode_OK = False; 4546 return delta; 4547 } 4548 delta += len; 4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4550 showSz ? 
nameISize(sz) : ' ', 4551 dis_buf); 4552 } 4553 return delta; 4554 } 4555 4556 4557 /*------------------------------------------------------------*/ 4558 /*--- Disassembling string ops (including REP prefixes) ---*/ 4559 /*------------------------------------------------------------*/ 4560 4561 /* Code shared by all the string ops */ 4562 static 4563 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4564 { 4565 UChar logSz; 4566 if (sz == 8 || sz == 4 || sz == 2) { 4567 logSz = 1; 4568 if (sz == 4) logSz = 2; 4569 if (sz == 8) logSz = 3; 4570 assign( t_inc, 4571 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4572 mkU8(logSz) ) ); 4573 } else { 4574 assign( t_inc, 4575 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4576 } 4577 } 4578 4579 static 4580 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ), 4581 Int sz, const HChar* name, Prefix pfx ) 4582 { 4583 IRTemp t_inc = newTemp(Ity_I64); 4584 /* Really we ought to inspect the override prefixes, but we don't. 4585 The following assertion catches any resulting sillyness. 
*/ 4586 vassert(pfx == clearSegBits(pfx)); 4587 dis_string_op_increment(sz, t_inc); 4588 dis_OP( sz, t_inc, pfx ); 4589 DIP("%s%c\n", name, nameISize(sz)); 4590 } 4591 4592 static 4593 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx ) 4594 { 4595 IRType ty = szToITy(sz); 4596 IRTemp td = newTemp(Ity_I64); /* RDI */ 4597 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4598 IRExpr *incd, *incs; 4599 4600 if (haveASO(pfx)) { 4601 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4602 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4603 } else { 4604 assign( td, getIReg64(R_RDI) ); 4605 assign( ts, getIReg64(R_RSI) ); 4606 } 4607 4608 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4609 4610 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4611 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4612 if (haveASO(pfx)) { 4613 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4614 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4615 } 4616 putIReg64( R_RDI, incd ); 4617 putIReg64( R_RSI, incs ); 4618 } 4619 4620 static 4621 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx ) 4622 { 4623 IRType ty = szToITy(sz); 4624 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4625 IRExpr *incs; 4626 4627 if (haveASO(pfx)) 4628 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4629 else 4630 assign( ts, getIReg64(R_RSI) ); 4631 4632 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4633 4634 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4635 if (haveASO(pfx)) 4636 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4637 putIReg64( R_RSI, incs ); 4638 } 4639 4640 static 4641 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx ) 4642 { 4643 IRType ty = szToITy(sz); 4644 IRTemp ta = newTemp(ty); /* rAX */ 4645 IRTemp td = newTemp(Ity_I64); /* RDI */ 4646 IRExpr *incd; 4647 4648 assign( ta, getIRegRAX(sz) ); 4649 4650 if (haveASO(pfx)) 4651 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4652 else 4653 assign( td, getIReg64(R_RDI) ); 4654 4655 storeLE( mkexpr(td), mkexpr(ta) ); 4656 4657 incd = 
binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4658 if (haveASO(pfx)) 4659 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4660 putIReg64( R_RDI, incd ); 4661 } 4662 4663 static 4664 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx ) 4665 { 4666 IRType ty = szToITy(sz); 4667 IRTemp tdv = newTemp(ty); /* (RDI) */ 4668 IRTemp tsv = newTemp(ty); /* (RSI) */ 4669 IRTemp td = newTemp(Ity_I64); /* RDI */ 4670 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4671 IRExpr *incd, *incs; 4672 4673 if (haveASO(pfx)) { 4674 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4675 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4676 } else { 4677 assign( td, getIReg64(R_RDI) ); 4678 assign( ts, getIReg64(R_RSI) ); 4679 } 4680 4681 assign( tdv, loadLE(ty,mkexpr(td)) ); 4682 4683 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4684 4685 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4686 4687 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4688 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4689 if (haveASO(pfx)) { 4690 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4691 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4692 } 4693 putIReg64( R_RDI, incd ); 4694 putIReg64( R_RSI, incs ); 4695 } 4696 4697 static 4698 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) 4699 { 4700 IRType ty = szToITy(sz); 4701 IRTemp ta = newTemp(ty); /* rAX */ 4702 IRTemp td = newTemp(Ity_I64); /* RDI */ 4703 IRTemp tdv = newTemp(ty); /* (RDI) */ 4704 IRExpr *incd; 4705 4706 assign( ta, getIRegRAX(sz) ); 4707 4708 if (haveASO(pfx)) 4709 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4710 else 4711 assign( td, getIReg64(R_RDI) ); 4712 4713 assign( tdv, loadLE(ty,mkexpr(td)) ); 4714 4715 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4716 4717 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4718 if (haveASO(pfx)) 4719 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4720 putIReg64( R_RDI, incd ); 4721 } 4722 4723 4724 /* Wrap the appropriate string op inside a REP/REPE/REPNE. 
   We assume
   the insn is the last one in the basic block, and so emit a jump to
   the next insn, rather than just falling through.

   The emitted shape is:
     1. exit to rip_next if the (R/E)CX count is already zero;
     2. decrement the count;
     3. perform one iteration of the string op;
     4. loop back to rip (unconditionally for plain REP, or guarded
        by 'cond' for REPE/REPNE), else fall out to rip_next. */
static
void dis_REP_op ( /*MOD*/DisResult* dres,
                  AMD64Condcode cond,
                  void (*dis_OP)(Int, IRTemp, Prefix),
                  Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
                  Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   IRTemp tc;
   IRExpr* cmp;

   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));

   /* Address-size override selects ECX rather than RCX as counter. */
   if (haveASO(pfx)) {
      tc = newTemp(Ity_I32);  /* ECX */
      assign( tc, getIReg32(R_RCX) );
      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   } else {
      tc = newTemp(Ity_I64);  /* RCX */
      assign( tc, getIReg64(R_RCX) );
      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   }

   /* Count exhausted: skip straight to the next instruction. */
   stmt( IRStmt_Exit( cmp, Ijk_Boring,
                      IRConst_U64(rip_next), OFFB_RIP ) );

   if (haveASO(pfx))
      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
  else
      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );

   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc, pfx);

   if (cond == AMD64CondAlways) {
      jmp_lit(dres, Ijk_Boring, rip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U64(rip),
                         OFFB_RIP ) );
      jmp_lit(dres, Ijk_Boring, rip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}


/*------------------------------------------------------------*/
/*--- Arithmetic, etc.                                     ---*/
/*------------------------------------------------------------*/

/* IMUL E, G.  Supplied eip points to the modR/M byte.
   Two-operand signed multiply: G = G * E, with only the low half of
   the product kept.  Returns the updated delta. */
static
ULong dis_mul_E_G ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getUChar(delta0);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tg = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   assign( tg, getIRegG(size, pfx, rm) );
   if (epartIsReg(rm)) {
      assign( te, getIRegE(size, pfx, rm) );
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
      assign( te, loadLE(ty,mkexpr(addr)) );
   }

   setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );

   putIRegG(size, pfx, rm, mkexpr(resLo) );

   if (epartIsReg(rm)) {
      DIP("imul%c %s, %s\n", nameISize(size),
                             nameIRegE(size,pfx,rm),
                             nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      DIP("imul%c %s, %s\n", nameISize(size),
                             dis_buf,
                             nameIRegG(size,pfx,rm));
      return alen+delta0;
   }
}


/* IMUL I * E -> G.  Supplied rip points to the modR/M byte.
   Three-operand immediate multiply: G = E * imm, low half kept.
   'litsize' is the encoded immediate width (capped at 4 bytes, as
   usual for 64-bit operand size).  Returns the updated delta. */
static
ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
                       Prefix      pfx,
                       Int         size,
                       Long        delta,
                       Int         litsize )
{
   Long   d64;
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getUChar(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tl = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);

   if (epartIsReg(rm)) {
      assign(te, getIRegE(size, pfx, rm));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                                     imin(4,litsize) );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   d64 = getSDisp(imin(4,litsize),delta);
   delta += imin(4,litsize);

   /* Truncate the sign-extended immediate to the operand width. */
   d64 &= mkSizeMask(size);
   assign(tl, mkU(ty,d64));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );

   putIRegG(size, pfx, rm, mkexpr(resLo));

   DIP("imul%c $%lld, %s, %s\n",
       nameISize(size), d64,
       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
       nameIRegG(size,pfx,rm) );
   return delta;
}


/* Generate an IR sequence to do a popcount operation on the supplied
   IRTemp, and return a new IRTemp holding the result.  'ty' may be
   Ity_I16, Ity_I32 or Ity_I64 only.
   Uses the classic parallel bit-summation scheme: at step i, pairs of
   2^i-bit fields are masked and added to form 2^(i+1)-bit partial
   counts, so log2(width) steps yield the full population count. */
static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
{
   Int i;
   if (ty == Ity_I16) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[4], shift[4];
      for (i = 0; i < 4; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU16(0x5555));
      assign(mask[1], mkU16(0x3333));
      assign(mask[2], mkU16(0x0F0F));
      assign(mask[3], mkU16(0x00FF));
      old = src;
      for (i = 0; i < 4; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add16,
                      binop(Iop_And16,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And16,
                            binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   if (ty == Ity_I32) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[5], shift[5];
      for (i = 0; i < 5; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU32(0x55555555));
      assign(mask[1], mkU32(0x33333333));
      assign(mask[2], mkU32(0x0F0F0F0F));
      assign(mask[3], mkU32(0x00FF00FF));
      assign(mask[4], mkU32(0x0000FFFF));
      old = src;
      for (i = 0; i < 5; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add32,
                      binop(Iop_And32,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   if (ty == Ity_I64) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[6], shift[6];
      for (i = 0; i < 6; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU64(0x5555555555555555ULL));
      assign(mask[1], mkU64(0x3333333333333333ULL));
      assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
      assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
      assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
      assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
      old = src;
      for (i = 0; i < 6; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add64,
                      binop(Iop_And64,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And64,
                            binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Generate an IR sequence to do a count-leading-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   the argument is zero, return the number of bits in the word (the
   natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   /* Left-justify the value in 64 bits so that Clz64 counts from the
      operand's own most significant bit. */
   IRTemp src64x = newTemp(Ity_I64);
   assign(src64x,
          binop(Iop_Shl64, mkexpr(src64),
                           mkU8(64 - 8 * sizeofIRType(ty))));

   // Clz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_ITE(
             binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
             mkU64(8 * sizeofIRType(ty)),
             unop(Iop_Clz64, mkexpr(src64x))
   ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}


/* Generate an IR sequence to do a count-trailing-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   the argument is zero, return the number of bits in the word (the
   natural semantics).
 */
static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   /* Zero-extension is enough here: trailing zeroes of the narrow
      value equal those of its 64-bit widening (when nonzero). */
   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   // Ctz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_ITE(
             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
             mkU64(8 * sizeofIRType(ty)),
             unop(Iop_Ctz64, mkexpr(src64))
   ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* --- Helper functions for dealing with the register stack. --- */

/* --- Set the emulation-warning pseudo-register. --- */

static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}

/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */

static IRExpr* mkQNaN64 ( void )
{
  /* QNaN is 0 2047 1 0(51times)
     == 0b 11111111111b 1 0(51times)
     == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}

/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */

static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}

static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}

/* --------- Get/put the C3210 bits.
 --------- */

static IRExpr*  /* :: Ity_I64 */ get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}

static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}

/* --------- Get/put the FPU rounding mode. --------- */
/* Stored in guest state as 64 bits; narrowed/widened at the
   access points here. */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}

static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}


/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety.
*/
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}

/* Fixed round-to-nearest, used where the true rounding mode is
   deliberately ignored (see the x87 limitations note at the top
   of this file). */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}


/* --------- Get/set FP register tag bytes. --------- */

/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */

static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
}

/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty".
 */

static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}


/* --------- Get/set FP registers. --------- */

/* Given i, and some expression e, emit 'ST(i) = e' and set the
   register's tag to indicate the register is full.  The previous
   state of the register is not checked. */

static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}

/* Given i, and some expression e, emit
      ST(i) = is_full(i) ? NaN : e
   and set the tag accordingly.  Writing to an already-full register
   thus yields a QNaN, modelling x87 stack-overflow behaviour.
*/

static void put_ST ( Int i, IRExpr* value )
{
   put_ST_UNCHECKED(
      i,
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  mkQNaN64(),
                  /* 0 means empty */
                  value
      )
   );
}


/* Given i, generate an expression yielding 'ST(i)'. */

static IRExpr* get_ST_UNCHECKED ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}


/* Given i, generate an expression yielding
  is_full(i) ? ST(i) : NaN
   i.e. reading an empty register yields a QNaN (stack underflow).
*/

static IRExpr* get_ST ( Int i )
{
   return
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  get_ST_UNCHECKED(i),
                  /* 0 means empty */
                  mkQNaN64());
}


/* Given i, and some expression e, and a condition cond, generate IR
   which has the same effect as put_ST(i,e) when cond is true and has
   no effect when cond is false.
Given the lack of proper 5196 if-then-else in the IR, this is pretty tricky. 5197 */ 5198 5199 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) 5200 { 5201 // new_tag = if cond then FULL else old_tag 5202 // new_val = if cond then (if old_tag==FULL then NaN else val) 5203 // else old_val 5204 5205 IRTemp old_tag = newTemp(Ity_I8); 5206 assign(old_tag, get_ST_TAG(i)); 5207 IRTemp new_tag = newTemp(Ity_I8); 5208 assign(new_tag, 5209 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); 5210 5211 IRTemp old_val = newTemp(Ity_F64); 5212 assign(old_val, get_ST_UNCHECKED(i)); 5213 IRTemp new_val = newTemp(Ity_F64); 5214 assign(new_val, 5215 IRExpr_ITE(mkexpr(cond), 5216 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), 5217 /* non-0 means full */ 5218 mkQNaN64(), 5219 /* 0 means empty */ 5220 value), 5221 mkexpr(old_val))); 5222 5223 put_ST_UNCHECKED(i, mkexpr(new_val)); 5224 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So 5225 // now set it to new_tag instead. 5226 put_ST_TAG(i, mkexpr(new_tag)); 5227 } 5228 5229 /* Adjust FTOP downwards by one register. */ 5230 5231 static void fp_push ( void ) 5232 { 5233 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 5234 } 5235 5236 /* Adjust FTOP downwards by one register when COND is 1:I1. Else 5237 don't change it. */ 5238 5239 static void maybe_fp_push ( IRTemp cond ) 5240 { 5241 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); 5242 } 5243 5244 /* Adjust FTOP upwards by one register, and mark the vacated register 5245 as empty. */ 5246 5247 static void fp_pop ( void ) 5248 { 5249 put_ST_TAG(0, mkU8(0)); 5250 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5251 } 5252 5253 /* Set the C2 bit of the FPU status register to e[0]. Assumes that 5254 e[31:1] == 0. 
5255 */ 5256 static void set_C2 ( IRExpr* e ) 5257 { 5258 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); 5259 put_C3210( binop(Iop_Or64, 5260 cleared, 5261 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); 5262 } 5263 5264 /* Generate code to check that abs(d64) < 2^63 and is finite. This is 5265 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 5266 test is simple, but the derivation of it is not so simple. 5267 5268 The exponent field for an IEEE754 double is 11 bits. That means it 5269 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 5270 the number is either a NaN or an Infinity and so is not finite. 5271 Furthermore, a finite value of exactly 2^63 is the smallest value 5272 that has exponent value 0x43E. Hence, what we need to do is 5273 extract the exponent, ignoring the sign bit and mantissa, and check 5274 it is < 0x43E, or <= 0x43D. 5275 5276 To make this easily applicable to 32- and 64-bit targets, a 5277 roundabout approach is used. First the number is converted to I64, 5278 then the top 32 bits are taken. Shifting them right by 20 bits 5279 places the sign bit and exponent in the bottom 12 bits. Anding 5280 with 0x7FF gets rid of the sign bit, leaving just the exponent 5281 available for comparison. 
5282 */ 5283 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 5284 { 5285 IRTemp i64 = newTemp(Ity_I64); 5286 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 5287 IRTemp exponent = newTemp(Ity_I32); 5288 assign(exponent, 5289 binop(Iop_And32, 5290 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 5291 mkU32(0x7FF))); 5292 IRTemp in_range_and_finite = newTemp(Ity_I1); 5293 assign(in_range_and_finite, 5294 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 5295 return in_range_and_finite; 5296 } 5297 5298 /* Invent a plausible-looking FPU status word value: 5299 ((ftop & 7) << 11) | (c3210 & 0x4700) 5300 */ 5301 static IRExpr* get_FPU_sw ( void ) 5302 { 5303 return 5304 unop(Iop_32to16, 5305 binop(Iop_Or32, 5306 binop(Iop_Shl32, 5307 binop(Iop_And32, get_ftop(), mkU32(7)), 5308 mkU8(11)), 5309 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 5310 mkU32(0x4700)) 5311 )); 5312 } 5313 5314 5315 /* Generate a dirty helper call that initialises the x87 state a la 5316 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise 5317 |guard| is used as a guarding condition. 
*/
static void gen_FINIT_SEQUENCE ( IRExpr* guard )
{
   /* Uses dirty helper:
         void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* )
      The helper rewrites the entire x87 state, so every affected
      guest-state field is declared below as written, to keep the
      instrumenters informed. */
   IRDirty* d = unsafeIRDirty_0_N (
                   0/*regparms*/,
                   "amd64g_dirtyhelper_FINIT",
                   &amd64g_dirtyhelper_FINIT,
                   mkIRExprVec_1( IRExpr_GSPTR() )
                );

   /* declare we're writing guest state */
   d->nFxState = 5;
   vex_bzero(&d->fxState, sizeof(d->fxState));

   d->fxState[0].fx     = Ifx_Write;
   d->fxState[0].offset = OFFB_FTOP;
   d->fxState[0].size   = sizeof(UInt);

   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = OFFB_FPREGS;
   d->fxState[1].size   = 8 * sizeof(ULong);

   d->fxState[2].fx     = Ifx_Write;
   d->fxState[2].offset = OFFB_FPTAGS;
   d->fxState[2].size   = 8 * sizeof(UChar);

   d->fxState[3].fx     = Ifx_Write;
   d->fxState[3].offset = OFFB_FPROUND;
   d->fxState[3].size   = sizeof(ULong);

   d->fxState[4].fx     = Ifx_Write;
   d->fxState[4].offset = OFFB_FC3210;
   d->fxState[4].size   = sizeof(ULong);

   /* If a guard was supplied, the call (and hence the whole
      reinitialisation) happens only when it evaluates true at
      run time. */
   if (guard)
      d->guard = guard;

   stmt( IRStmt_Dirty(d) );
}


/* ------------------------------------------------------- */
/* Given all that stack-mangling junk, we can now go ahead
   and describe FP instructions.
*/

/* ST(0) = ST(0) `op` mem64/32(addr)
   Need to check ST(0)'s tag on read, but not on write.
5368 */ 5369 static 5370 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5371 IROp op, Bool dbl ) 5372 { 5373 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5374 if (dbl) { 5375 put_ST_UNCHECKED(0, 5376 triop( op, 5377 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5378 get_ST(0), 5379 loadLE(Ity_F64,mkexpr(addr)) 5380 )); 5381 } else { 5382 put_ST_UNCHECKED(0, 5383 triop( op, 5384 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5385 get_ST(0), 5386 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 5387 )); 5388 } 5389 } 5390 5391 5392 /* ST(0) = mem64/32(addr) `op` ST(0) 5393 Need to check ST(0)'s tag on read, but not on write. 5394 */ 5395 static 5396 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5397 IROp op, Bool dbl ) 5398 { 5399 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5400 if (dbl) { 5401 put_ST_UNCHECKED(0, 5402 triop( op, 5403 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5404 loadLE(Ity_F64,mkexpr(addr)), 5405 get_ST(0) 5406 )); 5407 } else { 5408 put_ST_UNCHECKED(0, 5409 triop( op, 5410 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5411 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 5412 get_ST(0) 5413 )); 5414 } 5415 } 5416 5417 5418 /* ST(dst) = ST(dst) `op` ST(src). 5419 Check dst and src tags when reading but not on write. 5420 */ 5421 static 5422 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5423 Bool pop_after ) 5424 { 5425 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5426 put_ST_UNCHECKED( 5427 st_dst, 5428 triop( op, 5429 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5430 get_ST(st_dst), 5431 get_ST(st_src) ) 5432 ); 5433 if (pop_after) 5434 fp_pop(); 5435 } 5436 5437 /* ST(dst) = ST(src) `op` ST(dst). 5438 Check dst and src tags when reading but not on write. 
5439 */ 5440 static 5441 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5442 Bool pop_after ) 5443 { 5444 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5445 put_ST_UNCHECKED( 5446 st_dst, 5447 triop( op, 5448 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5449 get_ST(st_src), 5450 get_ST(st_dst) ) 5451 ); 5452 if (pop_after) 5453 fp_pop(); 5454 } 5455 5456 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 5457 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 5458 { 5459 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 5460 /* This is a bit of a hack (and isn't really right). It sets 5461 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 5462 documentation implies A and S are unchanged. 5463 */ 5464 /* It's also fishy in that it is used both for COMIP and 5465 UCOMIP, and they aren't the same (although similar). */ 5466 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 5467 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 5468 stmt( IRStmt_Put( 5469 OFFB_CC_DEP1, 5470 binop( Iop_And64, 5471 unop( Iop_32Uto64, 5472 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 5473 mkU64(0x45) 5474 ))); 5475 if (pop_after) 5476 fp_pop(); 5477 } 5478 5479 5480 /* returns 5481 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 5482 */ 5483 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 5484 { 5485 IRTemp t32 = newTemp(Ity_I32); 5486 assign( t32, e32 ); 5487 return 5488 IRExpr_ITE( 5489 binop(Iop_CmpLT64U, 5490 unop(Iop_32Uto64, 5491 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 5492 mkU64(65536)), 5493 unop(Iop_32to16, mkexpr(t32)), 5494 mkU16( 0x8000 ) ); 5495 } 5496 5497 5498 static 5499 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 5500 const VexAbiInfo* vbi, Prefix pfx, Long delta ) 5501 { 5502 Int len; 5503 UInt r_src, r_dst; 5504 HChar dis_buf[50]; 5505 IRTemp t1, t2; 5506 5507 /* On entry, delta points at the second byte of the insn (the modrm 5508 byte).*/ 5509 UChar 
first_opcode = getUChar(delta-1); 5510 UChar modrm = getUChar(delta+0); 5511 5512 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 5513 5514 if (first_opcode == 0xD8) { 5515 if (modrm < 0xC0) { 5516 5517 /* bits 5,4,3 are an opcode extension, and the modRM also 5518 specifies an address. */ 5519 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5520 delta += len; 5521 5522 switch (gregLO3ofRM(modrm)) { 5523 5524 case 0: /* FADD single-real */ 5525 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5526 break; 5527 5528 case 1: /* FMUL single-real */ 5529 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5530 break; 5531 5532 case 2: /* FCOM single-real */ 5533 DIP("fcoms %s\n", dis_buf); 5534 /* This forces C1 to zero, which isn't right. */ 5535 /* The AMD documentation suggests that forcing C1 to 5536 zero is correct (Eliot Moss) */ 5537 put_C3210( 5538 unop( Iop_32Uto64, 5539 binop( Iop_And32, 5540 binop(Iop_Shl32, 5541 binop(Iop_CmpF64, 5542 get_ST(0), 5543 unop(Iop_F32toF64, 5544 loadLE(Ity_F32,mkexpr(addr)))), 5545 mkU8(8)), 5546 mkU32(0x4500) 5547 ))); 5548 break; 5549 5550 case 3: /* FCOMP single-real */ 5551 /* The AMD documentation suggests that forcing C1 to 5552 zero is correct (Eliot Moss) */ 5553 DIP("fcomps %s\n", dis_buf); 5554 /* This forces C1 to zero, which isn't right. 
*/ 5555 put_C3210( 5556 unop( Iop_32Uto64, 5557 binop( Iop_And32, 5558 binop(Iop_Shl32, 5559 binop(Iop_CmpF64, 5560 get_ST(0), 5561 unop(Iop_F32toF64, 5562 loadLE(Ity_F32,mkexpr(addr)))), 5563 mkU8(8)), 5564 mkU32(0x4500) 5565 ))); 5566 fp_pop(); 5567 break; 5568 5569 case 4: /* FSUB single-real */ 5570 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5571 break; 5572 5573 case 5: /* FSUBR single-real */ 5574 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5575 break; 5576 5577 case 6: /* FDIV single-real */ 5578 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5579 break; 5580 5581 case 7: /* FDIVR single-real */ 5582 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5583 break; 5584 5585 default: 5586 vex_printf("unhandled opc_aux = 0x%2x\n", 5587 (UInt)gregLO3ofRM(modrm)); 5588 vex_printf("first_opcode == 0xD8\n"); 5589 goto decode_fail; 5590 } 5591 } else { 5592 delta++; 5593 switch (modrm) { 5594 5595 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 5596 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5597 break; 5598 5599 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5600 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5601 break; 5602 5603 /* Dunno if this is right */ 5604 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5605 r_dst = (UInt)modrm - 0xD0; 5606 DIP("fcom %%st(0),%%st(%u)\n", r_dst); 5607 /* This forces C1 to zero, which isn't right. */ 5608 put_C3210( 5609 unop(Iop_32Uto64, 5610 binop( Iop_And32, 5611 binop(Iop_Shl32, 5612 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5613 mkU8(8)), 5614 mkU32(0x4500) 5615 ))); 5616 break; 5617 5618 /* Dunno if this is right */ 5619 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5620 r_dst = (UInt)modrm - 0xD8; 5621 DIP("fcomp %%st(0),%%st(%u)\n", r_dst); 5622 /* This forces C1 to zero, which isn't right. 
*/ 5623 put_C3210( 5624 unop(Iop_32Uto64, 5625 binop( Iop_And32, 5626 binop(Iop_Shl32, 5627 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5628 mkU8(8)), 5629 mkU32(0x4500) 5630 ))); 5631 fp_pop(); 5632 break; 5633 5634 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5635 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5636 break; 5637 5638 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5639 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5640 break; 5641 5642 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5643 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5644 break; 5645 5646 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5647 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5648 break; 5649 5650 default: 5651 goto decode_fail; 5652 } 5653 } 5654 } 5655 5656 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5657 else 5658 if (first_opcode == 0xD9) { 5659 if (modrm < 0xC0) { 5660 5661 /* bits 5,4,3 are an opcode extension, and the modRM also 5662 specifies an address. 
*/ 5663 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5664 delta += len; 5665 5666 switch (gregLO3ofRM(modrm)) { 5667 5668 case 0: /* FLD single-real */ 5669 DIP("flds %s\n", dis_buf); 5670 fp_push(); 5671 put_ST(0, unop(Iop_F32toF64, 5672 loadLE(Ity_F32, mkexpr(addr)))); 5673 break; 5674 5675 case 2: /* FST single-real */ 5676 DIP("fsts %s\n", dis_buf); 5677 storeLE(mkexpr(addr), 5678 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5679 break; 5680 5681 case 3: /* FSTP single-real */ 5682 DIP("fstps %s\n", dis_buf); 5683 storeLE(mkexpr(addr), 5684 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5685 fp_pop(); 5686 break; 5687 5688 case 4: { /* FLDENV m28 */ 5689 /* Uses dirty helper: 5690 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5691 IRTemp ew = newTemp(Ity_I32); 5692 IRTemp w64 = newTemp(Ity_I64); 5693 IRDirty* d = unsafeIRDirty_0_N ( 5694 0/*regparms*/, 5695 "amd64g_dirtyhelper_FLDENV", 5696 &amd64g_dirtyhelper_FLDENV, 5697 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 5698 ); 5699 d->tmp = w64; 5700 /* declare we're reading memory */ 5701 d->mFx = Ifx_Read; 5702 d->mAddr = mkexpr(addr); 5703 d->mSize = 28; 5704 5705 /* declare we're writing guest state */ 5706 d->nFxState = 4; 5707 vex_bzero(&d->fxState, sizeof(d->fxState)); 5708 5709 d->fxState[0].fx = Ifx_Write; 5710 d->fxState[0].offset = OFFB_FTOP; 5711 d->fxState[0].size = sizeof(UInt); 5712 5713 d->fxState[1].fx = Ifx_Write; 5714 d->fxState[1].offset = OFFB_FPTAGS; 5715 d->fxState[1].size = 8 * sizeof(UChar); 5716 5717 d->fxState[2].fx = Ifx_Write; 5718 d->fxState[2].offset = OFFB_FPROUND; 5719 d->fxState[2].size = sizeof(ULong); 5720 5721 d->fxState[3].fx = Ifx_Write; 5722 d->fxState[3].offset = OFFB_FC3210; 5723 d->fxState[3].size = sizeof(ULong); 5724 5725 stmt( IRStmt_Dirty(d) ); 5726 5727 /* ew contains any emulation warning we may need to 5728 issue. 
If needed, side-exit to the next insn, 5729 reporting the warning, so that Valgrind's dispatcher 5730 sees the warning. */ 5731 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5732 put_emwarn( mkexpr(ew) ); 5733 stmt( 5734 IRStmt_Exit( 5735 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5736 Ijk_EmWarn, 5737 IRConst_U64( guest_RIP_bbstart+delta ), 5738 OFFB_RIP 5739 ) 5740 ); 5741 5742 DIP("fldenv %s\n", dis_buf); 5743 break; 5744 } 5745 5746 case 5: {/* FLDCW */ 5747 /* The only thing we observe in the control word is the 5748 rounding mode. Therefore, pass the 16-bit value 5749 (x87 native-format control word) to a clean helper, 5750 getting back a 64-bit value, the lower half of which 5751 is the FPROUND value to store, and the upper half of 5752 which is the emulation-warning token which may be 5753 generated. 5754 */ 5755 /* ULong amd64h_check_fldcw ( ULong ); */ 5756 IRTemp t64 = newTemp(Ity_I64); 5757 IRTemp ew = newTemp(Ity_I32); 5758 DIP("fldcw %s\n", dis_buf); 5759 assign( t64, mkIRExprCCall( 5760 Ity_I64, 0/*regparms*/, 5761 "amd64g_check_fldcw", 5762 &amd64g_check_fldcw, 5763 mkIRExprVec_1( 5764 unop( Iop_16Uto64, 5765 loadLE(Ity_I16, mkexpr(addr))) 5766 ) 5767 ) 5768 ); 5769 5770 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5771 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5772 put_emwarn( mkexpr(ew) ); 5773 /* Finally, if an emulation warning was reported, 5774 side-exit to the next insn, reporting the warning, 5775 so that Valgrind's dispatcher sees the warning. 
*/ 5776 stmt( 5777 IRStmt_Exit( 5778 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5779 Ijk_EmWarn, 5780 IRConst_U64( guest_RIP_bbstart+delta ), 5781 OFFB_RIP 5782 ) 5783 ); 5784 break; 5785 } 5786 5787 case 6: { /* FNSTENV m28 */ 5788 /* Uses dirty helper: 5789 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5790 IRDirty* d = unsafeIRDirty_0_N ( 5791 0/*regparms*/, 5792 "amd64g_dirtyhelper_FSTENV", 5793 &amd64g_dirtyhelper_FSTENV, 5794 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 5795 ); 5796 /* declare we're writing memory */ 5797 d->mFx = Ifx_Write; 5798 d->mAddr = mkexpr(addr); 5799 d->mSize = 28; 5800 5801 /* declare we're reading guest state */ 5802 d->nFxState = 4; 5803 vex_bzero(&d->fxState, sizeof(d->fxState)); 5804 5805 d->fxState[0].fx = Ifx_Read; 5806 d->fxState[0].offset = OFFB_FTOP; 5807 d->fxState[0].size = sizeof(UInt); 5808 5809 d->fxState[1].fx = Ifx_Read; 5810 d->fxState[1].offset = OFFB_FPTAGS; 5811 d->fxState[1].size = 8 * sizeof(UChar); 5812 5813 d->fxState[2].fx = Ifx_Read; 5814 d->fxState[2].offset = OFFB_FPROUND; 5815 d->fxState[2].size = sizeof(ULong); 5816 5817 d->fxState[3].fx = Ifx_Read; 5818 d->fxState[3].offset = OFFB_FC3210; 5819 d->fxState[3].size = sizeof(ULong); 5820 5821 stmt( IRStmt_Dirty(d) ); 5822 5823 DIP("fnstenv %s\n", dis_buf); 5824 break; 5825 } 5826 5827 case 7: /* FNSTCW */ 5828 /* Fake up a native x87 FPU control word. The only 5829 thing it depends on is FPROUND[1:0], so call a clean 5830 helper to cook it up. 
*/ 5831 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5832 DIP("fnstcw %s\n", dis_buf); 5833 storeLE( 5834 mkexpr(addr), 5835 unop( Iop_64to16, 5836 mkIRExprCCall( 5837 Ity_I64, 0/*regp*/, 5838 "amd64g_create_fpucw", &amd64g_create_fpucw, 5839 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5840 ) 5841 ) 5842 ); 5843 break; 5844 5845 default: 5846 vex_printf("unhandled opc_aux = 0x%2x\n", 5847 (UInt)gregLO3ofRM(modrm)); 5848 vex_printf("first_opcode == 0xD9\n"); 5849 goto decode_fail; 5850 } 5851 5852 } else { 5853 delta++; 5854 switch (modrm) { 5855 5856 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5857 r_src = (UInt)modrm - 0xC0; 5858 DIP("fld %%st(%u)\n", r_src); 5859 t1 = newTemp(Ity_F64); 5860 assign(t1, get_ST(r_src)); 5861 fp_push(); 5862 put_ST(0, mkexpr(t1)); 5863 break; 5864 5865 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5866 r_src = (UInt)modrm - 0xC8; 5867 DIP("fxch %%st(%u)\n", r_src); 5868 t1 = newTemp(Ity_F64); 5869 t2 = newTemp(Ity_F64); 5870 assign(t1, get_ST(0)); 5871 assign(t2, get_ST(r_src)); 5872 put_ST_UNCHECKED(0, mkexpr(t2)); 5873 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5874 break; 5875 5876 case 0xE0: /* FCHS */ 5877 DIP("fchs\n"); 5878 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5879 break; 5880 5881 case 0xE1: /* FABS */ 5882 DIP("fabs\n"); 5883 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5884 break; 5885 5886 case 0xE5: { /* FXAM */ 5887 /* This is an interesting one. It examines %st(0), 5888 regardless of whether the tag says it's empty or not. 5889 Here, just pass both the tag (in our format) and the 5890 value (as a double, actually a ULong) to a helper 5891 function. 
*/ 5892 IRExpr** args 5893 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5894 unop(Iop_ReinterpF64asI64, 5895 get_ST_UNCHECKED(0)) ); 5896 put_C3210(mkIRExprCCall( 5897 Ity_I64, 5898 0/*regparm*/, 5899 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5900 args 5901 )); 5902 DIP("fxam\n"); 5903 break; 5904 } 5905 5906 case 0xE8: /* FLD1 */ 5907 DIP("fld1\n"); 5908 fp_push(); 5909 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5910 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5911 break; 5912 5913 case 0xE9: /* FLDL2T */ 5914 DIP("fldl2t\n"); 5915 fp_push(); 5916 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5917 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5918 break; 5919 5920 case 0xEA: /* FLDL2E */ 5921 DIP("fldl2e\n"); 5922 fp_push(); 5923 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5924 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5925 break; 5926 5927 case 0xEB: /* FLDPI */ 5928 DIP("fldpi\n"); 5929 fp_push(); 5930 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5931 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5932 break; 5933 5934 case 0xEC: /* FLDLG2 */ 5935 DIP("fldlg2\n"); 5936 fp_push(); 5937 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5938 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5939 break; 5940 5941 case 0xED: /* FLDLN2 */ 5942 DIP("fldln2\n"); 5943 fp_push(); 5944 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5945 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5946 break; 5947 5948 case 0xEE: /* FLDZ */ 5949 DIP("fldz\n"); 5950 fp_push(); 5951 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5952 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5953 break; 5954 5955 case 0xF0: /* F2XM1 */ 5956 DIP("f2xm1\n"); 5957 put_ST_UNCHECKED(0, 5958 binop(Iop_2xm1F64, 5959 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5960 
get_ST(0))); 5961 break; 5962 5963 case 0xF1: /* FYL2X */ 5964 DIP("fyl2x\n"); 5965 put_ST_UNCHECKED(1, 5966 triop(Iop_Yl2xF64, 5967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5968 get_ST(1), 5969 get_ST(0))); 5970 fp_pop(); 5971 break; 5972 5973 case 0xF2: { /* FPTAN */ 5974 DIP("fptan\n"); 5975 IRTemp argD = newTemp(Ity_F64); 5976 assign(argD, get_ST(0)); 5977 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5978 IRTemp resD = newTemp(Ity_F64); 5979 assign(resD, 5980 IRExpr_ITE( 5981 mkexpr(argOK), 5982 binop(Iop_TanF64, 5983 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5984 mkexpr(argD)), 5985 mkexpr(argD)) 5986 ); 5987 put_ST_UNCHECKED(0, mkexpr(resD)); 5988 /* Conditionally push 1.0 on the stack, if the arg is 5989 in range */ 5990 maybe_fp_push(argOK); 5991 maybe_put_ST(argOK, 0, 5992 IRExpr_Const(IRConst_F64(1.0))); 5993 set_C2( binop(Iop_Xor64, 5994 unop(Iop_1Uto64, mkexpr(argOK)), 5995 mkU64(1)) ); 5996 break; 5997 } 5998 5999 case 0xF3: /* FPATAN */ 6000 DIP("fpatan\n"); 6001 put_ST_UNCHECKED(1, 6002 triop(Iop_AtanF64, 6003 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6004 get_ST(1), 6005 get_ST(0))); 6006 fp_pop(); 6007 break; 6008 6009 case 0xF4: { /* FXTRACT */ 6010 IRTemp argF = newTemp(Ity_F64); 6011 IRTemp sigF = newTemp(Ity_F64); 6012 IRTemp expF = newTemp(Ity_F64); 6013 IRTemp argI = newTemp(Ity_I64); 6014 IRTemp sigI = newTemp(Ity_I64); 6015 IRTemp expI = newTemp(Ity_I64); 6016 DIP("fxtract\n"); 6017 assign( argF, get_ST(0) ); 6018 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 6019 assign( sigI, 6020 mkIRExprCCall( 6021 Ity_I64, 0/*regparms*/, 6022 "x86amd64g_calculate_FXTRACT", 6023 &x86amd64g_calculate_FXTRACT, 6024 mkIRExprVec_2( mkexpr(argI), 6025 mkIRExpr_HWord(0)/*sig*/ )) 6026 ); 6027 assign( expI, 6028 mkIRExprCCall( 6029 Ity_I64, 0/*regparms*/, 6030 "x86amd64g_calculate_FXTRACT", 6031 &x86amd64g_calculate_FXTRACT, 6032 mkIRExprVec_2( mkexpr(argI), 6033 mkIRExpr_HWord(1)/*exp*/ )) 6034 ); 6035 assign( sigF, 
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 6036 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 6037 /* exponent */ 6038 put_ST_UNCHECKED(0, mkexpr(expF) ); 6039 fp_push(); 6040 /* significand */ 6041 put_ST(0, mkexpr(sigF) ); 6042 break; 6043 } 6044 6045 case 0xF5: { /* FPREM1 -- IEEE compliant */ 6046 IRTemp a1 = newTemp(Ity_F64); 6047 IRTemp a2 = newTemp(Ity_F64); 6048 DIP("fprem1\n"); 6049 /* Do FPREM1 twice, once to get the remainder, and once 6050 to get the C3210 flag values. */ 6051 assign( a1, get_ST(0) ); 6052 assign( a2, get_ST(1) ); 6053 put_ST_UNCHECKED(0, 6054 triop(Iop_PRem1F64, 6055 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6056 mkexpr(a1), 6057 mkexpr(a2))); 6058 put_C3210( 6059 unop(Iop_32Uto64, 6060 triop(Iop_PRem1C3210F64, 6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6062 mkexpr(a1), 6063 mkexpr(a2)) )); 6064 break; 6065 } 6066 6067 case 0xF7: /* FINCSTP */ 6068 DIP("fincstp\n"); 6069 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 6070 break; 6071 6072 case 0xF8: { /* FPREM -- not IEEE compliant */ 6073 IRTemp a1 = newTemp(Ity_F64); 6074 IRTemp a2 = newTemp(Ity_F64); 6075 DIP("fprem\n"); 6076 /* Do FPREM twice, once to get the remainder, and once 6077 to get the C3210 flag values. 
*/ 6078 assign( a1, get_ST(0) ); 6079 assign( a2, get_ST(1) ); 6080 put_ST_UNCHECKED(0, 6081 triop(Iop_PRemF64, 6082 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6083 mkexpr(a1), 6084 mkexpr(a2))); 6085 put_C3210( 6086 unop(Iop_32Uto64, 6087 triop(Iop_PRemC3210F64, 6088 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6089 mkexpr(a1), 6090 mkexpr(a2)) )); 6091 break; 6092 } 6093 6094 case 0xF9: /* FYL2XP1 */ 6095 DIP("fyl2xp1\n"); 6096 put_ST_UNCHECKED(1, 6097 triop(Iop_Yl2xp1F64, 6098 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6099 get_ST(1), 6100 get_ST(0))); 6101 fp_pop(); 6102 break; 6103 6104 case 0xFA: /* FSQRT */ 6105 DIP("fsqrt\n"); 6106 put_ST_UNCHECKED(0, 6107 binop(Iop_SqrtF64, 6108 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6109 get_ST(0))); 6110 break; 6111 6112 case 0xFB: { /* FSINCOS */ 6113 DIP("fsincos\n"); 6114 IRTemp argD = newTemp(Ity_F64); 6115 assign(argD, get_ST(0)); 6116 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6117 IRTemp resD = newTemp(Ity_F64); 6118 assign(resD, 6119 IRExpr_ITE( 6120 mkexpr(argOK), 6121 binop(Iop_SinF64, 6122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6123 mkexpr(argD)), 6124 mkexpr(argD)) 6125 ); 6126 put_ST_UNCHECKED(0, mkexpr(resD)); 6127 /* Conditionally push the cos value on the stack, if 6128 the arg is in range */ 6129 maybe_fp_push(argOK); 6130 maybe_put_ST(argOK, 0, 6131 binop(Iop_CosF64, 6132 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6133 mkexpr(argD))); 6134 set_C2( binop(Iop_Xor64, 6135 unop(Iop_1Uto64, mkexpr(argOK)), 6136 mkU64(1)) ); 6137 break; 6138 } 6139 6140 case 0xFC: /* FRNDINT */ 6141 DIP("frndint\n"); 6142 put_ST_UNCHECKED(0, 6143 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 6144 break; 6145 6146 case 0xFD: /* FSCALE */ 6147 DIP("fscale\n"); 6148 put_ST_UNCHECKED(0, 6149 triop(Iop_ScaleF64, 6150 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6151 get_ST(0), 6152 get_ST(1))); 6153 break; 6154 6155 case 0xFE: /* FSIN */ 6156 case 0xFF: { /* FCOS 
*/ 6157 Bool isSIN = modrm == 0xFE; 6158 DIP("%s\n", isSIN ? "fsin" : "fcos"); 6159 IRTemp argD = newTemp(Ity_F64); 6160 assign(argD, get_ST(0)); 6161 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6162 IRTemp resD = newTemp(Ity_F64); 6163 assign(resD, 6164 IRExpr_ITE( 6165 mkexpr(argOK), 6166 binop(isSIN ? Iop_SinF64 : Iop_CosF64, 6167 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6168 mkexpr(argD)), 6169 mkexpr(argD)) 6170 ); 6171 put_ST_UNCHECKED(0, mkexpr(resD)); 6172 set_C2( binop(Iop_Xor64, 6173 unop(Iop_1Uto64, mkexpr(argOK)), 6174 mkU64(1)) ); 6175 break; 6176 } 6177 6178 default: 6179 goto decode_fail; 6180 } 6181 } 6182 } 6183 6184 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 6185 else 6186 if (first_opcode == 0xDA) { 6187 6188 if (modrm < 0xC0) { 6189 6190 /* bits 5,4,3 are an opcode extension, and the modRM also 6191 specifies an address. */ 6192 IROp fop; 6193 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6194 delta += len; 6195 switch (gregLO3ofRM(modrm)) { 6196 6197 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 6198 DIP("fiaddl %s\n", dis_buf); 6199 fop = Iop_AddF64; 6200 goto do_fop_m32; 6201 6202 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 6203 DIP("fimull %s\n", dis_buf); 6204 fop = Iop_MulF64; 6205 goto do_fop_m32; 6206 6207 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 6208 DIP("fisubl %s\n", dis_buf); 6209 fop = Iop_SubF64; 6210 goto do_fop_m32; 6211 6212 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 6213 DIP("fisubrl %s\n", dis_buf); 6214 fop = Iop_SubF64; 6215 goto do_foprev_m32; 6216 6217 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 6218 DIP("fisubl %s\n", dis_buf); 6219 fop = Iop_DivF64; 6220 goto do_fop_m32; 6221 6222 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 6223 DIP("fidivrl %s\n", dis_buf); 6224 fop = Iop_DivF64; 6225 goto do_foprev_m32; 6226 6227 do_fop_m32: 6228 put_ST_UNCHECKED(0, 6229 triop(fop, 6230 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 
6231 get_ST(0), 6232 unop(Iop_I32StoF64, 6233 loadLE(Ity_I32, mkexpr(addr))))); 6234 break; 6235 6236 do_foprev_m32: 6237 put_ST_UNCHECKED(0, 6238 triop(fop, 6239 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6240 unop(Iop_I32StoF64, 6241 loadLE(Ity_I32, mkexpr(addr))), 6242 get_ST(0))); 6243 break; 6244 6245 default: 6246 vex_printf("unhandled opc_aux = 0x%2x\n", 6247 (UInt)gregLO3ofRM(modrm)); 6248 vex_printf("first_opcode == 0xDA\n"); 6249 goto decode_fail; 6250 } 6251 6252 } else { 6253 6254 delta++; 6255 switch (modrm) { 6256 6257 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 6258 r_src = (UInt)modrm - 0xC0; 6259 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 6260 put_ST_UNCHECKED(0, 6261 IRExpr_ITE( 6262 mk_amd64g_calculate_condition(AMD64CondB), 6263 get_ST(r_src), get_ST(0)) ); 6264 break; 6265 6266 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 6267 r_src = (UInt)modrm - 0xC8; 6268 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 6269 put_ST_UNCHECKED(0, 6270 IRExpr_ITE( 6271 mk_amd64g_calculate_condition(AMD64CondZ), 6272 get_ST(r_src), get_ST(0)) ); 6273 break; 6274 6275 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 6276 r_src = (UInt)modrm - 0xD0; 6277 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 6278 put_ST_UNCHECKED(0, 6279 IRExpr_ITE( 6280 mk_amd64g_calculate_condition(AMD64CondBE), 6281 get_ST(r_src), get_ST(0)) ); 6282 break; 6283 6284 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 6285 r_src = (UInt)modrm - 0xD8; 6286 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 6287 put_ST_UNCHECKED(0, 6288 IRExpr_ITE( 6289 mk_amd64g_calculate_condition(AMD64CondP), 6290 get_ST(r_src), get_ST(0)) ); 6291 break; 6292 6293 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 6294 DIP("fucompp %%st(0),%%st(1)\n"); 6295 /* This forces C1 to zero, which isn't right. 
*/ 6296 put_C3210( 6297 unop(Iop_32Uto64, 6298 binop( Iop_And32, 6299 binop(Iop_Shl32, 6300 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6301 mkU8(8)), 6302 mkU32(0x4500) 6303 ))); 6304 fp_pop(); 6305 fp_pop(); 6306 break; 6307 6308 default: 6309 goto decode_fail; 6310 } 6311 6312 } 6313 } 6314 6315 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 6316 else 6317 if (first_opcode == 0xDB) { 6318 if (modrm < 0xC0) { 6319 6320 /* bits 5,4,3 are an opcode extension, and the modRM also 6321 specifies an address. */ 6322 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6323 delta += len; 6324 6325 switch (gregLO3ofRM(modrm)) { 6326 6327 case 0: /* FILD m32int */ 6328 DIP("fildl %s\n", dis_buf); 6329 fp_push(); 6330 put_ST(0, unop(Iop_I32StoF64, 6331 loadLE(Ity_I32, mkexpr(addr)))); 6332 break; 6333 6334 case 1: /* FISTTPL m32 (SSE3) */ 6335 DIP("fisttpl %s\n", dis_buf); 6336 storeLE( mkexpr(addr), 6337 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 6338 fp_pop(); 6339 break; 6340 6341 case 2: /* FIST m32 */ 6342 DIP("fistl %s\n", dis_buf); 6343 storeLE( mkexpr(addr), 6344 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6345 break; 6346 6347 case 3: /* FISTP m32 */ 6348 DIP("fistpl %s\n", dis_buf); 6349 storeLE( mkexpr(addr), 6350 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6351 fp_pop(); 6352 break; 6353 6354 case 5: { /* FLD extended-real */ 6355 /* Uses dirty helper: 6356 ULong amd64g_loadF80le ( ULong ) 6357 addr holds the address. First, do a dirty call to 6358 get hold of the data. */ 6359 IRTemp val = newTemp(Ity_I64); 6360 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 6361 6362 IRDirty* d = unsafeIRDirty_1_N ( 6363 val, 6364 0/*regparms*/, 6365 "amd64g_dirtyhelper_loadF80le", 6366 &amd64g_dirtyhelper_loadF80le, 6367 args 6368 ); 6369 /* declare that we're reading memory */ 6370 d->mFx = Ifx_Read; 6371 d->mAddr = mkexpr(addr); 6372 d->mSize = 10; 6373 6374 /* execute the dirty call, dumping the result in val. 
*/ 6375 stmt( IRStmt_Dirty(d) ); 6376 fp_push(); 6377 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 6378 6379 DIP("fldt %s\n", dis_buf); 6380 break; 6381 } 6382 6383 case 7: { /* FSTP extended-real */ 6384 /* Uses dirty helper: 6385 void amd64g_storeF80le ( ULong addr, ULong data ) 6386 */ 6387 IRExpr** args 6388 = mkIRExprVec_2( mkexpr(addr), 6389 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 6390 6391 IRDirty* d = unsafeIRDirty_0_N ( 6392 0/*regparms*/, 6393 "amd64g_dirtyhelper_storeF80le", 6394 &amd64g_dirtyhelper_storeF80le, 6395 args 6396 ); 6397 /* declare we're writing memory */ 6398 d->mFx = Ifx_Write; 6399 d->mAddr = mkexpr(addr); 6400 d->mSize = 10; 6401 6402 /* execute the dirty call. */ 6403 stmt( IRStmt_Dirty(d) ); 6404 fp_pop(); 6405 6406 DIP("fstpt\n %s", dis_buf); 6407 break; 6408 } 6409 6410 default: 6411 vex_printf("unhandled opc_aux = 0x%2x\n", 6412 (UInt)gregLO3ofRM(modrm)); 6413 vex_printf("first_opcode == 0xDB\n"); 6414 goto decode_fail; 6415 } 6416 6417 } else { 6418 6419 delta++; 6420 switch (modrm) { 6421 6422 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 6423 r_src = (UInt)modrm - 0xC0; 6424 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 6425 put_ST_UNCHECKED(0, 6426 IRExpr_ITE( 6427 mk_amd64g_calculate_condition(AMD64CondNB), 6428 get_ST(r_src), get_ST(0)) ); 6429 break; 6430 6431 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 6432 r_src = (UInt)modrm - 0xC8; 6433 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 6434 put_ST_UNCHECKED( 6435 0, 6436 IRExpr_ITE( 6437 mk_amd64g_calculate_condition(AMD64CondNZ), 6438 get_ST(r_src), 6439 get_ST(0) 6440 ) 6441 ); 6442 break; 6443 6444 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 6445 r_src = (UInt)modrm - 0xD0; 6446 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 6447 put_ST_UNCHECKED( 6448 0, 6449 IRExpr_ITE( 6450 mk_amd64g_calculate_condition(AMD64CondNBE), 6451 get_ST(r_src), 6452 get_ST(0) 6453 ) 6454 ); 6455 break; 6456 6457 case 0xD8 ... 
0xDF: /* FCMOVNU ST(i), ST(0) */ 6458 r_src = (UInt)modrm - 0xD8; 6459 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 6460 put_ST_UNCHECKED( 6461 0, 6462 IRExpr_ITE( 6463 mk_amd64g_calculate_condition(AMD64CondNP), 6464 get_ST(r_src), 6465 get_ST(0) 6466 ) 6467 ); 6468 break; 6469 6470 case 0xE2: 6471 DIP("fnclex\n"); 6472 break; 6473 6474 case 0xE3: { 6475 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/); 6476 DIP("fninit\n"); 6477 break; 6478 } 6479 6480 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 6481 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 6482 break; 6483 6484 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 6485 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 6486 break; 6487 6488 default: 6489 goto decode_fail; 6490 } 6491 } 6492 } 6493 6494 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 6495 else 6496 if (first_opcode == 0xDC) { 6497 if (modrm < 0xC0) { 6498 6499 /* bits 5,4,3 are an opcode extension, and the modRM also 6500 specifies an address. */ 6501 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6502 delta += len; 6503 6504 switch (gregLO3ofRM(modrm)) { 6505 6506 case 0: /* FADD double-real */ 6507 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 6508 break; 6509 6510 case 1: /* FMUL double-real */ 6511 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 6512 break; 6513 6514 case 2: /* FCOM double-real */ 6515 DIP("fcoml %s\n", dis_buf); 6516 /* This forces C1 to zero, which isn't right. */ 6517 put_C3210( 6518 unop(Iop_32Uto64, 6519 binop( Iop_And32, 6520 binop(Iop_Shl32, 6521 binop(Iop_CmpF64, 6522 get_ST(0), 6523 loadLE(Ity_F64,mkexpr(addr))), 6524 mkU8(8)), 6525 mkU32(0x4500) 6526 ))); 6527 break; 6528 6529 case 3: /* FCOMP double-real */ 6530 DIP("fcompl %s\n", dis_buf); 6531 /* This forces C1 to zero, which isn't right. 
*/ 6532 put_C3210( 6533 unop(Iop_32Uto64, 6534 binop( Iop_And32, 6535 binop(Iop_Shl32, 6536 binop(Iop_CmpF64, 6537 get_ST(0), 6538 loadLE(Ity_F64,mkexpr(addr))), 6539 mkU8(8)), 6540 mkU32(0x4500) 6541 ))); 6542 fp_pop(); 6543 break; 6544 6545 case 4: /* FSUB double-real */ 6546 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 6547 break; 6548 6549 case 5: /* FSUBR double-real */ 6550 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 6551 break; 6552 6553 case 6: /* FDIV double-real */ 6554 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 6555 break; 6556 6557 case 7: /* FDIVR double-real */ 6558 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 6559 break; 6560 6561 default: 6562 vex_printf("unhandled opc_aux = 0x%2x\n", 6563 (UInt)gregLO3ofRM(modrm)); 6564 vex_printf("first_opcode == 0xDC\n"); 6565 goto decode_fail; 6566 } 6567 6568 } else { 6569 6570 delta++; 6571 switch (modrm) { 6572 6573 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 6574 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 6575 break; 6576 6577 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 6578 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 6579 break; 6580 6581 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 6582 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 6583 break; 6584 6585 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 6586 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 6587 break; 6588 6589 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 6590 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 6591 break; 6592 6593 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) 
*/ 6594 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 6595 break; 6596 6597 default: 6598 goto decode_fail; 6599 } 6600 6601 } 6602 } 6603 6604 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 6605 else 6606 if (first_opcode == 0xDD) { 6607 6608 if (modrm < 0xC0) { 6609 6610 /* bits 5,4,3 are an opcode extension, and the modRM also 6611 specifies an address. */ 6612 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6613 delta += len; 6614 6615 switch (gregLO3ofRM(modrm)) { 6616 6617 case 0: /* FLD double-real */ 6618 DIP("fldl %s\n", dis_buf); 6619 fp_push(); 6620 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6621 break; 6622 6623 case 1: /* FISTTPQ m64 (SSE3) */ 6624 DIP("fistppll %s\n", dis_buf); 6625 storeLE( mkexpr(addr), 6626 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6627 fp_pop(); 6628 break; 6629 6630 case 2: /* FST double-real */ 6631 DIP("fstl %s\n", dis_buf); 6632 storeLE(mkexpr(addr), get_ST(0)); 6633 break; 6634 6635 case 3: /* FSTP double-real */ 6636 DIP("fstpl %s\n", dis_buf); 6637 storeLE(mkexpr(addr), get_ST(0)); 6638 fp_pop(); 6639 break; 6640 6641 case 4: { /* FRSTOR m94/m108 */ 6642 IRTemp ew = newTemp(Ity_I32); 6643 IRTemp w64 = newTemp(Ity_I64); 6644 IRDirty* d; 6645 if ( have66(pfx) ) { 6646 /* Uses dirty helper: 6647 VexEmNote amd64g_dirtyhelper_FRSTORS 6648 ( VexGuestAMD64State*, HWord ) */ 6649 d = unsafeIRDirty_0_N ( 6650 0/*regparms*/, 6651 "amd64g_dirtyhelper_FRSTORS", 6652 &amd64g_dirtyhelper_FRSTORS, 6653 mkIRExprVec_1( mkexpr(addr) ) 6654 ); 6655 d->mSize = 94; 6656 } else { 6657 /* Uses dirty helper: 6658 VexEmNote amd64g_dirtyhelper_FRSTOR 6659 ( VexGuestAMD64State*, HWord ) */ 6660 d = unsafeIRDirty_0_N ( 6661 0/*regparms*/, 6662 "amd64g_dirtyhelper_FRSTOR", 6663 &amd64g_dirtyhelper_FRSTOR, 6664 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 6665 ); 6666 d->mSize = 108; 6667 } 6668 6669 d->tmp = w64; 6670 /* declare we're reading memory */ 6671 d->mFx = Ifx_Read; 6672 d->mAddr = 
mkexpr(addr); 6673 /* d->mSize set above */ 6674 6675 /* declare we're writing guest state */ 6676 d->nFxState = 5; 6677 vex_bzero(&d->fxState, sizeof(d->fxState)); 6678 6679 d->fxState[0].fx = Ifx_Write; 6680 d->fxState[0].offset = OFFB_FTOP; 6681 d->fxState[0].size = sizeof(UInt); 6682 6683 d->fxState[1].fx = Ifx_Write; 6684 d->fxState[1].offset = OFFB_FPREGS; 6685 d->fxState[1].size = 8 * sizeof(ULong); 6686 6687 d->fxState[2].fx = Ifx_Write; 6688 d->fxState[2].offset = OFFB_FPTAGS; 6689 d->fxState[2].size = 8 * sizeof(UChar); 6690 6691 d->fxState[3].fx = Ifx_Write; 6692 d->fxState[3].offset = OFFB_FPROUND; 6693 d->fxState[3].size = sizeof(ULong); 6694 6695 d->fxState[4].fx = Ifx_Write; 6696 d->fxState[4].offset = OFFB_FC3210; 6697 d->fxState[4].size = sizeof(ULong); 6698 6699 stmt( IRStmt_Dirty(d) ); 6700 6701 /* ew contains any emulation warning we may need to 6702 issue. If needed, side-exit to the next insn, 6703 reporting the warning, so that Valgrind's dispatcher 6704 sees the warning. 
*/ 6705 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 6706 put_emwarn( mkexpr(ew) ); 6707 stmt( 6708 IRStmt_Exit( 6709 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6710 Ijk_EmWarn, 6711 IRConst_U64( guest_RIP_bbstart+delta ), 6712 OFFB_RIP 6713 ) 6714 ); 6715 6716 if ( have66(pfx) ) { 6717 DIP("frstors %s\n", dis_buf); 6718 } else { 6719 DIP("frstor %s\n", dis_buf); 6720 } 6721 break; 6722 } 6723 6724 case 6: { /* FNSAVE m94/m108 */ 6725 IRDirty *d; 6726 if ( have66(pfx) ) { 6727 /* Uses dirty helper: 6728 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, 6729 HWord ) */ 6730 d = unsafeIRDirty_0_N ( 6731 0/*regparms*/, 6732 "amd64g_dirtyhelper_FNSAVES", 6733 &amd64g_dirtyhelper_FNSAVES, 6734 mkIRExprVec_1( mkexpr(addr) ) 6735 ); 6736 d->mSize = 94; 6737 } else { 6738 /* Uses dirty helper: 6739 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, 6740 HWord ) */ 6741 d = unsafeIRDirty_0_N ( 6742 0/*regparms*/, 6743 "amd64g_dirtyhelper_FNSAVE", 6744 &amd64g_dirtyhelper_FNSAVE, 6745 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 6746 ); 6747 d->mSize = 108; 6748 } 6749 6750 /* declare we're writing memory */ 6751 d->mFx = Ifx_Write; 6752 d->mAddr = mkexpr(addr); 6753 /* d->mSize set above */ 6754 6755 /* declare we're reading guest state */ 6756 d->nFxState = 5; 6757 vex_bzero(&d->fxState, sizeof(d->fxState)); 6758 6759 d->fxState[0].fx = Ifx_Read; 6760 d->fxState[0].offset = OFFB_FTOP; 6761 d->fxState[0].size = sizeof(UInt); 6762 6763 d->fxState[1].fx = Ifx_Read; 6764 d->fxState[1].offset = OFFB_FPREGS; 6765 d->fxState[1].size = 8 * sizeof(ULong); 6766 6767 d->fxState[2].fx = Ifx_Read; 6768 d->fxState[2].offset = OFFB_FPTAGS; 6769 d->fxState[2].size = 8 * sizeof(UChar); 6770 6771 d->fxState[3].fx = Ifx_Read; 6772 d->fxState[3].offset = OFFB_FPROUND; 6773 d->fxState[3].size = sizeof(ULong); 6774 6775 d->fxState[4].fx = Ifx_Read; 6776 d->fxState[4].offset = OFFB_FC3210; 6777 d->fxState[4].size = sizeof(ULong); 6778 6779 stmt( IRStmt_Dirty(d) ); 6780 6781 if ( 
have66(pfx) ) { 6782 DIP("fnsaves %s\n", dis_buf); 6783 } else { 6784 DIP("fnsave %s\n", dis_buf); 6785 } 6786 break; 6787 } 6788 6789 case 7: { /* FNSTSW m16 */ 6790 IRExpr* sw = get_FPU_sw(); 6791 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6792 storeLE( mkexpr(addr), sw ); 6793 DIP("fnstsw %s\n", dis_buf); 6794 break; 6795 } 6796 6797 default: 6798 vex_printf("unhandled opc_aux = 0x%2x\n", 6799 (UInt)gregLO3ofRM(modrm)); 6800 vex_printf("first_opcode == 0xDD\n"); 6801 goto decode_fail; 6802 } 6803 } else { 6804 delta++; 6805 switch (modrm) { 6806 6807 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6808 r_dst = (UInt)modrm - 0xC0; 6809 DIP("ffree %%st(%u)\n", r_dst); 6810 put_ST_TAG ( r_dst, mkU8(0) ); 6811 break; 6812 6813 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6814 r_dst = (UInt)modrm - 0xD0; 6815 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6816 /* P4 manual says: "If the destination operand is a 6817 non-empty register, the invalid-operation exception 6818 is not generated. Hence put_ST_UNCHECKED. */ 6819 put_ST_UNCHECKED(r_dst, get_ST(0)); 6820 break; 6821 6822 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6823 r_dst = (UInt)modrm - 0xD8; 6824 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6825 /* P4 manual says: "If the destination operand is a 6826 non-empty register, the invalid-operation exception 6827 is not generated. Hence put_ST_UNCHECKED. */ 6828 put_ST_UNCHECKED(r_dst, get_ST(0)); 6829 fp_pop(); 6830 break; 6831 6832 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 6833 r_dst = (UInt)modrm - 0xE0; 6834 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6835 /* This forces C1 to zero, which isn't right. */ 6836 put_C3210( 6837 unop(Iop_32Uto64, 6838 binop( Iop_And32, 6839 binop(Iop_Shl32, 6840 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6841 mkU8(8)), 6842 mkU32(0x4500) 6843 ))); 6844 break; 6845 6846 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) 
*/ 6847 r_dst = (UInt)modrm - 0xE8; 6848 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6849 /* This forces C1 to zero, which isn't right. */ 6850 put_C3210( 6851 unop(Iop_32Uto64, 6852 binop( Iop_And32, 6853 binop(Iop_Shl32, 6854 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6855 mkU8(8)), 6856 mkU32(0x4500) 6857 ))); 6858 fp_pop(); 6859 break; 6860 6861 default: 6862 goto decode_fail; 6863 } 6864 } 6865 } 6866 6867 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6868 else 6869 if (first_opcode == 0xDE) { 6870 6871 if (modrm < 0xC0) { 6872 6873 /* bits 5,4,3 are an opcode extension, and the modRM also 6874 specifies an address. */ 6875 IROp fop; 6876 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6877 delta += len; 6878 6879 switch (gregLO3ofRM(modrm)) { 6880 6881 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6882 DIP("fiaddw %s\n", dis_buf); 6883 fop = Iop_AddF64; 6884 goto do_fop_m16; 6885 6886 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6887 DIP("fimulw %s\n", dis_buf); 6888 fop = Iop_MulF64; 6889 goto do_fop_m16; 6890 6891 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6892 DIP("fisubw %s\n", dis_buf); 6893 fop = Iop_SubF64; 6894 goto do_fop_m16; 6895 6896 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6897 DIP("fisubrw %s\n", dis_buf); 6898 fop = Iop_SubF64; 6899 goto do_foprev_m16; 6900 6901 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6902 DIP("fisubw %s\n", dis_buf); 6903 fop = Iop_DivF64; 6904 goto do_fop_m16; 6905 6906 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6907 DIP("fidivrw %s\n", dis_buf); 6908 fop = Iop_DivF64; 6909 goto do_foprev_m16; 6910 6911 do_fop_m16: 6912 put_ST_UNCHECKED(0, 6913 triop(fop, 6914 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6915 get_ST(0), 6916 unop(Iop_I32StoF64, 6917 unop(Iop_16Sto32, 6918 loadLE(Ity_I16, mkexpr(addr)))))); 6919 break; 6920 6921 do_foprev_m16: 6922 put_ST_UNCHECKED(0, 6923 triop(fop, 6924 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6925 
unop(Iop_I32StoF64, 6926 unop(Iop_16Sto32, 6927 loadLE(Ity_I16, mkexpr(addr)))), 6928 get_ST(0))); 6929 break; 6930 6931 default: 6932 vex_printf("unhandled opc_aux = 0x%2x\n", 6933 (UInt)gregLO3ofRM(modrm)); 6934 vex_printf("first_opcode == 0xDE\n"); 6935 goto decode_fail; 6936 } 6937 6938 } else { 6939 6940 delta++; 6941 switch (modrm) { 6942 6943 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6944 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6945 break; 6946 6947 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 6948 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6949 break; 6950 6951 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6952 DIP("fcompp %%st(0),%%st(1)\n"); 6953 /* This forces C1 to zero, which isn't right. */ 6954 put_C3210( 6955 unop(Iop_32Uto64, 6956 binop( Iop_And32, 6957 binop(Iop_Shl32, 6958 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6959 mkU8(8)), 6960 mkU32(0x4500) 6961 ))); 6962 fp_pop(); 6963 fp_pop(); 6964 break; 6965 6966 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6967 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6968 break; 6969 6970 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6971 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6972 break; 6973 6974 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6975 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6976 break; 6977 6978 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6979 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6980 break; 6981 6982 default: 6983 goto decode_fail; 6984 } 6985 6986 } 6987 } 6988 6989 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6990 else 6991 if (first_opcode == 0xDF) { 6992 6993 if (modrm < 0xC0) { 6994 6995 /* bits 5,4,3 are an opcode extension, and the modRM also 6996 specifies an address. 
*/ 6997 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6998 delta += len; 6999 7000 switch (gregLO3ofRM(modrm)) { 7001 7002 case 0: /* FILD m16int */ 7003 DIP("fildw %s\n", dis_buf); 7004 fp_push(); 7005 put_ST(0, unop(Iop_I32StoF64, 7006 unop(Iop_16Sto32, 7007 loadLE(Ity_I16, mkexpr(addr))))); 7008 break; 7009 7010 case 1: /* FISTTPS m16 (SSE3) */ 7011 DIP("fisttps %s\n", dis_buf); 7012 storeLE( mkexpr(addr), 7013 x87ishly_qnarrow_32_to_16( 7014 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 7015 fp_pop(); 7016 break; 7017 7018 case 2: /* FIST m16 */ 7019 DIP("fists %s\n", dis_buf); 7020 storeLE( mkexpr(addr), 7021 x87ishly_qnarrow_32_to_16( 7022 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 7023 break; 7024 7025 case 3: /* FISTP m16 */ 7026 DIP("fistps %s\n", dis_buf); 7027 storeLE( mkexpr(addr), 7028 x87ishly_qnarrow_32_to_16( 7029 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 7030 fp_pop(); 7031 break; 7032 7033 case 5: /* FILD m64 */ 7034 DIP("fildll %s\n", dis_buf); 7035 fp_push(); 7036 put_ST(0, binop(Iop_I64StoF64, 7037 get_roundingmode(), 7038 loadLE(Ity_I64, mkexpr(addr)))); 7039 break; 7040 7041 case 7: /* FISTP m64 */ 7042 DIP("fistpll %s\n", dis_buf); 7043 storeLE( mkexpr(addr), 7044 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 7045 fp_pop(); 7046 break; 7047 7048 default: 7049 vex_printf("unhandled opc_aux = 0x%2x\n", 7050 (UInt)gregLO3ofRM(modrm)); 7051 vex_printf("first_opcode == 0xDF\n"); 7052 goto decode_fail; 7053 } 7054 7055 } else { 7056 7057 delta++; 7058 switch (modrm) { 7059 7060 case 0xC0: /* FFREEP %st(0) */ 7061 DIP("ffreep %%st(%d)\n", 0); 7062 put_ST_TAG ( 0, mkU8(0) ); 7063 fp_pop(); 7064 break; 7065 7066 case 0xE0: /* FNSTSW %ax */ 7067 DIP("fnstsw %%ax\n"); 7068 /* Invent a plausible-looking FPU status word value and 7069 dump it in %AX: 7070 ((ftop & 7) << 11) | (c3210 & 0x4700) 7071 */ 7072 putIRegRAX( 7073 2, 7074 unop(Iop_32to16, 7075 binop(Iop_Or32, 7076 binop(Iop_Shl32, 7077 
binop(Iop_And32, get_ftop(), mkU32(7)),
                           mkU8(11)),
                        binop(Iop_And32,
                              unop(Iop_64to32, get_C3210()),
                              mkU32(0x4700))
               )));
               break;

            case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
               break;

            case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
               /* not really right since COMIP != UCOMIP */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
               break;

            default:
               goto decode_fail;
         }
      }

   }

   else
      goto decode_fail;

   *decode_ok = True;
   return delta;

  decode_fail:
   *decode_ok = False;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

/* Emitted before every MMX insn except EMMS: mark all 8 FP tags as
   valid (1) and reset the FP top-of-stack to zero, per the table
   above. */
static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      /* ix expression is 'zero', so this writes tag slot i directly. */
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

/* Emitted for EMMS: mark all 8 FP tags as empty (0) and reset the FP
   top-of-stack to zero. */
static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}


/* Read MMX register 0..7; MMX regs alias the low 64 bits of the x87
   register file (hence the OFFB_FPREGS base). */
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


/* Write a 64-bit value to MMX register 0..7 (same aliasing as
   getMMXReg). */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}


/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

static
ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                const HChar* name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;
   IROp    op    = Iop_INVALID;
   /* Ops with no direct IROp are routed to a clean-helper call instead;
      XXX records the helper's address and name. */
   void*   hAddr = NULL;
   const HChar*  hName = NULL;
   /* eLeft: E operand becomes the LEFT argument (matters for
      non-commutative ops such as pack/unpack). */
   Bool    eLeft = False;

#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op =
Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack ops are non-commutative: E supplies the left half. */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      /* PANDN: invG makes us compute (~G) & E below. */
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (UInt)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch the E operand: either a register or a 64-bit load. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64,
mkexpr(addr)); 7284 } 7285 7286 if (eLeft) { 7287 argL = argE; 7288 argR = argG; 7289 } else { 7290 argL = argG; 7291 argR = argE; 7292 } 7293 7294 if (op != Iop_INVALID) { 7295 vassert(hName == NULL); 7296 vassert(hAddr == NULL); 7297 assign(res, binop(op, argL, argR)); 7298 } else { 7299 vassert(hName != NULL); 7300 vassert(hAddr != NULL); 7301 assign( res, 7302 mkIRExprCCall( 7303 Ity_I64, 7304 0/*regparms*/, hName, hAddr, 7305 mkIRExprVec_2( argL, argR ) 7306 ) 7307 ); 7308 } 7309 7310 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 7311 7312 DIP("%s%s %s, %s\n", 7313 name, show_granularity ? nameMMXGran(opc & 3) : "", 7314 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 7315 nameMMXReg(gregLO3ofRM(modrm)) ); 7316 7317 return delta; 7318 } 7319 7320 7321 /* Vector by scalar shift of G by the amount specified at the bottom 7322 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 7323 7324 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi, 7325 Prefix pfx, Long delta, 7326 const HChar* opname, IROp op ) 7327 { 7328 HChar dis_buf[50]; 7329 Int alen, size; 7330 IRTemp addr; 7331 Bool shl, shr, sar; 7332 UChar rm = getUChar(delta); 7333 IRTemp g0 = newTemp(Ity_I64); 7334 IRTemp g1 = newTemp(Ity_I64); 7335 IRTemp amt = newTemp(Ity_I64); 7336 IRTemp amt8 = newTemp(Ity_I8); 7337 7338 if (epartIsReg(rm)) { 7339 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 7340 DIP("%s %s,%s\n", opname, 7341 nameMMXReg(eregLO3ofRM(rm)), 7342 nameMMXReg(gregLO3ofRM(rm)) ); 7343 delta++; 7344 } else { 7345 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 7346 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 7347 DIP("%s %s,%s\n", opname, 7348 dis_buf, 7349 nameMMXReg(gregLO3ofRM(rm)) ); 7350 delta += alen; 7351 } 7352 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 7353 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 7354 7355 shl = shr = sar = False; 7356 size = 0; 7357 switch (op) { 7358 case Iop_ShlN16x4: shl = True; size = 32; break; 7359 case Iop_ShlN32x2: shl = True; size = 
32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   }

   if (shl || shr) {
      /* Out-of-range amounts (>= lane width) yield zero. */
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkU64(0)
         )
      );
   } else
   if (sar) {
      /* Out-of-range arithmetic shifts clamp to size-1 (all sign bits). */
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      vassert(0);
   }

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   return delta;
}


/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm. */

static
ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm  = getUChar(delta);
   IRTemp  e0  = newTemp(Ity_I64);
   IRTemp  e1  = newTemp(Ity_I64);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   /* greg selects the shift group: 2=shr, 4=sar, 6=shl. */
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
       (Int)amt,
       nameMMXReg(eregLO3ofRM(rm)) );

   assign( e0, getMMXReg(eregLO3ofRM(rm)) );

   /* 'size' is the lane width; the immediate is known at decode time,
      so out-of-range handling is folded statically below. */
   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      /* Immediate amounts >= lane width give zero (resolved statically). */
      assign( e1, amt >= size
                     ? mkU64(0)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      /* Arithmetic shifts clamp out-of-range amounts to size-1. */
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   return delta;
}


/* Completely handle all MMX instructions except emms. */

static
ULong dis_MMX ( Bool* decode_ok,
                const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
{
   Int   len;
   UChar modrm;
   HChar dis_buf[50];
   UChar opc = getUChar(delta);
   delta++;

   /* dis_MMX handles all insns except emms. */
   do_MMX_preamble();

   switch (opc) {

      case 0x6E:
         if (sz == 4) {
            /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               /* Zero-extend the 32-bit source into the 64-bit MMX reg. */
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         getIReg32(eregOfRexRM(pfx,modrm)) ) );
               DIP("movd %s, %s\n",
                   nameIReg32(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         loadLE(Ity_I32, mkexpr(addr)) ) );
               DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg( gregLO3ofRM(modrm),
                          getIReg64(eregOfRexRM(pfx,modrm)) );
               DIP("movd %s, %s\n",
                   nameIReg64(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0
); 7508 delta += len; 7509 putMMXReg( gregLO3ofRM(modrm), 7510 loadLE(Ity_I64, mkexpr(addr)) ); 7511 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7512 } 7513 } 7514 else { 7515 goto mmx_decode_failure; 7516 } 7517 break; 7518 7519 case 0x7E: 7520 if (sz == 4) { 7521 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 7522 modrm = getUChar(delta); 7523 if (epartIsReg(modrm)) { 7524 delta++; 7525 putIReg32( eregOfRexRM(pfx,modrm), 7526 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7527 DIP("movd %s, %s\n", 7528 nameMMXReg(gregLO3ofRM(modrm)), 7529 nameIReg32(eregOfRexRM(pfx,modrm))); 7530 } else { 7531 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7532 delta += len; 7533 storeLE( mkexpr(addr), 7534 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7535 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7536 } 7537 } 7538 else 7539 if (sz == 8) { 7540 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 7541 modrm = getUChar(delta); 7542 if (epartIsReg(modrm)) { 7543 delta++; 7544 putIReg64( eregOfRexRM(pfx,modrm), 7545 getMMXReg(gregLO3ofRM(modrm)) ); 7546 DIP("movd %s, %s\n", 7547 nameMMXReg(gregLO3ofRM(modrm)), 7548 nameIReg64(eregOfRexRM(pfx,modrm))); 7549 } else { 7550 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7551 delta += len; 7552 storeLE( mkexpr(addr), 7553 getMMXReg(gregLO3ofRM(modrm)) ); 7554 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7555 } 7556 } else { 7557 goto mmx_decode_failure; 7558 } 7559 break; 7560 7561 case 0x6F: 7562 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 7563 if (sz != 4 7564 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7565 goto mmx_decode_failure; 7566 modrm = getUChar(delta); 7567 if (epartIsReg(modrm)) { 7568 delta++; 7569 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 7570 DIP("movq %s, %s\n", 7571 nameMMXReg(eregLO3ofRM(modrm)), 7572 nameMMXReg(gregLO3ofRM(modrm))); 7573 } else { 7574 IRTemp addr = 
disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7575 delta += len; 7576 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 7577 DIP("movq %s, %s\n", 7578 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7579 } 7580 break; 7581 7582 case 0x7F: 7583 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 7584 if (sz != 4 7585 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7586 goto mmx_decode_failure; 7587 modrm = getUChar(delta); 7588 if (epartIsReg(modrm)) { 7589 delta++; 7590 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) ); 7591 DIP("movq %s, %s\n", 7592 nameMMXReg(gregLO3ofRM(modrm)), 7593 nameMMXReg(eregLO3ofRM(modrm))); 7594 } else { 7595 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7596 delta += len; 7597 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 7598 DIP("mov(nt)q %s, %s\n", 7599 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7600 } 7601 break; 7602 7603 case 0xFC: 7604 case 0xFD: 7605 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 7606 if (sz != 4) 7607 goto mmx_decode_failure; 7608 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 7609 break; 7610 7611 case 0xEC: 7612 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7613 if (sz != 4 7614 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7615 goto mmx_decode_failure; 7616 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 7617 break; 7618 7619 case 0xDC: 7620 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7621 if (sz != 4) 7622 goto mmx_decode_failure; 7623 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 7624 break; 7625 7626 case 0xF8: 7627 case 0xF9: 7628 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 7629 if (sz != 4) 7630 goto mmx_decode_failure; 7631 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 7632 break; 7633 7634 case 0xE8: 7635 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7636 if (sz != 4) 7637 
goto mmx_decode_failure; 7638 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 7639 break; 7640 7641 case 0xD8: 7642 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7643 if (sz != 4) 7644 goto mmx_decode_failure; 7645 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 7646 break; 7647 7648 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 7649 if (sz != 4) 7650 goto mmx_decode_failure; 7651 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 7652 break; 7653 7654 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 7655 if (sz != 4) 7656 goto mmx_decode_failure; 7657 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 7658 break; 7659 7660 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 7661 vassert(sz == 4); 7662 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 7663 break; 7664 7665 case 0x74: 7666 case 0x75: 7667 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 7668 if (sz != 4) 7669 goto mmx_decode_failure; 7670 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 7671 break; 7672 7673 case 0x64: 7674 case 0x65: 7675 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 7676 if (sz != 4) 7677 goto mmx_decode_failure; 7678 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 7679 break; 7680 7681 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 7682 if (sz != 4) 7683 goto mmx_decode_failure; 7684 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 7685 break; 7686 7687 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7688 if (sz != 4) 7689 goto mmx_decode_failure; 7690 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 7691 break; 7692 7693 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7694 if (sz != 4) 7695 goto mmx_decode_failure; 7696 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, 
delta, opc, "packuswb", False ); 7697 break; 7698 7699 case 0x68: 7700 case 0x69: 7701 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 7702 if (sz != 4 7703 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7704 goto mmx_decode_failure; 7705 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 7706 break; 7707 7708 case 0x60: 7709 case 0x61: 7710 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7711 if (sz != 4 7712 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7713 goto mmx_decode_failure; 7714 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 7715 break; 7716 7717 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 7718 if (sz != 4) 7719 goto mmx_decode_failure; 7720 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 7721 break; 7722 7723 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 7724 if (sz != 4) 7725 goto mmx_decode_failure; 7726 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7727 break; 7728 7729 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7730 if (sz != 4) 7731 goto mmx_decode_failure; 7732 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7733 break; 7734 7735 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7736 if (sz != 4) 7737 goto mmx_decode_failure; 7738 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7739 break; 7740 7741 # define SHIFT_BY_REG(_name,_op) \ 7742 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7743 break; 7744 7745 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7746 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7747 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7748 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7749 7750 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7751 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7752 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7753 case 0xD3: 
SHIFT_BY_REG("psrlq", Iop_Shr64); 7754 7755 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7756 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7757 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7758 7759 # undef SHIFT_BY_REG 7760 7761 case 0x71: 7762 case 0x72: 7763 case 0x73: { 7764 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7765 UChar byte2, subopc; 7766 if (sz != 4) 7767 goto mmx_decode_failure; 7768 byte2 = getUChar(delta); /* amode / sub-opcode */ 7769 subopc = toUChar( (byte2 >> 3) & 7 ); 7770 7771 # define SHIFT_BY_IMM(_name,_op) \ 7772 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7773 } while (0) 7774 7775 if (subopc == 2 /*SRL*/ && opc == 0x71) 7776 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7777 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7778 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7779 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7780 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7781 7782 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7783 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7784 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7785 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7786 7787 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7788 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7789 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7790 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7791 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7792 SHIFT_BY_IMM("psllq", Iop_Shl64); 7793 7794 else goto mmx_decode_failure; 7795 7796 # undef SHIFT_BY_IMM 7797 break; 7798 } 7799 7800 case 0xF7: { 7801 IRTemp addr = newTemp(Ity_I64); 7802 IRTemp regD = newTemp(Ity_I64); 7803 IRTemp regM = newTemp(Ity_I64); 7804 IRTemp mask = newTemp(Ity_I64); 7805 IRTemp olddata = newTemp(Ity_I64); 7806 IRTemp newdata = newTemp(Ity_I64); 7807 7808 modrm = getUChar(delta); 7809 if (sz != 4 || (!epartIsReg(modrm))) 7810 goto mmx_decode_failure; 7811 delta++; 7812 7813 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7814 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7815 assign( regD, getMMXReg( 
gregLO3ofRM(modrm) )); 7816 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7817 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7818 assign( newdata, 7819 binop(Iop_Or64, 7820 binop(Iop_And64, 7821 mkexpr(regD), 7822 mkexpr(mask) ), 7823 binop(Iop_And64, 7824 mkexpr(olddata), 7825 unop(Iop_Not64, mkexpr(mask)))) ); 7826 storeLE( mkexpr(addr), mkexpr(newdata) ); 7827 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7828 nameMMXReg( gregLO3ofRM(modrm) ) ); 7829 break; 7830 } 7831 7832 /* --- MMX decode failure --- */ 7833 default: 7834 mmx_decode_failure: 7835 *decode_ok = False; 7836 return delta; /* ignored */ 7837 7838 } 7839 7840 *decode_ok = True; 7841 return delta; 7842 } 7843 7844 7845 /*------------------------------------------------------------*/ 7846 /*--- More misc arithmetic and other obscure insns. ---*/ 7847 /*------------------------------------------------------------*/ 7848 7849 /* Generate base << amt with vacated places filled with stuff 7850 from xtra. amt guaranteed in 0 .. 63. */ 7851 static 7852 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7853 { 7854 /* if amt == 0 7855 then base 7856 else (base << amt) | (xtra >>u (64-amt)) 7857 */ 7858 return 7859 IRExpr_ITE( 7860 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7861 binop(Iop_Or64, 7862 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7863 binop(Iop_Shr64, mkexpr(xtra), 7864 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7865 ), 7866 mkexpr(base) 7867 ); 7868 } 7869 7870 /* Generate base >>u amt with vacated places filled with stuff 7871 from xtra. amt guaranteed in 0 .. 63. 
*/
static 
IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
{
   /* if   amt == 0 
      then base
      else (base >>u amt) | (xtra << (64-amt))
   */
   return 
      IRExpr_ITE( 
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
         binop(Iop_Or64, 
               binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shl64, mkexpr(xtra), 
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
               ),
         mkexpr(base)
      );
}

/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */
/* Disassemble one SHLD/SHRD Gv,Ev.  'shift_amt' is an Ity_I8
   expression for the shift count (either an immediate or CL);
   'amt_is_literal' says which, so the amode can account for the
   trailing imm8 byte.  Returns the updated delta. */
static
ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
                        Prefix pfx,
                        Long delta, UChar modrm,
                        Int sz,
                        IRExpr* shift_amt,
                        Bool amt_is_literal,
                        const HChar* shift_amt_txt,
                        Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty     = szToITy(sz);
   IRTemp gsrc   = newTemp(ty);
   IRTemp esrc   = newTemp(ty);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp tmpSH  = newTemp(Ity_I8);
   IRTemp tmpSS  = newTemp(Ity_I8);
   IRTemp tmp64  = IRTemp_INVALID;
   IRTemp res64  = IRTemp_INVALID;
   IRTemp rss64  = IRTemp_INVALID;
   IRTemp resTy  = IRTemp_INVALID;
   IRTemp rssTy  = IRTemp_INVALID;
   Int    mask   = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom. */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz), 
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n", 
          ( left_shift ? 'l' : 'r' ), nameISize(sz), 
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), dis_buf);
   }

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
      value (rss64). */
   /* NOTE: tmpSS = (tmpSH - 1) & mask, i.e. the shift by one less;
      the flags thunk below uses the subshifted value to derive the
      carry out of the last bit shifted. */

   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8, 
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                        mkU8(mask)));

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
         assign( res64, 
                 binop(Iop_Shr64, 
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(32)) );
         assign( rss64, 
                 binop(Iop_Shr64, 
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
                       mkU8(32)) );
      }
      else
      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
      }
      else
      if (sz == 2 && left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         ));
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
         assign( res64, 
                 binop(Iop_Shr64, 
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(48)) );
         /* subshift formed by shifting [esrc'0000'0000'0000] */
         assign( rss64, 
                 binop(Iop_Shr64, 
                       binop(Iop_Shl64, 
                             binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
                                              mkU8(48)),
                             mkexpr(tmpSS)),
                       mkU8(48)) );
      }
      else
      if (sz == 2 && !left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         ));
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64, 
                              unop(Iop_16Uto64, mkexpr(esrc)), 
                              mkexpr(tmpSS)) );
      }

   } else {

      vassert(sz == 8);
      if (left_shift) {
         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   if (amt_is_literal) delta++;
   return delta;
}


/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required.
*/

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

/* Single-letter suffix used in disassembly printing for each bit-test
   variant (bt / bts / btr / btc). */
static const HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}


/* Disassemble one BT/BTS/BTR/BTC Gv,Ev.  'op' selects the variant.
   Sets *decode_OK to False (and returns) on unacceptable prefix
   combinations.  Returns the updated delta. */
static
ULong dis_bt_G_E ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op,
                   /*OUT*/Bool* decode_OK )
{
   HChar  dis_buf[50];
   UChar  modrm;
   Int    len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2 
             = t_addr0 = t_addr1 = t_rsp 
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   *decode_OK = True;
   if (epartIsReg(modrm)) {
      /* F2 and F3 are never acceptable. */
      if (haveF2orF3(pfx)) {
         *decode_OK = False;
         return delta;
      }
   } else {
      /* F2 or F3 (but not both) are allowed, provided LOCK is also
         present, and only for the BTC/BTS/BTR cases (not BT). */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
            *decode_OK = False;
            return delta;
         }
      }
   }

   /* Bit index from the G register, sign-widened to 64 bits (for the
      memory form the index may be negative). */
   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP by at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely.  */
      t_rsp = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And64, 
                              mkexpr(t_bitno0), 
                              mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg. */

   /* Now the main sequence. */
   assign( t_addr1, 
           binop(Iop_Add64, 
                 mkexpr(t_addr0), 
                 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2, 
           unop(Iop_64to8, 
                binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) { 
         case BtOpSet: 
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp: 
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset: 
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched), 
                                    unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default: 
            vpanic("dis_bt_G_E(amd64)");
      }
      /* Locked memory forms must be atomic: use compare-and-swap. */
      if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                                 mkexpr(t_new)/*new*/,
                                 guest_RIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag.  The Intel docs
      (as of 2015, at least) say that C holds the result, Z is unchanged, and
      O,S,A and P are undefined.  However, on Skylake it appears that O,S,A,P
      are also unchanged, so let's do that. */
   const ULong maskC     = AMD64G_CC_MASK_C;
   const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
                           | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
                           | AMD64G_CC_MASK_P;

   IRTemp old_rflags = newTemp(Ity_I64);
   assign(old_rflags, mk_amd64g_calculate_rflags_all());

   IRTemp new_rflags = newTemp(Ity_I64);
   assign(new_rflags,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
                binop(Iop_And64,
                      binop(Iop_Shr64,
                            unop(Iop_8Uto64, mkexpr(t_fetched)),
                            mkexpr(t_bitno2)),
                      mkU64(maskC))));

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_rsp still points at it. */
      /* only write the reg if actually modifying it; doing otherwise
         zeroes the top half erroneously when doing btl due to
         standard zero-extend rule */
      if (op != BtOpNone)
         putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), 
       ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );

   return delta;
}



/* Handle BSF/BSR.  Only v-size seems necessary.
*/
/* Disassemble one BSF (fwds=True) or BSR (fwds=False) Ev,Gv.
   Returns the updated delta. */
static
ULong dis_bs_E_G ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, Bool fwds )
{
   Bool   isReg;
   UChar  modrm;
   HChar  dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp src   = newTemp(ty);
   IRTemp dst   = newTemp(ty);
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 8 || sz == 4 || sz == 2);

   modrm = getUChar(delta);
   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIRegE(sz, pfx, modrm) );
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz), 
       ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 
       nameIRegG(sz, pfx, modrm));

   /* First, widen src to 64 bits if it is not already. */
   assign( src64, widenUto64(mkexpr(src)) );

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );

   /* Flags: Z is 1 iff source value is zero.  All others 
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( 
            OFFB_CC_DEP1,
            IRExpr_ITE( mkexpr(srcB),
                        /* src!=0 */
                        mkU64(0),
                        /* src==0 */
                        mkU64(AMD64G_CC_MASK_Z)
                        )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
      But anyway, amd64 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

          bsf64:  if src == 0 then {dst is unchanged} 
                              else Ctz64(src)

          bsr64:  if src == 0 then {dst is unchanged} 
                              else 63 - Clz64(src)

          bsf32:  if src == 0 then {dst is unchanged} 
                              else Ctz64(32Uto64(src))

          bsr32:  if src == 0 then {dst is unchanged}
                              else 63 - Clz64(32Uto64(src))

          bsf16:  if src == 0 then {dst is unchanged} 
                              else Ctz64(32Uto64(16Uto32(src)))

          bsr16:  if src == 0 then {dst is unchanged} 
                              else 63 - Clz64(32Uto64(16Uto32(src)))
   */

   /* The main computation, guarding against zero. */
   assign( dst64,
           IRExpr_ITE( 
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz64, mkexpr(src64))
                   : binop(Iop_Sub64, 
                           mkU64(63),
                           unop(Iop_Clz64, mkexpr(src64))),
              /* src == 0 -- leave dst unchanged */
              widenUto64( getIRegG( sz, pfx, modrm ) )
           )
         );

   if (sz == 2)
      assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
   else
   if (sz == 4)
      assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
   else
      assign( dst, mkexpr(dst64) );

   /* dump result back */
   putIRegG( sz, pfx, modrm, mkexpr(dst) );

   return delta;
}


/* swap rAX with the reg specified by reg and REX.B */
static 
void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4 || sz == 8);
   vassert(regLo3 < 8);
   if (sz == 8) {
      assign( t1, getIReg64(R_RAX) );
      assign( t2, getIRegRexB(8, pfx, regLo3) );
      putIReg64( R_RAX, mkexpr(t2) );
      putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
   } else if (sz == 4) {
      assign( t1, getIReg32(R_RAX) );
      assign( t2, getIRegRexB(4, pfx, regLo3) );
      putIReg32( R_RAX, mkexpr(t2) );
      putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
   } else {
      assign( t1, getIReg16(R_RAX) );
      assign( t2, getIRegRexB(2, pfx, regLo3) );
      putIReg16( R_RAX, mkexpr(t2) );
      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
   }
   DIP("xchg%c %s, %s\n", 
       nameISize(sz), nameIRegRAX(sz), 
       nameIRegRexB(sz,pfx, regLo3));
}


/* Generate IR for SAHF: load S,Z,A,C,P from %AH into the flags thunk,
   retaining the old O flag. */
static 
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 
                                    -- retain the old O flag
      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
   */
   ULong  mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                       |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I64);
   assign( oldflags, mk_amd64g_calculate_rflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or64,
               binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
               binop(Iop_And64, 
                     binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
                     mkU64(mask_SZACP))
              )
   ));
}


/* Generate IR for LAHF: copy the flags into %AH, with bit 1 forced
   to 1 as the architecture specifies. */
static 
void codegen_LAHF ( void  )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* rax_with_hole;
   IRExpr* new_byte;
   IRExpr* new_rax;
   ULong   mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                        |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;

   IRTemp  flags = newTemp(Ity_I64);
   assign( flags, mk_amd64g_calculate_rflags_all() );

   rax_with_hole 
      = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
   new_byte 
      = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
                        mkU64(1<<1));
   new_rax 
      = binop(Iop_Or64, rax_with_hole,
                        binop(Iop_Shl64, new_byte, mkU8(8)));
   putIReg64(R_RAX, new_rax);
}


/* Disassemble one CMPXCHG Gv,Ev (or Gb,Eb).  Sets *ok to False on
   unacceptable prefix combinations.  Returns the updated delta. */
static
ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
                        const VexAbiInfo*  vbi,
                        Prefix       pfx,
                        Int          size, 
                        Long         delta0 )
{
   HChar dis_buf[50];
   Int   len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */

   /* Decide whether F2 or F3 are acceptable.
Never for register 8500 case, but for the memory case, one or the other is OK provided 8501 LOCK is also present. */ 8502 if (epartIsReg(rm)) { 8503 if (haveF2orF3(pfx)) { 8504 *ok = False; 8505 return delta0; 8506 } 8507 } else { 8508 if (haveF2orF3(pfx)) { 8509 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 8510 *ok = False; 8511 return delta0; 8512 } 8513 } 8514 } 8515 8516 if (epartIsReg(rm)) { 8517 /* case 1 */ 8518 assign( dest, getIRegE(size, pfx, rm) ); 8519 delta0++; 8520 assign( src, getIRegG(size, pfx, rm) ); 8521 assign( acc, getIRegRAX(size) ); 8522 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8523 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8524 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8525 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8526 putIRegRAX(size, mkexpr(acc2)); 8527 putIRegE(size, pfx, rm, mkexpr(dest2)); 8528 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8529 nameIRegG(size,pfx,rm), 8530 nameIRegE(size,pfx,rm) ); 8531 } 8532 else if (!epartIsReg(rm) && !haveLOCK(pfx)) { 8533 /* case 2 */ 8534 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8535 assign( dest, loadLE(ty, mkexpr(addr)) ); 8536 delta0 += len; 8537 assign( src, getIRegG(size, pfx, rm) ); 8538 assign( acc, getIRegRAX(size) ); 8539 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8540 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8541 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8542 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8543 putIRegRAX(size, mkexpr(acc2)); 8544 storeLE( mkexpr(addr), mkexpr(dest2) ); 8545 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8546 nameIRegG(size,pfx,rm), dis_buf); 8547 } 8548 else if (!epartIsReg(rm) && haveLOCK(pfx)) { 8549 /* case 3 */ 8550 /* src is new value. acc is expected value. dest is old value. 
         Compute success from the output of the IRCAS, and steer the
         new value for RAX accordingly: in case of success, RAX is
         unchanged. */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      delta0 += len;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      /* The atomic compare-and-swap: the old memory value is captured
         in 'dest'; memory is updated to 'src' only if it equalled 'acc'. */
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      /* Flags are set from acc - dest, as for a CMP. */
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      /* On success RAX is unchanged; on failure it receives the old
         memory value. */
      assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else vassert(0);

   *ok = True;
   return delta0;
}


/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   If E is mem  -->    (getAddr E) -> tmpa
                       LD (tmpa), tmps
                       GET %G, tmpd
                       CMOVcc tmps, tmpd
                       PUT tmpd, %G

   Returns delta advanced past the modRM (and any SIB/displacement)
   bytes.  Note the memory operand is always loaded, regardless of the
   condition; only the write-back to G is conditional.
*/
static
ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
                     Prefix        pfx,
                     Int           sz,
                     AMD64Condcode cond,
                     Long          delta0 )
{
   UChar rm  = getUChar(delta0);
   HChar dis_buf[50];
   Int   len;

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);

   if (epartIsReg(rm)) {
      assign( tmps, getIRegE(sz, pfx, rm) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),
                            mkexpr(tmps),
                            mkexpr(tmpd) )
              );
      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            nameIRegE(sz,pfx,rm),
                            nameIRegG(sz,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),
                            mkexpr(tmps),
                            mkexpr(tmpd) )
              );

      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            dis_buf,
                            nameIRegG(sz,pfx,rm));
      return len+delta0;
   }
}


/* Handle XADD G,E: E and G are exchanged and the sum E+G is written
   to E; flags are set from the addition.  *decode_ok is set False on
   an undecodable form.  Returns delta advanced past the instruction. */
static
ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
                     const VexAbiInfo* vbi,
                     Prefix pfx, Int sz, Long delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd,  getIRegE(sz, pfx, rm) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      putIRegE(sz, pfx, rm, mkexpr(tmpt1));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
      *decode_ok = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      /* Atomic update: store the sum only if memory still holds the
         value we loaded; casLE restarts the insn otherwise. */
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}

//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
//..
//.. static
//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
//.. {
//..    Int    len;
//..    IRTemp addr;
//..    UChar  rm  = getUChar(delta0);
//..    HChar  dis_buf[50];
//..
//..    if (epartIsReg(rm)) {
//..       putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
//..       DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
//..       return 1+delta0;
//..    } else {
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
//..       DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
//..       return len+delta0;
//..    }
//.. }
//..
//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem).  If
//..    dst is ireg and sz==4, zero out top half of it. */
//..
//.. static
//.. UInt dis_mov_Sw_Ew ( UChar sorb,
//..                      Int   sz,
//..                      UInt  delta0 )
//.. {
//..    Int    len;
//..    IRTemp addr;
//..    UChar  rm  = getUChar(delta0);
//..    HChar  dis_buf[50];
//..
//..    vassert(sz == 2 || sz == 4);
//..
//..    if (epartIsReg(rm)) {
//..       if (sz == 4)
//..          putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
//..       else
//..          putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
//..
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
//..       return 1+delta0;
//..    } else {
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
//..       return len+delta0;
//..    }
//.. }

/* Handle move instructions of the form
     mov S, E  meaning
     mov sreg, reg-or-mem
   Is passed the a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   VEX does not currently simulate segment registers on AMD64 which means that
   instead of moving a value of a segment register, zero is moved to the
   destination. The zero value represents a null (unused) selector. This is
   not correct (especially for the %cs, %fs and %gs registers) but it seems to
   provide a sufficient simulation for currently seen programs that use this
   instruction. If some program actually decides to use the obtained segment
   selector for something meaningful then the zero value should be a clear
   indicator that there is some problem.

   S(src) is sreg.
   E(dst) is reg-or-mem

   If E is reg, -->    PUT $0, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       ST $0, (tmpa)
*/
static
ULong dis_mov_S_E ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      /* Register destination: write a zero of the operand size. */
      putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
                         nameIRegE(size, pfx, rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
      /* Memory destination: always a 16-bit store, regardless of
         'size'. */
      storeLE(mkexpr(addr), mkU16(0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
                         dis_buf);
      return len+delta0;
   }
}

//.. static
//.. void dis_push_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//..
//..     assign( t1, getSReg(sreg) );
//..     assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
//..     putIReg(4, R_ESP, mkexpr(ta));
//..     storeLE( mkexpr(ta), mkexpr(t1) );
//..
//..     DIP("pushw %s\n", nameSReg(sreg));
//.. }
//..
//.. static
//.. void dis_pop_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//..
//..     assign( ta, getIReg(4, R_ESP) );
//..     assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
//..
//..     putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
//..     putSReg( sreg, mkexpr(t1) );
//..     DIP("pop %s\n", nameSReg(sreg));
//..
} 8849 8850 static 8851 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 ) 8852 { 8853 IRTemp t1 = newTemp(Ity_I64); 8854 IRTemp t2 = newTemp(Ity_I64); 8855 IRTemp t3 = newTemp(Ity_I64); 8856 assign(t1, getIReg64(R_RSP)); 8857 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8858 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8859 putIReg64(R_RSP, mkexpr(t3)); 8860 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8861 jmp_treg(dres, Ijk_Ret, t2); 8862 vassert(dres->whatNext == Dis_StopHere); 8863 } 8864 8865 8866 /*------------------------------------------------------------*/ 8867 /*--- SSE/SSE2/SSE3 helpers ---*/ 8868 /*------------------------------------------------------------*/ 8869 8870 /* Indicates whether the op requires a rounding-mode argument. Note 8871 that this covers only vector floating point arithmetic ops, and 8872 omits the scalar ones that need rounding modes. Note also that 8873 inconsistencies here will get picked up later by the IR sanity 8874 checker, so this isn't correctness-critical. */ 8875 static Bool requiresRMode ( IROp op ) 8876 { 8877 switch (op) { 8878 /* 128 bit ops */ 8879 case Iop_Add32Fx4: case Iop_Sub32Fx4: 8880 case Iop_Mul32Fx4: case Iop_Div32Fx4: 8881 case Iop_Add64Fx2: case Iop_Sub64Fx2: 8882 case Iop_Mul64Fx2: case Iop_Div64Fx2: 8883 /* 256 bit ops */ 8884 case Iop_Add32Fx8: case Iop_Sub32Fx8: 8885 case Iop_Mul32Fx8: case Iop_Div32Fx8: 8886 case Iop_Add64Fx4: case Iop_Sub64Fx4: 8887 case Iop_Mul64Fx4: case Iop_Div64Fx4: 8888 return True; 8889 default: 8890 break; 8891 } 8892 return False; 8893 } 8894 8895 8896 /* Worker function; do not call directly. 8897 Handles full width G = G `op` E and G = (not G) `op` E. 
8898 */ 8899 8900 static ULong dis_SSE_E_to_G_all_wrk ( 8901 const VexAbiInfo* vbi, 8902 Prefix pfx, Long delta, 8903 const HChar* opname, IROp op, 8904 Bool invertG 8905 ) 8906 { 8907 HChar dis_buf[50]; 8908 Int alen; 8909 IRTemp addr; 8910 UChar rm = getUChar(delta); 8911 Bool needsRMode = requiresRMode(op); 8912 IRExpr* gpart 8913 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8914 : getXMMReg(gregOfRexRM(pfx,rm)); 8915 if (epartIsReg(rm)) { 8916 putXMMReg( 8917 gregOfRexRM(pfx,rm), 8918 needsRMode 8919 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8920 gpart, 8921 getXMMReg(eregOfRexRM(pfx,rm))) 8922 : binop(op, gpart, 8923 getXMMReg(eregOfRexRM(pfx,rm))) 8924 ); 8925 DIP("%s %s,%s\n", opname, 8926 nameXMMReg(eregOfRexRM(pfx,rm)), 8927 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8928 return delta+1; 8929 } else { 8930 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8931 putXMMReg( 8932 gregOfRexRM(pfx,rm), 8933 needsRMode 8934 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8935 gpart, 8936 loadLE(Ity_V128, mkexpr(addr))) 8937 : binop(op, gpart, 8938 loadLE(Ity_V128, mkexpr(addr))) 8939 ); 8940 DIP("%s %s,%s\n", opname, 8941 dis_buf, 8942 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8943 return delta+alen; 8944 } 8945 } 8946 8947 8948 /* All lanes SSE binary operation, G = G `op` E. */ 8949 8950 static 8951 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi, 8952 Prefix pfx, Long delta, 8953 const HChar* opname, IROp op ) 8954 { 8955 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8956 } 8957 8958 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8959 8960 static 8961 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi, 8962 Prefix pfx, Long delta, 8963 const HChar* opname, IROp op ) 8964 { 8965 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8966 } 8967 8968 8969 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. 
*/ 8970 8971 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi, 8972 Prefix pfx, Long delta, 8973 const HChar* opname, IROp op ) 8974 { 8975 HChar dis_buf[50]; 8976 Int alen; 8977 IRTemp addr; 8978 UChar rm = getUChar(delta); 8979 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8980 if (epartIsReg(rm)) { 8981 putXMMReg( gregOfRexRM(pfx,rm), 8982 binop(op, gpart, 8983 getXMMReg(eregOfRexRM(pfx,rm))) ); 8984 DIP("%s %s,%s\n", opname, 8985 nameXMMReg(eregOfRexRM(pfx,rm)), 8986 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8987 return delta+1; 8988 } else { 8989 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8990 E operand needs to be made simply of zeroes. */ 8991 IRTemp epart = newTemp(Ity_V128); 8992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8993 assign( epart, unop( Iop_32UtoV128, 8994 loadLE(Ity_I32, mkexpr(addr))) ); 8995 putXMMReg( gregOfRexRM(pfx,rm), 8996 binop(op, gpart, mkexpr(epart)) ); 8997 DIP("%s %s,%s\n", opname, 8998 dis_buf, 8999 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9000 return delta+alen; 9001 } 9002 } 9003 9004 9005 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ 9006 9007 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi, 9008 Prefix pfx, Long delta, 9009 const HChar* opname, IROp op ) 9010 { 9011 HChar dis_buf[50]; 9012 Int alen; 9013 IRTemp addr; 9014 UChar rm = getUChar(delta); 9015 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 9016 if (epartIsReg(rm)) { 9017 putXMMReg( gregOfRexRM(pfx,rm), 9018 binop(op, gpart, 9019 getXMMReg(eregOfRexRM(pfx,rm))) ); 9020 DIP("%s %s,%s\n", opname, 9021 nameXMMReg(eregOfRexRM(pfx,rm)), 9022 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9023 return delta+1; 9024 } else { 9025 /* We can only do a 64-bit memory read, so the upper half of the 9026 E operand needs to be made simply of zeroes. 
*/ 9027 IRTemp epart = newTemp(Ity_V128); 9028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9029 assign( epart, unop( Iop_64UtoV128, 9030 loadLE(Ity_I64, mkexpr(addr))) ); 9031 putXMMReg( gregOfRexRM(pfx,rm), 9032 binop(op, gpart, mkexpr(epart)) ); 9033 DIP("%s %s,%s\n", opname, 9034 dis_buf, 9035 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9036 return delta+alen; 9037 } 9038 } 9039 9040 9041 /* All lanes unary SSE operation, G = op(E). */ 9042 9043 static ULong dis_SSE_E_to_G_unary_all ( 9044 const VexAbiInfo* vbi, 9045 Prefix pfx, Long delta, 9046 const HChar* opname, IROp op 9047 ) 9048 { 9049 HChar dis_buf[50]; 9050 Int alen; 9051 IRTemp addr; 9052 UChar rm = getUChar(delta); 9053 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked 9054 // up in the usual way. 9055 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2; 9056 if (epartIsReg(rm)) { 9057 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm)); 9058 /* XXXROUNDINGFIXME */ 9059 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 9060 : unop(op, src); 9061 putXMMReg( gregOfRexRM(pfx,rm), res ); 9062 DIP("%s %s,%s\n", opname, 9063 nameXMMReg(eregOfRexRM(pfx,rm)), 9064 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9065 return delta+1; 9066 } else { 9067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9068 IRExpr* src = loadLE(Ity_V128, mkexpr(addr)); 9069 /* XXXROUNDINGFIXME */ 9070 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 9071 : unop(op, src); 9072 putXMMReg( gregOfRexRM(pfx,rm), res ); 9073 DIP("%s %s,%s\n", opname, 9074 dis_buf, 9075 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9076 return delta+alen; 9077 } 9078 } 9079 9080 9081 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 9082 9083 static ULong dis_SSE_E_to_G_unary_lo32 ( 9084 const VexAbiInfo* vbi, 9085 Prefix pfx, Long delta, 9086 const HChar* opname, IROp op 9087 ) 9088 { 9089 /* First we need to get the old G value and patch the low 32 bits 9090 of the E operand into it. 
Then apply op and write back to G. */ 9091 HChar dis_buf[50]; 9092 Int alen; 9093 IRTemp addr; 9094 UChar rm = getUChar(delta); 9095 IRTemp oldG0 = newTemp(Ity_V128); 9096 IRTemp oldG1 = newTemp(Ity_V128); 9097 9098 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9099 9100 if (epartIsReg(rm)) { 9101 assign( oldG1, 9102 binop( Iop_SetV128lo32, 9103 mkexpr(oldG0), 9104 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 9105 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9106 DIP("%s %s,%s\n", opname, 9107 nameXMMReg(eregOfRexRM(pfx,rm)), 9108 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9109 return delta+1; 9110 } else { 9111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9112 assign( oldG1, 9113 binop( Iop_SetV128lo32, 9114 mkexpr(oldG0), 9115 loadLE(Ity_I32, mkexpr(addr)) )); 9116 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9117 DIP("%s %s,%s\n", opname, 9118 dis_buf, 9119 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9120 return delta+alen; 9121 } 9122 } 9123 9124 9125 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 9126 9127 static ULong dis_SSE_E_to_G_unary_lo64 ( 9128 const VexAbiInfo* vbi, 9129 Prefix pfx, Long delta, 9130 const HChar* opname, IROp op 9131 ) 9132 { 9133 /* First we need to get the old G value and patch the low 64 bits 9134 of the E operand into it. Then apply op and write back to G. 
*/ 9135 HChar dis_buf[50]; 9136 Int alen; 9137 IRTemp addr; 9138 UChar rm = getUChar(delta); 9139 IRTemp oldG0 = newTemp(Ity_V128); 9140 IRTemp oldG1 = newTemp(Ity_V128); 9141 9142 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9143 9144 if (epartIsReg(rm)) { 9145 assign( oldG1, 9146 binop( Iop_SetV128lo64, 9147 mkexpr(oldG0), 9148 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 9149 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9150 DIP("%s %s,%s\n", opname, 9151 nameXMMReg(eregOfRexRM(pfx,rm)), 9152 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9153 return delta+1; 9154 } else { 9155 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9156 assign( oldG1, 9157 binop( Iop_SetV128lo64, 9158 mkexpr(oldG0), 9159 loadLE(Ity_I64, mkexpr(addr)) )); 9160 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9161 DIP("%s %s,%s\n", opname, 9162 dis_buf, 9163 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9164 return delta+alen; 9165 } 9166 } 9167 9168 9169 /* SSE integer binary operation: 9170 G = G `op` E (eLeft == False) 9171 G = E `op` G (eLeft == True) 9172 */ 9173 static ULong dis_SSEint_E_to_G( 9174 const VexAbiInfo* vbi, 9175 Prefix pfx, Long delta, 9176 const HChar* opname, IROp op, 9177 Bool eLeft 9178 ) 9179 { 9180 HChar dis_buf[50]; 9181 Int alen; 9182 IRTemp addr; 9183 UChar rm = getUChar(delta); 9184 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 9185 IRExpr* epart = NULL; 9186 if (epartIsReg(rm)) { 9187 epart = getXMMReg(eregOfRexRM(pfx,rm)); 9188 DIP("%s %s,%s\n", opname, 9189 nameXMMReg(eregOfRexRM(pfx,rm)), 9190 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9191 delta += 1; 9192 } else { 9193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9194 epart = loadLE(Ity_V128, mkexpr(addr)); 9195 DIP("%s %s,%s\n", opname, 9196 dis_buf, 9197 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9198 delta += alen; 9199 } 9200 putXMMReg( gregOfRexRM(pfx,rm), 9201 eLeft ? 
                      binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}


/* Helper for doing SSE FP comparisons.  False return ==> unhandled.
   This is all a bit of a kludge in that it ignores the subtleties of
   ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
   spec.  On success, writes a (preSwap, op, postNot) triple:
   optionally swap the operands first, apply op, then optionally
   complement the result. */
static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
                           /*OUT*/IROp* opP,
                           /*OUT*/Bool* postNotP,
                           UInt imm8, Bool all_lanes, Int sz )
{
   if (imm8 >= 32) return False;

   /* First, compute a (preSwap, op, postNot) triple from
      the supplied imm8. */
   Bool pre = False;
   IROp op  = Iop_INVALID;
   Bool not = False;

#  define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
   // If you add a case here, add a corresponding test for both VCMPSD_128
   // and VCMPSS_128 in avx-1.c.
   // Cases 0xA and above are
   //    "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
   switch (imm8) {
      // "O" = ordered, "U" = unordered
      // "Q" = non-signalling (quiet), "S" = signalling
      //
      //             swap operands?
      //             |
      //             |      cmp op          invert after?
      //             |      |               |
      //             v      v               v
      case 0x0:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
      case 0x8:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
      case 0x10: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
      case 0x18: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_US
      //
      case 0x1:  XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
      case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
      //
      case 0x2:  XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
      case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
      //
      case 0x3:  XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
      case 0x13: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_S
      //
      // 0xC: this isn't really right because it returns all-1s when
      // either operand is a NaN, and it should return all-0s.
      case 0x4:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
      case 0xC:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
      case 0x14: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
      case 0x1C: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
      //
      case 0x5:  XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
      case 0x15: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
      //
      case 0x6:  XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
      case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
      //
      case 0x7:  XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
      case 0x17: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_S
      //
      case 0x9:  XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_US
      case 0x19: XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_UQ
      //
      case 0xA:  XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_US
      case 0x1A: XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_UQ
      //
      case 0xD:  XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OS
      case 0x1D: XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OQ
      //
      case 0xE:  XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OS
      case 0x1E: XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OQ
      // Unhandled:
      // 0xB  FALSE_OQ
      // 0xF  TRUE_UQ
      // 0x1B FALSE_OS
      // 0x1F TRUE_US
      /* Don't forget to add test cases to VCMPSS_128_<imm8> in
         avx-1.c if new cases turn up. */
      default: break;
   }
#  undef XXX
   if (op == Iop_INVALID) return False;

   /* Now convert the op into one with the same arithmetic but that is
      correct for the width and laneage requirements. */

   /**/ if (sz == 4 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
         default: vassert(0);
      }
   }
   else if (sz == 4 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
         default: vassert(0);
      }
   }
   else {
      vpanic("findSSECmpOp(amd64,guest)");
   }

   *preSwapP = pre; *opP = op; *postNotP = not;
   return True;
}


/* Handles SSE 32F/64F comparisons.
It can fail, in which case it 9340 returns the original delta to indicate failure. */ 9341 9342 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi, 9343 Prefix pfx, Long delta, 9344 const HChar* opname, Bool all_lanes, Int sz ) 9345 { 9346 Long delta0 = delta; 9347 HChar dis_buf[50]; 9348 Int alen; 9349 UInt imm8; 9350 IRTemp addr; 9351 Bool preSwap = False; 9352 IROp op = Iop_INVALID; 9353 Bool postNot = False; 9354 IRTemp plain = newTemp(Ity_V128); 9355 UChar rm = getUChar(delta); 9356 UShort mask = 0; 9357 vassert(sz == 4 || sz == 8); 9358 if (epartIsReg(rm)) { 9359 imm8 = getUChar(delta+1); 9360 if (imm8 >= 8) return delta0; /* FAIL */ 9361 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9362 if (!ok) return delta0; /* FAIL */ 9363 vassert(!preSwap); /* never needed for imm8 < 8 */ 9364 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 9365 getXMMReg(eregOfRexRM(pfx,rm))) ); 9366 delta += 2; 9367 DIP("%s $%u,%s,%s\n", opname, 9368 imm8, 9369 nameXMMReg(eregOfRexRM(pfx,rm)), 9370 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9371 } else { 9372 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 9373 imm8 = getUChar(delta+alen); 9374 if (imm8 >= 8) return delta0; /* FAIL */ 9375 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9376 if (!ok) return delta0; /* FAIL */ 9377 vassert(!preSwap); /* never needed for imm8 < 8 */ 9378 assign( plain, 9379 binop( 9380 op, 9381 getXMMReg(gregOfRexRM(pfx,rm)), 9382 all_lanes 9383 ? loadLE(Ity_V128, mkexpr(addr)) 9384 : sz == 8 9385 ? 
unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 9386 : /*sz==4*/ 9387 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 9388 ) 9389 ); 9390 delta += alen+1; 9391 DIP("%s $%u,%s,%s\n", opname, 9392 imm8, 9393 dis_buf, 9394 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9395 } 9396 9397 if (postNot && all_lanes) { 9398 putXMMReg( gregOfRexRM(pfx,rm), 9399 unop(Iop_NotV128, mkexpr(plain)) ); 9400 } 9401 else 9402 if (postNot && !all_lanes) { 9403 mask = toUShort(sz==4 ? 0x000F : 0x00FF); 9404 putXMMReg( gregOfRexRM(pfx,rm), 9405 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 9406 } 9407 else { 9408 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 9409 } 9410 9411 return delta; 9412 } 9413 9414 9415 /* Vector by scalar shift of G by the amount specified at the bottom 9416 of E. */ 9417 9418 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi, 9419 Prefix pfx, Long delta, 9420 const HChar* opname, IROp op ) 9421 { 9422 HChar dis_buf[50]; 9423 Int alen, size; 9424 IRTemp addr; 9425 Bool shl, shr, sar; 9426 UChar rm = getUChar(delta); 9427 IRTemp g0 = newTemp(Ity_V128); 9428 IRTemp g1 = newTemp(Ity_V128); 9429 IRTemp amt = newTemp(Ity_I64); 9430 IRTemp amt8 = newTemp(Ity_I8); 9431 if (epartIsReg(rm)) { 9432 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) ); 9433 DIP("%s %s,%s\n", opname, 9434 nameXMMReg(eregOfRexRM(pfx,rm)), 9435 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9436 delta++; 9437 } else { 9438 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9439 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 9440 DIP("%s %s,%s\n", opname, 9441 dis_buf, 9442 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9443 delta += alen; 9444 } 9445 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9446 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 9447 9448 shl = shr = sar = False; 9449 size = 0; 9450 switch (op) { 9451 case Iop_ShlN16x8: shl = True; size = 32; break; 9452 case Iop_ShlN32x4: shl = True; size = 32; break; 9453 case Iop_ShlN64x2: shl = True; size = 64; break; 9454 case Iop_SarN16x8: sar 
= True; size = 16; break; 9455 case Iop_SarN32x4: sar = True; size = 32; break; 9456 case Iop_ShrN16x8: shr = True; size = 16; break; 9457 case Iop_ShrN32x4: shr = True; size = 32; break; 9458 case Iop_ShrN64x2: shr = True; size = 64; break; 9459 default: vassert(0); 9460 } 9461 9462 if (shl || shr) { 9463 assign( 9464 g1, 9465 IRExpr_ITE( 9466 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9467 binop(op, mkexpr(g0), mkexpr(amt8)), 9468 mkV128(0x0000) 9469 ) 9470 ); 9471 } else 9472 if (sar) { 9473 assign( 9474 g1, 9475 IRExpr_ITE( 9476 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9477 binop(op, mkexpr(g0), mkexpr(amt8)), 9478 binop(op, mkexpr(g0), mkU8(size-1)) 9479 ) 9480 ); 9481 } else { 9482 vassert(0); 9483 } 9484 9485 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 9486 return delta; 9487 } 9488 9489 9490 /* Vector by scalar shift of E by an immediate byte. */ 9491 9492 static 9493 ULong dis_SSE_shiftE_imm ( Prefix pfx, 9494 Long delta, const HChar* opname, IROp op ) 9495 { 9496 Bool shl, shr, sar; 9497 UChar rm = getUChar(delta); 9498 IRTemp e0 = newTemp(Ity_V128); 9499 IRTemp e1 = newTemp(Ity_V128); 9500 UChar amt, size; 9501 vassert(epartIsReg(rm)); 9502 vassert(gregLO3ofRM(rm) == 2 9503 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 9504 amt = getUChar(delta+1); 9505 delta += 2; 9506 DIP("%s $%d,%s\n", opname, 9507 (Int)amt, 9508 nameXMMReg(eregOfRexRM(pfx,rm)) ); 9509 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 9510 9511 shl = shr = sar = False; 9512 size = 0; 9513 switch (op) { 9514 case Iop_ShlN16x8: shl = True; size = 16; break; 9515 case Iop_ShlN32x4: shl = True; size = 32; break; 9516 case Iop_ShlN64x2: shl = True; size = 64; break; 9517 case Iop_SarN16x8: sar = True; size = 16; break; 9518 case Iop_SarN32x4: sar = True; size = 32; break; 9519 case Iop_ShrN16x8: shr = True; size = 16; break; 9520 case Iop_ShrN32x4: shr = True; size = 32; break; 9521 case Iop_ShrN64x2: shr = True; size = 64; break; 9522 default: vassert(0); 9523 } 9524 9525 if 
(shl || shr) { 9526 assign( e1, amt >= size 9527 ? mkV128(0x0000) 9528 : binop(op, mkexpr(e0), mkU8(amt)) 9529 ); 9530 } else 9531 if (sar) { 9532 assign( e1, amt >= size 9533 ? binop(op, mkexpr(e0), mkU8(size-1)) 9534 : binop(op, mkexpr(e0), mkU8(amt)) 9535 ); 9536 } else { 9537 vassert(0); 9538 } 9539 9540 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 9541 return delta; 9542 } 9543 9544 9545 /* Get the current SSE rounding mode. */ 9546 9547 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 9548 { 9549 return 9550 unop( Iop_64to32, 9551 binop( Iop_And64, 9552 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 9553 mkU64(3) )); 9554 } 9555 9556 static void put_sse_roundingmode ( IRExpr* sseround ) 9557 { 9558 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 9559 stmt( IRStmt_Put( OFFB_SSEROUND, 9560 unop(Iop_32Uto64,sseround) ) ); 9561 } 9562 9563 /* Break a V128-bit value up into four 32-bit ints. */ 9564 9565 static void breakupV128to32s ( IRTemp t128, 9566 /*OUTs*/ 9567 IRTemp* t3, IRTemp* t2, 9568 IRTemp* t1, IRTemp* t0 ) 9569 { 9570 IRTemp hi64 = newTemp(Ity_I64); 9571 IRTemp lo64 = newTemp(Ity_I64); 9572 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 9573 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 9574 9575 vassert(t0 && *t0 == IRTemp_INVALID); 9576 vassert(t1 && *t1 == IRTemp_INVALID); 9577 vassert(t2 && *t2 == IRTemp_INVALID); 9578 vassert(t3 && *t3 == IRTemp_INVALID); 9579 9580 *t0 = newTemp(Ity_I32); 9581 *t1 = newTemp(Ity_I32); 9582 *t2 = newTemp(Ity_I32); 9583 *t3 = newTemp(Ity_I32); 9584 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 9585 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 9586 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 9587 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 9588 } 9589 9590 /* Construct a V128-bit value from four 32-bit ints. 
*/

/* Construct a V128 value from four 32-bit ints.  t3 supplies the most
   significant lane, t0 the least. */

static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_64HLtoV128,
             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
   );
}

/* Break a 64-bit value up into four 16-bit ints.  *t3 receives the
   most significant lane, *t0 the least.  The out-params must hold
   IRTemp_INVALID on entry; fresh temps are allocated into them. */

static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi32 = newTemp(Ity_I32);
   IRTemp lo32 = newTemp(Ity_I32);
   assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lo32, unop(Iop_64to32, mkexpr(t64)) );

   /* Insist the caller passed un-assigned temps, to catch reuse. */
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I16);
   *t1 = newTemp(Ity_I16);
   *t2 = newTemp(Ity_I16);
   *t3 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
   assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
   assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
   assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
}

/* Construct a 64-bit value from four 16-bit ints.  t3 supplies the
   most significant lane, t0 the least. */

static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
                             IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_32HLto64,
             binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
             binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
   );
}

/* Break a V256-bit value up into four 64-bit ints.  Lane numbering and
   out-param conventions as per breakup64to16s. */

static void breakupV256to64s ( IRTemp t256,
                               /*OUTs*/
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t0 = newTemp(Ity_I64);
   *t1 = newTemp(Ity_I64);
   *t2 = newTemp(Ity_I64);
   *t3 = newTemp(Ity_I64);
   assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
   assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
   assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
   assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
}

/* Break a V256-bit value up into two V128s.  *t1 gets the upper half,
   *t0 the lower. */

static void breakupV256toV128s ( IRTemp t256,
                                 /*OUTs*/
                                 IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_V128);
   *t1 = newTemp(Ity_V128);
   assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
   assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
}

/* Break a V256-bit value up into eight 32-bit ints.  *t7 is the most
   significant lane, *t0 the least. */

static void breakupV256to32s ( IRTemp t256,
                               /*OUTs*/
                               IRTemp* t7, IRTemp* t6,
                               IRTemp* t5, IRTemp* t4,
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   IRTemp t128_1 = IRTemp_INVALID;
   IRTemp t128_0 = IRTemp_INVALID;
   /* Split into two V128 halves, then extract 32-bit lanes from each. */
   breakupV256toV128s( t256, &t128_1, &t128_0 );
   breakupV128to32s( t128_1, t7, t6, t5, t4 );
   breakupV128to32s( t128_0, t3, t2, t1, t0 );
}

/* Break a V128-bit value up into two 64-bit ints.  *t1 gets the upper
   half, *t0 the lower. */

static void breakupV128to64s ( IRTemp t128,
                               /*OUTs*/
                               IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_I64);
   *t1 = newTemp(Ity_I64);
   assign( *t0, unop(Iop_V128to64,   mkexpr(t128)) );
   assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
}

/* Construct a V256-bit value from eight 32-bit ints.  t7 supplies the
   most significant lane, t0 the least. */

static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
                               IRTemp t5, IRTemp t4,
                               IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
                    binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
                    binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
   );
}

/* Construct a V256-bit value from four 64-bit ints.  t3 supplies the
   most significant lane, t0 the least. */

static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
             binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
   );
}

/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.
   Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

   (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);
   IRTemp aalo32s = newTemp(Ity_I64);
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   /* Sign-extend each 16-bit lane to 32 bits: interleave the value
      with itself (placing each lane in the top half of a 32-bit
      lane) and then arithmetic-shift each 32-bit lane right by 16. */
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   /* The constant 1 in each of the two 32-bit lanes, used as the
      rounding increment. */
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   /* Narrow the 32-bit lane results back to 16-bit lanes. */
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}

/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns.  Given two 64-bit
   values (aa,bb), computes, for each lane:

   if aa_lane < 0 then - bb_lane
   else if aa_lane > 0 then bb_lane
   else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   /* Select lane-width-specific subtract and signed-compare ops. */
   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   /* negMask: all-ones in lanes where aa < 0; posMask: all-ones where
      aa > 0.  Lanes where aa == 0 are in neither mask and yield 0. */
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );

}


/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

   if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
*/
static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
{
   IRTemp res     = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   /* Select lane-width-specific subtract and arithmetic-shift ops. */
   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   /* Arithmetic shift by (lane width - 1) replicates each lane's sign
      bit across the whole lane: negMask is all-ones in negative
      lanes, posMask is its complement. */
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
                 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
   return res;
}

/* XMM version of math_PABS_MMX: applies it independently to both
   64-bit halves of aa. */
static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V128);
   IRTemp aaHi = newTemp(Ity_I64);
   IRTemp aaLo = newTemp(Ity_I64);
   assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
   assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
   assign(res, binop(Iop_64HLtoV128,
                     mkexpr(math_PABS_MMX(aaHi, laneszB)),
                     mkexpr(math_PABS_MMX(aaLo, laneszB))));
   return res;
}

/* Specialisations of math_PABS_XMM, since there's no easy way to do
   partial applications in C :-( */
static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 4);
}

static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 2);
}

static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 1);
}

/* YMM version of math_PABS_XMM: applies it independently to both
   V128 halves of aa. */
static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V256);
   IRTemp aaHi = IRTemp_INVALID;
   IRTemp aaLo = IRTemp_INVALID;
   breakupV256toV128s(aa, &aaHi, &aaLo);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PABS_XMM(aaHi, laneszB)),
                     mkexpr(math_PABS_XMM(aaLo, laneszB))));
   return res;
}

static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 4);
}

static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 2);
}

static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 1);
}

/* Treat hi64:lo64 as a 128-bit quantity and extract the 64 bits
   starting byteShift bytes up from its bottom.  Only valid for
   shifts of 1 .. 7 bytes; other amounts are handled directly by the
   caller (math_PALIGNR_XMM). */
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Long byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

/* PALIGNR core: concatenate dV:sV (256 bits), shift right by imm8
   bytes, and return the low 128 bits of the result.  imm8 values of
   32 .. 255 therefore yield zero. */
static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp res = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   IRTemp rHi = newTemp(Ity_I64);
   IRTemp rLo = newTemp(Ity_I64);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   /* Case split on imm8, 8 bytes (one 64-bit chunk) at a time, using
      the helper for the shifts that straddle chunk boundaries. */
   if (imm8 == 0) {
      assign( rHi, mkexpr(sHi) );
      assign( rLo, mkexpr(sLo) );
   }
   else if (imm8 >= 1 && imm8 <= 7) {
      assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
      assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
   }
   else if (imm8 == 8) {
      assign( rHi, mkexpr(dLo) );
      assign( rLo, mkexpr(sHi) );
   }
   else if (imm8 >= 9 && imm8 <= 15) {
      assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
      assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
   }
   else if (imm8 == 16) {
      assign( rHi, mkexpr(dHi) );
      assign( rLo, mkexpr(dLo) );
   }
   else if (imm8 >= 17 && imm8 <= 23) {
      assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
      assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly!
*/ 10011 static 10012 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask ) 10013 { 10014 stmt( 10015 IRStmt_Exit( 10016 binop(Iop_CmpNE64, 10017 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)), 10018 mkU64(0)), 10019 Ijk_SigSEGV, 10020 IRConst_U64(guest_RIP_curr_instr), 10021 OFFB_RIP 10022 ) 10023 ); 10024 } 10025 10026 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) { 10027 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1); 10028 } 10029 10030 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) { 10031 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1); 10032 } 10033 10034 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) { 10035 gen_SEGV_if_not_XX_aligned(effective_addr, 64-1); 10036 } 10037 10038 /* Helper for deciding whether a given insn (starting at the opcode 10039 byte) may validly be used with a LOCK prefix. The following insns 10040 may be used with LOCK when their destination operand is in memory. 10041 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0  =  addb $imm8,  rm8
   81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
   82 /0  =  addb $imm8,  rm8
   83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

   00     =  addb r8,  rm8
   01     =  addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1  =  dec rm8
   FF /1  =  dec rm32  and  dec rm16

   FE /0  =  inc rm8
   FF /0  =  inc rm32  and  inc rm16

   F6 /3  =  neg rm8
   F7 /3  =  neg rm32  and  neg rm16

   F6 /2  =  not rm8
   F7 /2  =  not rm32  and  not rm16

   0F BB     =  btcw r16, rm16    and  btcl r32, rm32
   0F BA /7  =  btcw $imm8, rm16  and  btcl $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      /* ADD/OR/ADC/SBB/AND/SUB/XOR, reg-to-rm forms */
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* Group-1 imm forms; /0../6 covers ADD..XOR but excludes /7,
         which is CMP (not LOCKable). */
      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* INC (/0) and DEC (/1) */
      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* NOT (/2) and NEG (/3) */
      case 0xF6: case 0xF7:
         if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* XCHG */
      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* Two-byte (0F xx) opcodes */
      case 0x0F: {
         switch (opc[1]) {
            /* BTC, BTR, BTS -- register bit-offset forms */
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* BTS (/5), BTR (/6), BTC (/7) -- imm bit-offset forms */
            case 0xBA:
               if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG */
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG8B (/1) */
            case 0xC7:
               if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            /* XADD */
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2                 ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* (U)COMISD: compare the low F64 lanes of G (xmm) and E (xmm or
   memory) and copy the result of CmpF64, masked with 0x45, into the
   flags thunk via the CC_OP_COPY scheme. */
static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F64);
   IRTemp argR  = newTemp(Ity_F64);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ?
"u" : "", 10190 nameXMMReg(eregOfRexRM(pfx,modrm)), 10191 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10192 } else { 10193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10194 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 10195 delta += alen; 10196 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 10197 opc==0x2E ? "u" : "", 10198 dis_buf, 10199 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10200 } 10201 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 10202 0/*lowest lane*/ ) ); 10203 10204 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10205 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10206 stmt( IRStmt_Put( 10207 OFFB_CC_DEP1, 10208 binop( Iop_And64, 10209 unop( Iop_32Uto64, 10210 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 10211 mkU64(0x45) 10212 ))); 10213 return delta; 10214 } 10215 10216 10217 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx, 10218 Long delta, Bool isAvx, UChar opc ) 10219 { 10220 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/); 10221 Int alen = 0; 10222 HChar dis_buf[50]; 10223 IRTemp argL = newTemp(Ity_F32); 10224 IRTemp argR = newTemp(Ity_F32); 10225 UChar modrm = getUChar(delta); 10226 IRTemp addr = IRTemp_INVALID; 10227 if (epartIsReg(modrm)) { 10228 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 10229 0/*lowest lane*/ ) ); 10230 delta += 1; 10231 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10232 opc==0x2E ? "u" : "", 10233 nameXMMReg(eregOfRexRM(pfx,modrm)), 10234 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10235 } else { 10236 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10237 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 10238 delta += alen; 10239 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10240 opc==0x2E ? 
"u" : "", 10241 dis_buf, 10242 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10243 } 10244 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 10245 0/*lowest lane*/ ) ); 10246 10247 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10248 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10249 stmt( IRStmt_Put( 10250 OFFB_CC_DEP1, 10251 binop( Iop_And64, 10252 unop( Iop_32Uto64, 10253 binop(Iop_CmpF64, 10254 unop(Iop_F32toF64,mkexpr(argL)), 10255 unop(Iop_F32toF64,mkexpr(argR)))), 10256 mkU64(0x45) 10257 ))); 10258 return delta; 10259 } 10260 10261 10262 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx, 10263 Long delta, Bool writesYmm ) 10264 { 10265 Int order; 10266 Int alen = 0; 10267 HChar dis_buf[50]; 10268 IRTemp sV = newTemp(Ity_V128); 10269 UChar modrm = getUChar(delta); 10270 const HChar* strV = writesYmm ? "v" : ""; 10271 IRTemp addr = IRTemp_INVALID; 10272 if (epartIsReg(modrm)) { 10273 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10274 order = (Int)getUChar(delta+1); 10275 delta += 1+1; 10276 DIP("%spshufd $%d,%s,%s\n", strV, order, 10277 nameXMMReg(eregOfRexRM(pfx,modrm)), 10278 nameXMMReg(gregOfRexRM(pfx,modrm))); 10279 } else { 10280 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10281 1/*byte after the amode*/ ); 10282 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10283 order = (Int)getUChar(delta+alen); 10284 delta += alen+1; 10285 DIP("%spshufd $%d,%s,%s\n", strV, order, 10286 dis_buf, 10287 nameXMMReg(gregOfRexRM(pfx,modrm))); 10288 } 10289 10290 IRTemp s3, s2, s1, s0; 10291 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10292 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10293 10294 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10295 IRTemp dV = newTemp(Ity_V128); 10296 assign(dV, 10297 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 10298 SEL((order>>2)&3), SEL((order>>0)&3) ) 10299 ); 10300 # undef SEL 10301 10302 (writesYmm ? 
putYMMRegLoAndZU : putXMMReg)
      (gregOfRexRM(pfx,modrm), mkexpr(dV));
   return delta;
}


/* VPSHUFD, 256-bit form: permute the 32-bit lanes within each 128-bit
   half of the source independently, using the same 'order' byte for
   both halves. */
static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int order;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V256);
   UChar modrm = getUChar(delta);
   IRTemp addr = IRTemp_INVALID;
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      order = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
   }

   IRTemp s[8];
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
                         &s[3], &s[2], &s[1], &s[0] );

   /* Lanes 4..7 are selected only from s[4..7], lanes 0..3 only from
      s[0..3] -- the shuffle never crosses the 128-bit boundary. */
   putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
                                 s[4 + ((order>>4)&3)],
                                 s[4 + ((order>>2)&3)],
                                 s[4 + ((order>>0)&3)],
                                 s[0 + ((order>>6)&3)],
                                 s[0 + ((order>>4)&3)],
                                 s[0 + ((order>>2)&3)],
                                 s[0 + ((order>>0)&3)] ) );
   return delta;
}


/* PSRLDQ core: shift the whole 128-bit value right by imm bytes,
   shifting in zeroes.  imm >= 16 gives all zeroes. */
static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, mkexpr(hi64) );
   }
   else
   if (imm > 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1 <= imm <= 7: bits shifted out of hi64 flow into lo64. */
      assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
      assign( lo64r,
              binop( Iop_Or64,
                     binop(Iop_Shr64, mkexpr(lo64),
                                      mkU8(8 * imm)),
                     binop(Iop_Shl64, mkexpr(hi64),
                                      mkU8(8 * (8 - imm)) )
                   )
            );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}


/* PSLLDQ core: shift the whole 128-bit value left by imm bytes,
   shifting in zeroes.  Mirror image of math_PSRLDQ. */
static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, mkexpr(lo64) );
   }
   else
   if (imm > 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1 <= imm <= 7: bits shifted out of lo64 flow into hi64. */
      assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
      assign( hi64r,
              binop( Iop_Or64,
                     binop(Iop_Shl64, mkexpr(hi64),
                                      mkU8(8 * imm)),
                     binop(Iop_Shr64, mkexpr(lo64),
                                      mkU8(8 * (8 - imm)) )
                   )
            );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}


/* CVTSD2SI / CVTTSD2SI: convert the low F64 of E to a 32- or 64-bit
   integer in G; the 0x2C (truncating) form forces round-to-zero. */
static Long dis_CVTxSD2SI
( const VexAbiInfo* vbi, Prefix pfx,
  Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f64lo  = newTemp(Ity_F64);
   Bool   r2zero = toBool(opc == 0x2C);

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   /* The truncating variant ignores MXCSR and always rounds toward
      zero. */
   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
   }

   return delta;
}


/* CVTSS2SI / CVTTSS2SI: F32 variant of dis_CVTxSD2SI; the source is
   widened to F64 before the integer conversion. */
static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f32lo  = newTemp(Ity_F32);
   Bool   r2zero = toBool(opc == 0x2C);

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   }

   return delta;
}


/* CVTPS2PD, 128-bit: widen the two low F32 lanes of E to two F64
   lanes in G.  In the AVX case the upper YMM lane of G is zeroed. */
static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp f32lo = newTemp(Ity_F32);
   IRTemp f32hi = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( f32lo, getXMMRegLane32F(rE, 0) );
      assign( f32hi, getXMMRegLane32F(rE, 1) );
      delta += 1;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
      assign( f32hi, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
      delta += alen;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ?
"v" : "", dis_buf, nameXMMReg(rG)); 10568 } 10569 10570 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) ); 10571 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) ); 10572 if (isAvx) 10573 putYMMRegLane128( rG, 1, mkV128(0)); 10574 return delta; 10575 } 10576 10577 10578 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx, 10579 Long delta ) 10580 { 10581 IRTemp addr = IRTemp_INVALID; 10582 Int alen = 0; 10583 HChar dis_buf[50]; 10584 IRTemp f32_0 = newTemp(Ity_F32); 10585 IRTemp f32_1 = newTemp(Ity_F32); 10586 IRTemp f32_2 = newTemp(Ity_F32); 10587 IRTemp f32_3 = newTemp(Ity_F32); 10588 UChar modrm = getUChar(delta); 10589 UInt rG = gregOfRexRM(pfx,modrm); 10590 if (epartIsReg(modrm)) { 10591 UInt rE = eregOfRexRM(pfx,modrm); 10592 assign( f32_0, getXMMRegLane32F(rE, 0) ); 10593 assign( f32_1, getXMMRegLane32F(rE, 1) ); 10594 assign( f32_2, getXMMRegLane32F(rE, 2) ); 10595 assign( f32_3, getXMMRegLane32F(rE, 3) ); 10596 delta += 1; 10597 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 10598 } else { 10599 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10600 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) ); 10601 assign( f32_1, loadLE(Ity_F32, 10602 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10603 assign( f32_2, loadLE(Ity_F32, 10604 binop(Iop_Add64,mkexpr(addr),mkU64(8))) ); 10605 assign( f32_3, loadLE(Ity_F32, 10606 binop(Iop_Add64,mkexpr(addr),mkU64(12))) ); 10607 delta += alen; 10608 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG)); 10609 } 10610 10611 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) ); 10612 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) ); 10613 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) ); 10614 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) ); 10615 return delta; 10616 } 10617 10618 10619 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx, 10620 Long delta, Bool isAvx ) 10621 { 10622 IRTemp addr = IRTemp_INVALID; 10623 Int alen 
= 0; 10624 HChar dis_buf[50]; 10625 UChar modrm = getUChar(delta); 10626 UInt rG = gregOfRexRM(pfx,modrm); 10627 IRTemp argV = newTemp(Ity_V128); 10628 IRTemp rmode = newTemp(Ity_I32); 10629 if (epartIsReg(modrm)) { 10630 UInt rE = eregOfRexRM(pfx,modrm); 10631 assign( argV, getXMMReg(rE) ); 10632 delta += 1; 10633 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10634 nameXMMReg(rE), nameXMMReg(rG)); 10635 } else { 10636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10637 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10638 delta += alen; 10639 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10640 dis_buf, nameXMMReg(rG) ); 10641 } 10642 10643 assign( rmode, get_sse_roundingmode() ); 10644 IRTemp t0 = newTemp(Ity_F64); 10645 IRTemp t1 = newTemp(Ity_F64); 10646 assign( t0, unop(Iop_ReinterpI64asF64, 10647 unop(Iop_V128to64, mkexpr(argV))) ); 10648 assign( t1, unop(Iop_ReinterpI64asF64, 10649 unop(Iop_V128HIto64, mkexpr(argV))) ); 10650 10651 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) ) 10652 putXMMRegLane32( rG, 3, mkU32(0) ); 10653 putXMMRegLane32( rG, 2, mkU32(0) ); 10654 putXMMRegLane32F( rG, 1, CVT(t1) ); 10655 putXMMRegLane32F( rG, 0, CVT(t0) ); 10656 # undef CVT 10657 if (isAvx) 10658 putYMMRegLane128( rG, 1, mkV128(0) ); 10659 10660 return delta; 10661 } 10662 10663 10664 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 10665 Long delta, Bool isAvx, Bool r2zero ) 10666 { 10667 IRTemp addr = IRTemp_INVALID; 10668 Int alen = 0; 10669 HChar dis_buf[50]; 10670 UChar modrm = getUChar(delta); 10671 IRTemp argV = newTemp(Ity_V128); 10672 IRTemp rmode = newTemp(Ity_I32); 10673 UInt rG = gregOfRexRM(pfx,modrm); 10674 IRTemp t0, t1, t2, t3; 10675 10676 if (epartIsReg(modrm)) { 10677 UInt rE = eregOfRexRM(pfx,modrm); 10678 assign( argV, getXMMReg(rE) ); 10679 delta += 1; 10680 DIP("%scvt%sps2dq %s,%s\n", 10681 isAvx ? "v" : "", r2zero ? 
"t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10682 } else { 10683 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10684 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10685 delta += alen; 10686 DIP("%scvt%sps2dq %s,%s\n", 10687 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10688 } 10689 10690 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10691 : get_sse_roundingmode() ); 10692 t0 = t1 = t2 = t3 = IRTemp_INVALID; 10693 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10694 /* This is less than ideal. If it turns out to be a performance 10695 bottleneck it can be improved. */ 10696 # define CVT(_t) \ 10697 binop( Iop_F64toI32S, \ 10698 mkexpr(rmode), \ 10699 unop( Iop_F32toF64, \ 10700 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10701 10702 putXMMRegLane32( rG, 3, CVT(t3) ); 10703 putXMMRegLane32( rG, 2, CVT(t2) ); 10704 putXMMRegLane32( rG, 1, CVT(t1) ); 10705 putXMMRegLane32( rG, 0, CVT(t0) ); 10706 # undef CVT 10707 if (isAvx) 10708 putYMMRegLane128( rG, 1, mkV128(0) ); 10709 10710 return delta; 10711 } 10712 10713 10714 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 10715 Long delta, Bool r2zero ) 10716 { 10717 IRTemp addr = IRTemp_INVALID; 10718 Int alen = 0; 10719 HChar dis_buf[50]; 10720 UChar modrm = getUChar(delta); 10721 IRTemp argV = newTemp(Ity_V256); 10722 IRTemp rmode = newTemp(Ity_I32); 10723 UInt rG = gregOfRexRM(pfx,modrm); 10724 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10725 10726 if (epartIsReg(modrm)) { 10727 UInt rE = eregOfRexRM(pfx,modrm); 10728 assign( argV, getYMMReg(rE) ); 10729 delta += 1; 10730 DIP("vcvt%sps2dq %s,%s\n", 10731 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG)); 10732 } else { 10733 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10734 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10735 delta += alen; 10736 DIP("vcvt%sps2dq %s,%s\n", 10737 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) ); 10738 } 10739 10740 assign( rmode, r2zero ? 
mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
   breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
   /* This is less than ideal.  If it turns out to be a performance
      bottleneck it can be improved. */
   /* Widen F32 -> F64 (exact), then convert F64 -> I32S with the
      chosen rounding mode. */
#  define CVT(_t)                            \
      binop( Iop_F64toI32S,                  \
             mkexpr(rmode),                  \
             unop( Iop_F32toF64,             \
                   unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

   putYMMRegLane32( rG, 7, CVT(t7) );
   putYMMRegLane32( rG, 6, CVT(t6) );
   putYMMRegLane32( rG, 5, CVT(t5) );
   putYMMRegLane32( rG, 4, CVT(t4) );
   putYMMRegLane32( rG, 3, CVT(t3) );
   putYMMRegLane32( rG, 2, CVT(t2) );
   putYMMRegLane32( rG, 1, CVT(t1) );
   putYMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT

   return delta;
}


/* Handles 128 bit CVTPD2DQ (r2zero == False) and CVTTPD2DQ (r2zero
   == True), SSE and AVX forms: convert 2 packed F64 lanes to signed
   32-bit ints in the low half of the destination; the upper two
   32-bit lanes are zeroed. */
static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   IRTemp argV = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%spd2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      /* The "x" suffix marks the 128-bit memory form in disassembly
         output, distinguishing it from the 256-bit "y" form, since
         the mnemonic alone is ambiguous for a memory source. */
      DIP("%scvt%spd2dqx %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   /* Truncating variant forces round-towards-zero. */
   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = newTemp(Ity_F64);
   t1 = newTemp(Ity_F64);
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t) binop( Iop_F64toI32S,                   \
                         mkexpr(rmode),                   \
                         mkexpr(_t) )

   putXMMRegLane32( rG, 3, mkU32(0) );
   putXMMRegLane32( rG, 2, mkU32(0) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   /* AVX form zeroes the upper YMM lane. */
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* Handles 256 bit VCVTPD2DQ (r2zero == False) and VCVTTPD2DQ (r2zero
   == True): convert 4 packed F64 lanes to 4 signed 32-bit ints.  The
   result is only 128 bits wide, hence the XMM-named destination; the
   upper YMM lane is zeroed. */
static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   IRTemp argV = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%spd2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      /* "y" suffix: 256-bit memory form (cf. "x" in the 128-bit
         version). */
      DIP("vcvt%spd2dqy %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   breakupV256to64s( argV, &t3, &t2, &t1, &t0 );

#  define CVT(_t) binop( Iop_F64toI32S,                   \
                         mkexpr(rmode),                   \
                         unop( Iop_ReinterpI64asF64,      \
                               mkexpr(_t) ) )

   putXMMRegLane32( rG, 3, CVT(t3) );
   putXMMRegLane32( rG, 2, CVT(t2) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* Handles 128 bit CVTDQ2PS, SSE and AVX forms: convert 4 packed
   signed 32-bit ints to F32, rounding (where inexact) with the
   current SSE rounding mode. */
static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   IRTemp argV = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   breakupV128to32s( argV, &t3, &t2, &t1, &t0 );

   /* I32 -> F64 is exact; the only rounding happens narrowing
      F64 -> F32. */
#  define CVT(_t) binop( Iop_F64toF32,                    \
                         mkexpr(rmode),                   \
                         unop(Iop_I32StoF64,mkexpr(_t)))

   putXMMRegLane32F( rG, 3, CVT(t3) );
   putXMMRegLane32F( rG, 2, CVT(t2) );
   putXMMRegLane32F( rG, 1, CVT(t1) );
   putXMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}

/* Handles 256 bit VCVTDQ2PS: 8 packed signed 32-bit ints to F32. */
static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   IRTemp argV = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3, t4, t5, t6, t7;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   t4 = IRTemp_INVALID;
   t5 = IRTemp_INVALID;
   t6 = IRTemp_INVALID;
   t7 = IRTemp_INVALID;
   breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );

#  define CVT(_t) binop( Iop_F64toF32,                    \
                         mkexpr(rmode),                   \
                         unop(Iop_I32StoF64,mkexpr(_t)))

   putYMMRegLane32F( rG, 7, CVT(t7) );
   putYMMRegLane32F( rG, 6, CVT(t6) );
   putYMMRegLane32F( rG, 5, CVT(t5) );
   putYMMRegLane32F( rG, 4, CVT(t4) );
   putYMMRegLane32F( rG, 3, CVT(t3) );
   putYMMRegLane32F( rG, 2, CVT(t2) );
   putYMMRegLane32F( rG, 1, CVT(t1) );
   putYMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT

   return delta;
}


/* Handles 128 bit (V)PMOVMSKB: collect the top bit of each of the 16
   byte lanes of the E register into the low 16 bits of the G (integer)
   register, zero-extended to 32 bits.  Register source only. */
static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt rE = eregOfRexRM(pfx,modrm);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);
   IRTemp t1 = newTemp(Ity_I32);
   assign(t0, getXMMReg(rE));
   assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
   putIReg32(rG, mkexpr(t1));
   DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
       nameIReg32(rG));
   delta += 1;
   return delta;
}


/* Handles 256 bit VPMOVMSKB: as the 128-bit version, but the MSBs of
   all 32 byte lanes are collected, one 16-bit half per 128-bit lane. */
static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt rE = eregOfRexRM(pfx,modrm);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);
   IRTemp t1 = newTemp(Ity_V128);
   IRTemp t2 = newTemp(Ity_I16);
   IRTemp t3 = newTemp(Ity_I16);
   assign(t0, getYMMRegLane128(rE, 0));
   assign(t1, getYMMRegLane128(rE, 1));
   assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
   assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
   putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
   DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   delta += 1;
   return delta;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI?  I think the
   relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4".
*/
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   /* xIsH selects the high (UNPCKHPS) rather than low (UNPCKLPS)
      pairs of 32-bit lanes for interleaving. */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
                    : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   /* xIsH: take the high 64-bit lane from each input; else the low. */
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}


/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   /* The steering is applied per 128-bit half: lanes {3,2} come from
      the upper halves, lanes {1,0} from the lower halves. */
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                                            mkexpr(s0), mkexpr(d0)));
   return res;
}


/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPS: the low two result lanes are
   selected from dV and the high two from sV, per the 2-bit fields of
   imm8. */
static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}


/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   twice. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPD: the low result lane is selected
   from dV by imm8[0], the high lane from sV by imm8[1]. */
static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64, mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}


/* 256-bit SHUFPD: each 128-bit half is an independent 128-bit SHUFPD,
   with imm8[1:0] steering the low half and imm8[3:2] the high half. */
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPD: each bit of imm8[1:0] selects
   the corresponding 64-bit lane from sV (bit set) or dV (bit clear). */
static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   /* Expand the 2 selector bits into a 16-bit mkV128 mask, one mask
      bit per result byte. */
   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0); break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* 256-bit BLENDPD: two independent 128-bit blends, imm8[1:0] for the
   low half and imm8[3:2] for the high half. */
static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPS: each bit of imm8[3:0] selects
   the corresponding 32-bit lane from sV (bit set) or dV (bit clear). */
static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Precomputed expansion of each 4-bit selector into a 16-bit
      byte-granularity mkV128 mask. */
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* 256-bit BLENDPS: two independent 128-bit blends, imm8[3:0] for the
   low half and imm8[7:4] for the high half. */
static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit PBLENDW: each bit of imm8 selects the
   corresponding 16-bit lane from sV (bit set) or dV (bit clear). */
static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make w be a 16-bit version of imm8, formed by duplicating each
      bit in imm8.
*/
   Int i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}


/* Does the maths for 128 bit PMULUDQ: unsigned widening multiply of
   the even-numbered (0 and 2) 32-bit lanes, giving two 64-bit
   products. */
static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* 256-bit PMULUDQ: per-128-bit-half application of the 128-bit
   version. */
static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULUDQ_128(sHi, dHi)),
                     mkexpr(math_PMULUDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMULDQ: as PMULUDQ but a signed
   widening multiply. */
static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* 256-bit PMULDQ: per-128-bit-half application of the 128-bit
   version. */
static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULDQ_128(sHi, dHi)),
                     mkexpr(math_PMULDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMADDWD, by farming each 64-bit half out
   to the amd64g_calculate_mmx_pmaddwd clean helper. */
static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
   return res;
}


/* 256-bit PMADDWD: per-128-bit-half application of the 128-bit
   version. */
static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDWD_128(dHi, sHi)),
                     mkexpr(math_PMADDWD_128(dLo, sLo))));
   return res;
}


/* Does the maths for ADDSUBPD: compute both d+s and d-s on all lanes,
   then take the sum for the high lane and the difference for the low
   lane. */
static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp rm = newTemp(Ity_I32);

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64, mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}


/* 256-bit VADDSUBPD: odd-numbered 64-bit lanes get d+s, even-numbered
   lanes get d-s. */
static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}


/* Does the maths for ADDSUBPS: odd-numbered 32-bit lanes get d+s,
   even-numbered lanes get d-s. */
static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp rm = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}


/* 256-bit VADDSUBPS: as the 128-bit version, over 8 lanes. */
static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm = newTemp(Ity_I32);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}


/* Handle 128 bit PSHUFLW and PSHUFHW.
*/
/* xIsH == True gives PSHUFHW (shuffle the high 64 bits, copy the low
   64 bits through unchanged); xIsH == False gives PSHUFLW (the
   converse).  Handles both SSE and AVX forms. */
static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   UInt imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV = newTemp(Ity_V128);
   dV = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );

   /* Each 2-bit field of imm8 selects one of the four 16-bit lanes of
      the mutable half. */
   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
             ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   /* AVX form zeroes the upper YMM lane of the destination. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}


/* Handle 256 bit PSHUFLW and PSHUFHW.  The shuffle acts independently
   on each 128-bit half, with the same imm8 steering in both. */
static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool xIsH )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   UInt imm8;
   IRTemp sV, s[8], sV64[4], dVhi, dVlo;
   sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   sV = newTemp(Ity_V256);
   dVhi = newTemp(Ity_I64);
   dVlo = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameYMMReg(rG));
   }

   /* s[7..4] are the mutable 16-bit lanes of the upper 128 bits,
      s[3..0] those of the lower 128 bits. */
   breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
   breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
   breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );

   assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
                              s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
   assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
                              s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
   /* Shuffled 64-bit halves go where xIsH says; the other halves pass
      through unchanged. */
   putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
                                 xIsH ? sV64[2] : dVhi,
                                 xIsH ? dVlo : sV64[1],
                                 xIsH ? sV64[0] : dVlo ) );
   return delta;
}


/* Handles (V)PEXTRW with a register E operand only: extract the
   16-bit lane of the xmm register selected by imm8[2:0], zero-extend
   to 32 bits and write to the G (integer) register.  Returns the
   incoming delta unchanged (== decode failure) for the disallowed
   memory form. */
static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long deltaIN = delta;
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp sV = newTemp(Ity_V128);
   IRTemp d16 = newTemp(Ity_I16);
   UInt imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
          imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}


/* Handles 128 bit CVTDQ2PD, SSE and AVX forms: widen the two signed
   32-bit ints in the low 64 bits of the E operand to F64.  The
   conversion is exact, so no rounding mode is needed. */
static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   IRTemp arg64 = newTemp(Ity_I64);
   UInt rG = gregOfRexRM(pfx,modrm);
   const HChar* mbV = isAvx ?
"v" : "";
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( arg64, getXMMRegLane64(rE, 0) );
      delta += 1;
      DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }
   putXMMRegLane64F(
      rG, 0,
      unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
   );
   putXMMRegLane64F(
      rG, 1,
      unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
   );
   /* AVX form zeroes the upper YMM lane of the destination. */
   if (isAvx)
      putYMMRegLane128(rG, 1, mkV128(0));
   return delta;
}


/* Handles (V)STMXCSR: store a synthesised %mxcsr value to memory.
   Memory-form E operand only (guaranteed by the caller). */
static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /* Fake up a native SSE mxcsr word.  The only thing it depends on
      is SSEROUND[1:0], so call a clean helper to cook it up.
   */
   /* ULong amd64h_create_mxcsr ( ULong sseround ) */
   DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
   storeLE(
      mkexpr(addr),
      unop(Iop_64to32,
           mkIRExprCCall(
              Ity_I64, 0/*regp*/,
              "amd64g_create_mxcsr", &amd64g_create_mxcsr,
              mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
           )
      )
   );
   return delta;
}


/* Handles (V)LDMXCSR: load %mxcsr from memory.  Only the rounding
   mode bits are honoured; an emulation warning may be raised for
   unsupported settings.  Memory-form E operand only (guaranteed by
   the caller). */
static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   UChar modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */

   IRTemp t64 = newTemp(Ity_I64);
   IRTemp ew = newTemp(Ity_I32);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);

   /* The only thing we observe in %mxcsr is the rounding mode.
      Therefore, pass the 32-bit value (SSE native-format control
      word) to a clean helper, getting back a 64-bit value, the
      lower half of which is the SSEROUND value to store, and the
      upper half of which is the emulation-warning token which may
      be generated.
   */
   /* ULong amd64h_check_ldmxcsr ( ULong ); */
   assign( t64, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_check_ldmxcsr",
                   &amd64g_check_ldmxcsr,
                   mkIRExprVec_1(
                      unop(Iop_32Uto64,
                           loadLE(Ity_I32, mkexpr(addr))
                      )
                   )
                )
         );

   put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
   assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
   put_emwarn( mkexpr(ew) );
   /* Finally, if an emulation warning was reported, side-exit to
      the next insn, reporting the warning, so that Valgrind's
      dispatcher sees the warning. */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
         Ijk_EmWarn,
         IRConst_U64(guest_RIP_bbstart+delta),
         OFFB_RIP
      )
   );
   return delta;
}


static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
                     mkU64(1));

   /* Declare we're writing memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't written, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      written.
*/ 11733 d0->mFx = Ifx_Write; 11734 d0->mAddr = mkexpr(addr); 11735 d0->mSize = 160; 11736 11737 /* declare we're reading guest state */ 11738 d0->nFxState = 5; 11739 vex_bzero(&d0->fxState, sizeof(d0->fxState)); 11740 11741 d0->fxState[0].fx = Ifx_Read; 11742 d0->fxState[0].offset = OFFB_FTOP; 11743 d0->fxState[0].size = sizeof(UInt); 11744 11745 d0->fxState[1].fx = Ifx_Read; 11746 d0->fxState[1].offset = OFFB_FPREGS; 11747 d0->fxState[1].size = 8 * sizeof(ULong); 11748 11749 d0->fxState[2].fx = Ifx_Read; 11750 d0->fxState[2].offset = OFFB_FPTAGS; 11751 d0->fxState[2].size = 8 * sizeof(UChar); 11752 11753 d0->fxState[3].fx = Ifx_Read; 11754 d0->fxState[3].offset = OFFB_FPROUND; 11755 d0->fxState[3].size = sizeof(ULong); 11756 11757 d0->fxState[4].fx = Ifx_Read; 11758 d0->fxState[4].offset = OFFB_FC3210; 11759 d0->fxState[4].size = sizeof(ULong); 11760 11761 stmt( IRStmt_Dirty(d0) ); 11762 11763 /* ------ rfbm[1] gates the SSE state ------ */ 11764 11765 IRTemp rfbm_1 = newTemp(Ity_I64); 11766 IRTemp rfbm_1or2 = newTemp(Ity_I64); 11767 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2))); 11768 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6))); 11769 11770 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2)); 11771 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0)); 11772 11773 /* Uses dirty helper: 11774 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS 11775 ( VexGuestAMD64State*, ULong ) 11776 This creates only MXCSR and MXCSR_MASK. We need to do this if 11777 either components 1 (SSE) or 2 (AVX) are requested. Hence the 11778 guard condition is a bit more complex. 11779 */ 11780 IRDirty* d1 = unsafeIRDirty_0_N ( 11781 0/*regparms*/, 11782 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS", 11783 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS, 11784 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 11785 ); 11786 d1->guard = guard_1or2; 11787 11788 /* Declare we're writing memory: MXCSR and MXCSR_MASK. 
Note that 11789 the code for rbfm[0] just above claims a write of 0 .. 159, so 11790 this duplicates it. But at least correctly connects 24 .. 31 to 11791 the MXCSR guest state representation (SSEROUND field). */ 11792 d1->mFx = Ifx_Write; 11793 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24)); 11794 d1->mSize = 8; 11795 11796 /* declare we're reading guest state */ 11797 d1->nFxState = 1; 11798 vex_bzero(&d1->fxState, sizeof(d1->fxState)); 11799 11800 d1->fxState[0].fx = Ifx_Read; 11801 d1->fxState[0].offset = OFFB_SSEROUND; 11802 d1->fxState[0].size = sizeof(ULong); 11803 11804 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing 11805 else. We do the actual register array, XMM[0..15], separately, 11806 in order that any undefinedness in the XMM registers is tracked 11807 separately by Memcheck and does not "infect" the in-memory 11808 shadow for the other parts of the image. */ 11809 stmt( IRStmt_Dirty(d1) ); 11810 11811 /* And now the XMMs themselves. */ 11812 UInt reg; 11813 for (reg = 0; reg < 16; reg++) { 11814 stmt( IRStmt_StoreG( 11815 Iend_LE, 11816 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)), 11817 getXMMReg(reg), 11818 guard_1 11819 )); 11820 } 11821 11822 /* ------ rfbm[2] gates the AVX state ------ */ 11823 /* Component 2 is just a bunch of register saves, so we'll do it 11824 inline, just to be simple and to be Memcheck friendly. 
*/ 11825 11826 IRTemp rfbm_2 = newTemp(Ity_I64); 11827 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4))); 11828 11829 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4)); 11830 11831 for (reg = 0; reg < 16; reg++) { 11832 stmt( IRStmt_StoreG( 11833 Iend_LE, 11834 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)), 11835 getYMMRegLane128(reg,1), 11836 guard_2 11837 )); 11838 } 11839 } 11840 11841 11842 static Long dis_XSAVE ( const VexAbiInfo* vbi, 11843 Prefix pfx, Long delta, Int sz ) 11844 { 11845 /* Note that the presence or absence of REX.W (indicated here by 11846 |sz|) slightly affects the written format: whether the saved FPU 11847 IP and DP pointers are 64 or 32 bits. But the helper function 11848 we call simply writes zero bits in the relevant fields, which 11849 are 64 bits regardless of what REX.W is, and so it's good enough 11850 (iow, equally broken) in both cases. */ 11851 IRTemp addr = IRTemp_INVALID; 11852 Int alen = 0; 11853 HChar dis_buf[50]; 11854 UChar modrm = getUChar(delta); 11855 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11856 vassert(sz == 4 || sz == 8); /* ditto */ 11857 11858 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11859 delta += alen; 11860 gen_SEGV_if_not_64_aligned(addr); 11861 11862 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 11863 11864 /* VEX's caller is assumed to have checked this. */ 11865 const ULong aSSUMED_XCR0_VALUE = 7; 11866 11867 IRTemp rfbm = newTemp(Ity_I64); 11868 assign(rfbm, 11869 binop(Iop_And64, 11870 binop(Iop_Or64, 11871 binop(Iop_Shl64, 11872 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)), 11873 unop(Iop_32Uto64, getIRegRAX(4))), 11874 mkU64(aSSUMED_XCR0_VALUE))); 11875 11876 gen_XSAVE_SEQUENCE(addr, rfbm); 11877 11878 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by 11879 OR-ing the RFBM value into it. 
*/ 11880 IRTemp addr_plus_512 = newTemp(Ity_I64); 11881 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512))); 11882 storeLE( mkexpr(addr_plus_512), 11883 binop(Iop_Or8, 11884 unop(Iop_64to8, mkexpr(rfbm)), 11885 loadLE(Ity_I8, mkexpr(addr_plus_512))) ); 11886 11887 return delta; 11888 } 11889 11890 11891 static Long dis_FXSAVE ( const VexAbiInfo* vbi, 11892 Prefix pfx, Long delta, Int sz ) 11893 { 11894 /* See comment in dis_XSAVE about the significance of REX.W. */ 11895 IRTemp addr = IRTemp_INVALID; 11896 Int alen = 0; 11897 HChar dis_buf[50]; 11898 UChar modrm = getUChar(delta); 11899 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11900 vassert(sz == 4 || sz == 8); /* ditto */ 11901 11902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11903 delta += alen; 11904 gen_SEGV_if_not_16_aligned(addr); 11905 11906 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 11907 11908 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm 11909 to 0b011, generate the XSAVE sequence accordingly, and let iropt 11910 fold out the unused (AVX) parts accordingly. */ 11911 IRTemp rfbm = newTemp(Ity_I64); 11912 assign(rfbm, mkU64(3)); 11913 gen_XSAVE_SEQUENCE(addr, rfbm); 11914 11915 return delta; 11916 } 11917 11918 11919 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm ) 11920 { 11921 /* ------ rfbm[0] gates the x87 state ------ */ 11922 11923 /* If rfbm[0] == 1, we have to write the x87 state. If 11924 xstate_bv[0] == 1, we will read it from the memory image, else 11925 we'll set it to initial values. Doing this with a helper 11926 function and getting the definedness flow annotations correct is 11927 too difficult, so generate stupid but simple code: first set the 11928 registers to initial values, regardless of xstate_bv[0]. Then, 11929 conditionally restore from the memory image. 
*/ 11930 11931 IRTemp rfbm_0 = newTemp(Ity_I64); 11932 IRTemp xstate_bv_0 = newTemp(Ity_I64); 11933 IRTemp restore_0 = newTemp(Ity_I64); 11934 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1))); 11935 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1))); 11936 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0))); 11937 11938 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) ); 11939 11940 /* Uses dirty helper: 11941 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong ) 11942 */ 11943 IRDirty* d0 = unsafeIRDirty_0_N ( 11944 0/*regparms*/, 11945 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0", 11946 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0, 11947 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 11948 ); 11949 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0)); 11950 11951 /* Declare we're reading memory. Really, bytes 24 through 31 11952 (MXCSR and MXCSR_MASK) aren't read, but we can't express more 11953 than 1 memory area here, so just mark the whole thing as 11954 read. 
*/ 11955 d0->mFx = Ifx_Read; 11956 d0->mAddr = mkexpr(addr); 11957 d0->mSize = 160; 11958 11959 /* declare we're writing guest state */ 11960 d0->nFxState = 5; 11961 vex_bzero(&d0->fxState, sizeof(d0->fxState)); 11962 11963 d0->fxState[0].fx = Ifx_Write; 11964 d0->fxState[0].offset = OFFB_FTOP; 11965 d0->fxState[0].size = sizeof(UInt); 11966 11967 d0->fxState[1].fx = Ifx_Write; 11968 d0->fxState[1].offset = OFFB_FPREGS; 11969 d0->fxState[1].size = 8 * sizeof(ULong); 11970 11971 d0->fxState[2].fx = Ifx_Write; 11972 d0->fxState[2].offset = OFFB_FPTAGS; 11973 d0->fxState[2].size = 8 * sizeof(UChar); 11974 11975 d0->fxState[3].fx = Ifx_Write; 11976 d0->fxState[3].offset = OFFB_FPROUND; 11977 d0->fxState[3].size = sizeof(ULong); 11978 11979 d0->fxState[4].fx = Ifx_Write; 11980 d0->fxState[4].offset = OFFB_FC3210; 11981 d0->fxState[4].size = sizeof(ULong); 11982 11983 stmt( IRStmt_Dirty(d0) ); 11984 11985 /* ------ rfbm[1] gates the SSE state ------ */ 11986 11987 /* Same scheme as component 0: first zero it out, and then possibly 11988 restore from the memory area. 
*/ 11989 IRTemp rfbm_1 = newTemp(Ity_I64); 11990 IRTemp xstate_bv_1 = newTemp(Ity_I64); 11991 IRTemp restore_1 = newTemp(Ity_I64); 11992 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2))); 11993 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2))); 11994 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1))); 11995 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0)); 11996 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0)); 11997 11998 IRTemp rfbm_1or2 = newTemp(Ity_I64); 11999 IRTemp xstate_bv_1or2 = newTemp(Ity_I64); 12000 IRTemp restore_1or2 = newTemp(Ity_I64); 12001 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6))); 12002 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6))); 12003 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2), 12004 mkexpr(xstate_bv_1or2))); 12005 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0)); 12006 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0)); 12007 12008 /* The areas in question are: SSEROUND, and the XMM register array. */ 12009 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST)); 12010 12011 UInt reg; 12012 for (reg = 0; reg < 16; reg++) { 12013 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0)); 12014 } 12015 12016 /* And now possibly restore from MXCSR/MXCSR_MASK */ 12017 /* Uses dirty helper: 12018 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS 12019 ( VexGuestAMD64State*, ULong ) 12020 This restores from only MXCSR and MXCSR_MASK. We need to do 12021 this if either components 1 (SSE) or 2 (AVX) are requested. 12022 Hence the guard condition is a bit more complex. 
12023 */ 12024 IRDirty* d1 = unsafeIRDirty_0_N ( 12025 0/*regparms*/, 12026 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS", 12027 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS, 12028 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) ) 12029 ) ; 12030 d1->guard = restore_1or2e; 12031 12032 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that 12033 the code for rbfm[0] just above claims a read of 0 .. 159, so 12034 this duplicates it. But at least correctly connects 24 .. 31 to 12035 the MXCSR guest state representation (SSEROUND field). */ 12036 d1->mFx = Ifx_Read; 12037 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24)); 12038 d1->mSize = 8; 12039 12040 /* declare we're writing guest state */ 12041 d1->nFxState = 1; 12042 vex_bzero(&d1->fxState, sizeof(d1->fxState)); 12043 12044 d1->fxState[0].fx = Ifx_Write; 12045 d1->fxState[0].offset = OFFB_SSEROUND; 12046 d1->fxState[0].size = sizeof(ULong); 12047 12048 /* Call the helper. This creates SSEROUND but nothing 12049 else. We do the actual register array, XMM[0..15], separately, 12050 in order that any undefinedness in the XMM registers is tracked 12051 separately by Memcheck and is not "infected" by the in-memory 12052 shadow for the other parts of the image. */ 12053 stmt( IRStmt_Dirty(d1) ); 12054 12055 /* And now the XMMs themselves. For each register, we PUT either 12056 its old value, or the value loaded from memory. One convenient 12057 way to do that is with a conditional load that has its the 12058 default value, the old value of the register. 
*/ 12059 for (reg = 0; reg < 16; reg++) { 12060 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)); 12061 IRExpr* alt = getXMMReg(reg); 12062 IRTemp loadedValue = newTemp(Ity_V128); 12063 stmt( IRStmt_LoadG(Iend_LE, 12064 ILGop_IdentV128, 12065 loadedValue, ea, alt, restore_1e) ); 12066 putXMMReg(reg, mkexpr(loadedValue)); 12067 } 12068 12069 /* ------ rfbm[2] gates the AVX state ------ */ 12070 /* Component 2 is just a bunch of register loads, so we'll do it 12071 inline, just to be simple and to be Memcheck friendly. */ 12072 12073 /* Same scheme as component 0: first zero it out, and then possibly 12074 restore from the memory area. */ 12075 IRTemp rfbm_2 = newTemp(Ity_I64); 12076 IRTemp xstate_bv_2 = newTemp(Ity_I64); 12077 IRTemp restore_2 = newTemp(Ity_I64); 12078 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4))); 12079 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4))); 12080 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2))); 12081 12082 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0)); 12083 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0)); 12084 12085 for (reg = 0; reg < 16; reg++) { 12086 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0)); 12087 } 12088 12089 for (reg = 0; reg < 16; reg++) { 12090 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)); 12091 IRExpr* alt = getYMMRegLane128(reg, 1); 12092 IRTemp loadedValue = newTemp(Ity_V128); 12093 stmt( IRStmt_LoadG(Iend_LE, 12094 ILGop_IdentV128, 12095 loadedValue, ea, alt, restore_2e) ); 12096 putYMMRegLane128(reg, 1, mkexpr(loadedValue)); 12097 } 12098 } 12099 12100 12101 static Long dis_XRSTOR ( const VexAbiInfo* vbi, 12102 Prefix pfx, Long delta, Int sz ) 12103 { 12104 /* As with XRSTOR above we ignore the value of REX.W since we're 12105 not bothering with the FPU DP and IP fields. 
*/ 12106 IRTemp addr = IRTemp_INVALID; 12107 Int alen = 0; 12108 HChar dis_buf[50]; 12109 UChar modrm = getUChar(delta); 12110 vassert(!epartIsReg(modrm)); /* ensured by caller */ 12111 vassert(sz == 4 || sz == 8); /* ditto */ 12112 12113 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12114 delta += alen; 12115 gen_SEGV_if_not_64_aligned(addr); 12116 12117 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 12118 12119 /* VEX's caller is assumed to have checked this. */ 12120 const ULong aSSUMED_XCR0_VALUE = 7; 12121 12122 IRTemp rfbm = newTemp(Ity_I64); 12123 assign(rfbm, 12124 binop(Iop_And64, 12125 binop(Iop_Or64, 12126 binop(Iop_Shl64, 12127 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)), 12128 unop(Iop_32Uto64, getIRegRAX(4))), 12129 mkU64(aSSUMED_XCR0_VALUE))); 12130 12131 IRTemp xstate_bv = newTemp(Ity_I64); 12132 assign(xstate_bv, loadLE(Ity_I64, 12133 binop(Iop_Add64, mkexpr(addr), mkU64(512+0)))); 12134 12135 IRTemp xcomp_bv = newTemp(Ity_I64); 12136 assign(xcomp_bv, loadLE(Ity_I64, 12137 binop(Iop_Add64, mkexpr(addr), mkU64(512+8)))); 12138 12139 IRTemp xsavehdr_23_16 = newTemp(Ity_I64); 12140 assign( xsavehdr_23_16, 12141 loadLE(Ity_I64, 12142 binop(Iop_Add64, mkexpr(addr), mkU64(512+16)))); 12143 12144 /* We must fault if 12145 * xcomp_bv[63] == 1, since this simulated CPU does not support 12146 the compaction extension. 12147 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7). 12148 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to 12149 imply that xcomp_bv must be zero. 12150 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 
0 12151 */ 12152 IRTemp fault_if_nonzero = newTemp(Ity_I64); 12153 assign(fault_if_nonzero, 12154 binop(Iop_Or64, 12155 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)), 12156 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16)))); 12157 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)), 12158 Ijk_SigSEGV, 12159 IRConst_U64(guest_RIP_curr_instr), 12160 OFFB_RIP 12161 )); 12162 12163 /* We are guaranteed now that both xstate_bv and rfbm are in the 12164 range 0 .. 7. Generate the restore sequence proper. */ 12165 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm); 12166 12167 return delta; 12168 } 12169 12170 12171 static Long dis_FXRSTOR ( const VexAbiInfo* vbi, 12172 Prefix pfx, Long delta, Int sz ) 12173 { 12174 /* As with FXSAVE above we ignore the value of REX.W since we're 12175 not bothering with the FPU DP and IP fields. */ 12176 IRTemp addr = IRTemp_INVALID; 12177 Int alen = 0; 12178 HChar dis_buf[50]; 12179 UChar modrm = getUChar(delta); 12180 vassert(!epartIsReg(modrm)); /* ensured by caller */ 12181 vassert(sz == 4 || sz == 8); /* ditto */ 12182 12183 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12184 delta += alen; 12185 gen_SEGV_if_not_16_aligned(addr); 12186 12187 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 12188 12189 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also 12190 as if components 0 and 1 are set as present in XSTATE_BV in the 12191 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore, 12192 generate the XRSTOR sequence accordingly, and let iropt fold out 12193 the unused (AVX) parts accordingly. 
*/ 12194 IRTemp three = newTemp(Ity_I64); 12195 assign(three, mkU64(3)); 12196 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/); 12197 12198 return delta; 12199 } 12200 12201 12202 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 ) 12203 { 12204 vassert(imm8 >= 0 && imm8 <= 7); 12205 12206 // Create a V128 value which has the selected word in the 12207 // specified lane, and zeroes everywhere else. 12208 IRTemp tmp128 = newTemp(Ity_V128); 12209 IRTemp halfshift = newTemp(Ity_I64); 12210 assign(halfshift, binop(Iop_Shl64, 12211 unop(Iop_16Uto64, mkexpr(u16)), 12212 mkU8(16 * (imm8 & 3)))); 12213 if (imm8 < 4) { 12214 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 12215 } else { 12216 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 12217 } 12218 12219 UShort mask = ~(3 << (imm8 * 2)); 12220 IRTemp res = newTemp(Ity_V128); 12221 assign( res, binop(Iop_OrV128, 12222 mkexpr(tmp128), 12223 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) ); 12224 return res; 12225 } 12226 12227 12228 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV ) 12229 { 12230 IRTemp s1, s0, d1, d0; 12231 s1 = s0 = d1 = d0 = IRTemp_INVALID; 12232 12233 breakupV128to64s( sV, &s1, &s0 ); 12234 breakupV128to64s( dV, &d1, &d0 ); 12235 12236 IRTemp res = newTemp(Ity_V128); 12237 assign( res, 12238 binop(Iop_64HLtoV128, 12239 mkIRExprCCall(Ity_I64, 0/*regparms*/, 12240 "amd64g_calculate_mmx_psadbw", 12241 &amd64g_calculate_mmx_psadbw, 12242 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))), 12243 mkIRExprCCall(Ity_I64, 0/*regparms*/, 12244 "amd64g_calculate_mmx_psadbw", 12245 &amd64g_calculate_mmx_psadbw, 12246 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) ); 12247 return res; 12248 } 12249 12250 12251 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV ) 12252 { 12253 IRTemp sHi, sLo, dHi, dLo; 12254 sHi = sLo = dHi = dLo = IRTemp_INVALID; 12255 breakupV256toV128s( dV, &dHi, &dLo); 12256 breakupV256toV128s( sV, &sHi, &sLo); 12257 IRTemp res = 
newTemp(Ity_V256); 12258 assign(res, binop(Iop_V128HLtoV256, 12259 mkexpr(math_PSADBW_128(dHi, sHi)), 12260 mkexpr(math_PSADBW_128(dLo, sLo)))); 12261 return res; 12262 } 12263 12264 12265 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx, 12266 Long delta, Bool isAvx ) 12267 { 12268 IRTemp regD = newTemp(Ity_V128); 12269 IRTemp mask = newTemp(Ity_V128); 12270 IRTemp olddata = newTemp(Ity_V128); 12271 IRTemp newdata = newTemp(Ity_V128); 12272 IRTemp addr = newTemp(Ity_I64); 12273 UChar modrm = getUChar(delta); 12274 UInt rG = gregOfRexRM(pfx,modrm); 12275 UInt rE = eregOfRexRM(pfx,modrm); 12276 12277 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 12278 assign( regD, getXMMReg( rG )); 12279 12280 /* Unfortunately can't do the obvious thing with SarN8x16 12281 here since that can't be re-emitted as SSE2 code - no such 12282 insn. */ 12283 assign( mask, 12284 binop(Iop_64HLtoV128, 12285 binop(Iop_SarN8x8, 12286 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), 12287 mkU8(7) ), 12288 binop(Iop_SarN8x8, 12289 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), 12290 mkU8(7) ) )); 12291 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 12292 assign( newdata, binop(Iop_OrV128, 12293 binop(Iop_AndV128, 12294 mkexpr(regD), 12295 mkexpr(mask) ), 12296 binop(Iop_AndV128, 12297 mkexpr(olddata), 12298 unop(Iop_NotV128, mkexpr(mask)))) ); 12299 storeLE( mkexpr(addr), mkexpr(newdata) ); 12300 12301 delta += 1; 12302 DIP("%smaskmovdqu %s,%s\n", isAvx ? 
"v" : "", 12303 nameXMMReg(rE), nameXMMReg(rG) ); 12304 return delta; 12305 } 12306 12307 12308 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx, 12309 Long delta, Bool isAvx ) 12310 { 12311 UChar modrm = getUChar(delta); 12312 UInt rG = gregOfRexRM(pfx,modrm); 12313 UInt rE = eregOfRexRM(pfx,modrm); 12314 IRTemp t0 = newTemp(Ity_I32); 12315 IRTemp t1 = newTemp(Ity_I32); 12316 IRTemp t2 = newTemp(Ity_I32); 12317 IRTemp t3 = newTemp(Ity_I32); 12318 delta += 1; 12319 assign( t0, binop( Iop_And32, 12320 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)), 12321 mkU32(1) )); 12322 assign( t1, binop( Iop_And32, 12323 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)), 12324 mkU32(2) )); 12325 assign( t2, binop( Iop_And32, 12326 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)), 12327 mkU32(4) )); 12328 assign( t3, binop( Iop_And32, 12329 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)), 12330 mkU32(8) )); 12331 putIReg32( rG, binop(Iop_Or32, 12332 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12333 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 12334 DIP("%smovmskps %s,%s\n", isAvx ? 
"v" : "", 12335 nameXMMReg(rE), nameIReg32(rG)); 12336 return delta; 12337 } 12338 12339 12340 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 12341 { 12342 UChar modrm = getUChar(delta); 12343 UInt rG = gregOfRexRM(pfx,modrm); 12344 UInt rE = eregOfRexRM(pfx,modrm); 12345 IRTemp t0 = newTemp(Ity_I32); 12346 IRTemp t1 = newTemp(Ity_I32); 12347 IRTemp t2 = newTemp(Ity_I32); 12348 IRTemp t3 = newTemp(Ity_I32); 12349 IRTemp t4 = newTemp(Ity_I32); 12350 IRTemp t5 = newTemp(Ity_I32); 12351 IRTemp t6 = newTemp(Ity_I32); 12352 IRTemp t7 = newTemp(Ity_I32); 12353 delta += 1; 12354 assign( t0, binop( Iop_And32, 12355 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)), 12356 mkU32(1) )); 12357 assign( t1, binop( Iop_And32, 12358 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)), 12359 mkU32(2) )); 12360 assign( t2, binop( Iop_And32, 12361 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)), 12362 mkU32(4) )); 12363 assign( t3, binop( Iop_And32, 12364 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)), 12365 mkU32(8) )); 12366 assign( t4, binop( Iop_And32, 12367 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)), 12368 mkU32(16) )); 12369 assign( t5, binop( Iop_And32, 12370 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)), 12371 mkU32(32) )); 12372 assign( t6, binop( Iop_And32, 12373 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)), 12374 mkU32(64) )); 12375 assign( t7, binop( Iop_And32, 12376 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)), 12377 mkU32(128) )); 12378 putIReg32( rG, binop(Iop_Or32, 12379 binop(Iop_Or32, 12380 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12381 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ), 12382 binop(Iop_Or32, 12383 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)), 12384 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) ); 12385 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 12386 return delta; 12387 } 12388 12389 12390 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx, 12391 Long delta, Bool isAvx ) 12392 { 
12393 UChar modrm = getUChar(delta); 12394 UInt rG = gregOfRexRM(pfx,modrm); 12395 UInt rE = eregOfRexRM(pfx,modrm); 12396 IRTemp t0 = newTemp(Ity_I32); 12397 IRTemp t1 = newTemp(Ity_I32); 12398 delta += 1; 12399 assign( t0, binop( Iop_And32, 12400 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 12401 mkU32(1) )); 12402 assign( t1, binop( Iop_And32, 12403 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 12404 mkU32(2) )); 12405 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) ); 12406 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "", 12407 nameXMMReg(rE), nameIReg32(rG)); 12408 return delta; 12409 } 12410 12411 12412 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 12413 { 12414 UChar modrm = getUChar(delta); 12415 UInt rG = gregOfRexRM(pfx,modrm); 12416 UInt rE = eregOfRexRM(pfx,modrm); 12417 IRTemp t0 = newTemp(Ity_I32); 12418 IRTemp t1 = newTemp(Ity_I32); 12419 IRTemp t2 = newTemp(Ity_I32); 12420 IRTemp t3 = newTemp(Ity_I32); 12421 delta += 1; 12422 assign( t0, binop( Iop_And32, 12423 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)), 12424 mkU32(1) )); 12425 assign( t1, binop( Iop_And32, 12426 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)), 12427 mkU32(2) )); 12428 assign( t2, binop( Iop_And32, 12429 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)), 12430 mkU32(4) )); 12431 assign( t3, binop( Iop_And32, 12432 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)), 12433 mkU32(8) )); 12434 putIReg32( rG, binop(Iop_Or32, 12435 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12436 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 12437 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 12438 return delta; 12439 } 12440 12441 12442 /* Note, this also handles SSE(1) insns. 
*/ 12443 __attribute__((noinline)) 12444 static 12445 Long dis_ESC_0F__SSE2 ( Bool* decode_OK, 12446 const VexArchInfo* archinfo, 12447 const VexAbiInfo* vbi, 12448 Prefix pfx, Int sz, Long deltaIN, 12449 DisResult* dres ) 12450 { 12451 IRTemp addr = IRTemp_INVALID; 12452 IRTemp t0 = IRTemp_INVALID; 12453 IRTemp t1 = IRTemp_INVALID; 12454 IRTemp t2 = IRTemp_INVALID; 12455 IRTemp t3 = IRTemp_INVALID; 12456 IRTemp t4 = IRTemp_INVALID; 12457 IRTemp t5 = IRTemp_INVALID; 12458 IRTemp t6 = IRTemp_INVALID; 12459 UChar modrm = 0; 12460 Int alen = 0; 12461 HChar dis_buf[50]; 12462 12463 *decode_OK = False; 12464 12465 Long delta = deltaIN; 12466 UChar opc = getUChar(delta); 12467 delta++; 12468 switch (opc) { 12469 12470 case 0x10: 12471 if (have66noF2noF3(pfx) 12472 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12473 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 12474 modrm = getUChar(delta); 12475 if (epartIsReg(modrm)) { 12476 putXMMReg( gregOfRexRM(pfx,modrm), 12477 getXMMReg( eregOfRexRM(pfx,modrm) )); 12478 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12479 nameXMMReg(gregOfRexRM(pfx,modrm))); 12480 delta += 1; 12481 } else { 12482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12483 putXMMReg( gregOfRexRM(pfx,modrm), 12484 loadLE(Ity_V128, mkexpr(addr)) ); 12485 DIP("movupd %s,%s\n", dis_buf, 12486 nameXMMReg(gregOfRexRM(pfx,modrm))); 12487 delta += alen; 12488 } 12489 goto decode_success; 12490 } 12491 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 12492 G (lo half xmm). If E is mem, upper half of G is zeroed out. 12493 If E is reg, upper half of G is unchanged. 
*/ 12494 if (haveF2no66noF3(pfx) 12495 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) { 12496 modrm = getUChar(delta); 12497 if (epartIsReg(modrm)) { 12498 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12499 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 12500 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12501 nameXMMReg(gregOfRexRM(pfx,modrm))); 12502 delta += 1; 12503 } else { 12504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12505 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12506 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12507 loadLE(Ity_I64, mkexpr(addr)) ); 12508 DIP("movsd %s,%s\n", dis_buf, 12509 nameXMMReg(gregOfRexRM(pfx,modrm))); 12510 delta += alen; 12511 } 12512 goto decode_success; 12513 } 12514 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 12515 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 12516 if (haveF3no66noF2(pfx) 12517 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12518 modrm = getUChar(delta); 12519 if (epartIsReg(modrm)) { 12520 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12521 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 12522 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12523 nameXMMReg(gregOfRexRM(pfx,modrm))); 12524 delta += 1; 12525 } else { 12526 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12527 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12528 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12529 loadLE(Ity_I32, mkexpr(addr)) ); 12530 DIP("movss %s,%s\n", dis_buf, 12531 nameXMMReg(gregOfRexRM(pfx,modrm))); 12532 delta += alen; 12533 } 12534 goto decode_success; 12535 } 12536 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). 
*/ 12537 if (haveNo66noF2noF3(pfx) 12538 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12539 modrm = getUChar(delta); 12540 if (epartIsReg(modrm)) { 12541 putXMMReg( gregOfRexRM(pfx,modrm), 12542 getXMMReg( eregOfRexRM(pfx,modrm) )); 12543 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12544 nameXMMReg(gregOfRexRM(pfx,modrm))); 12545 delta += 1; 12546 } else { 12547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12548 putXMMReg( gregOfRexRM(pfx,modrm), 12549 loadLE(Ity_V128, mkexpr(addr)) ); 12550 DIP("movups %s,%s\n", dis_buf, 12551 nameXMMReg(gregOfRexRM(pfx,modrm))); 12552 delta += alen; 12553 } 12554 goto decode_success; 12555 } 12556 break; 12557 12558 case 0x11: 12559 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 12560 or lo half xmm). */ 12561 if (haveF2no66noF3(pfx) 12562 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12563 modrm = getUChar(delta); 12564 if (epartIsReg(modrm)) { 12565 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, 12566 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 12567 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12568 nameXMMReg(eregOfRexRM(pfx,modrm))); 12569 delta += 1; 12570 } else { 12571 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12572 storeLE( mkexpr(addr), 12573 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 12574 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12575 dis_buf); 12576 delta += alen; 12577 } 12578 goto decode_success; 12579 } 12580 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 12581 or lo 1/4 xmm). 
*/ 12582 if (haveF3no66noF2(pfx) && sz == 4) { 12583 modrm = getUChar(delta); 12584 if (epartIsReg(modrm)) { 12585 /* fall through, we don't yet have a test case */ 12586 } else { 12587 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12588 storeLE( mkexpr(addr), 12589 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 12590 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12591 dis_buf); 12592 delta += alen; 12593 goto decode_success; 12594 } 12595 } 12596 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 12597 if (have66noF2noF3(pfx) 12598 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12599 modrm = getUChar(delta); 12600 if (epartIsReg(modrm)) { 12601 putXMMReg( eregOfRexRM(pfx,modrm), 12602 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12603 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12604 nameXMMReg(eregOfRexRM(pfx,modrm))); 12605 delta += 1; 12606 } else { 12607 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12608 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12609 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12610 dis_buf ); 12611 delta += alen; 12612 } 12613 goto decode_success; 12614 } 12615 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 12616 if (haveNo66noF2noF3(pfx) 12617 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12618 modrm = getUChar(delta); 12619 if (epartIsReg(modrm)) { 12620 /* fall through; awaiting test case */ 12621 } else { 12622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12623 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12624 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12625 dis_buf ); 12626 delta += alen; 12627 goto decode_success; 12628 } 12629 } 12630 break; 12631 12632 case 0x12: 12633 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 12634 /* Identical to MOVLPS ? 
      */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; apparently reg-reg is not possible */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            /* Merge the 64-bit load into the low lane only; the upper
               lane of the destination XMM register is left unchanged. */
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlpd %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
            goto decode_success;
         }
      }
      /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
      /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* Register operand: this encoding is MOVHLPS (hi lane of E
               to lo lane of G). */
            delta += 1;
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
            DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            /* Memory operand: this encoding is MOVLPS (mem to lo lane). */
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlps %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
         }
         goto decode_success;
      }
      break;

   case 0x13:
      /* 0F 13 = MOVLPS -- move from low half of XMM to mem.
*/ 12677 if (haveNo66noF2noF3(pfx) 12678 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12679 modrm = getUChar(delta); 12680 if (!epartIsReg(modrm)) { 12681 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12682 delta += alen; 12683 storeLE( mkexpr(addr), 12684 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12685 0/*lower lane*/ ) ); 12686 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12687 dis_buf); 12688 goto decode_success; 12689 } 12690 /* else fall through */ 12691 } 12692 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 12693 /* Identical to MOVLPS ? */ 12694 if (have66noF2noF3(pfx) 12695 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12696 modrm = getUChar(delta); 12697 if (!epartIsReg(modrm)) { 12698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12699 delta += alen; 12700 storeLE( mkexpr(addr), 12701 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12702 0/*lower lane*/ ) ); 12703 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12704 dis_buf); 12705 goto decode_success; 12706 } 12707 /* else fall through */ 12708 } 12709 break; 12710 12711 case 0x14: 12712 case 0x15: 12713 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 12714 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 12715 /* These just appear to be special cases of SHUFPS */ 12716 if (haveNo66noF2noF3(pfx) && sz == 4) { 12717 Bool hi = toBool(opc == 0x15); 12718 IRTemp sV = newTemp(Ity_V128); 12719 IRTemp dV = newTemp(Ity_V128); 12720 modrm = getUChar(delta); 12721 UInt rG = gregOfRexRM(pfx,modrm); 12722 assign( dV, getXMMReg(rG) ); 12723 if (epartIsReg(modrm)) { 12724 UInt rE = eregOfRexRM(pfx,modrm); 12725 assign( sV, getXMMReg(rE) ); 12726 delta += 1; 12727 DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", 12728 nameXMMReg(rE), nameXMMReg(rG)); 12729 } else { 12730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12731 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12732 delta += alen; 12733 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12734 dis_buf, nameXMMReg(rG)); 12735 } 12736 IRTemp res = math_UNPCKxPS_128( sV, dV, hi ); 12737 putXMMReg( rG, mkexpr(res) ); 12738 goto decode_success; 12739 } 12740 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 12741 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 12742 /* These just appear to be special cases of SHUFPS */ 12743 if (have66noF2noF3(pfx) 12744 && sz == 2 /* could be 8 if rex also present */) { 12745 Bool hi = toBool(opc == 0x15); 12746 IRTemp sV = newTemp(Ity_V128); 12747 IRTemp dV = newTemp(Ity_V128); 12748 modrm = getUChar(delta); 12749 UInt rG = gregOfRexRM(pfx,modrm); 12750 assign( dV, getXMMReg(rG) ); 12751 if (epartIsReg(modrm)) { 12752 UInt rE = eregOfRexRM(pfx,modrm); 12753 assign( sV, getXMMReg(rE) ); 12754 delta += 1; 12755 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12756 nameXMMReg(rE), nameXMMReg(rG)); 12757 } else { 12758 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12759 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12760 delta += alen; 12761 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12762 dis_buf, nameXMMReg(rG)); 12763 } 12764 IRTemp res = math_UNPCKxPD_128( sV, dV, hi ); 12765 putXMMReg( rG, mkexpr(res) ); 12766 goto decode_success; 12767 } 12768 break; 12769 12770 case 0x16: 12771 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 12772 /* These seems identical to MOVHPS. This instruction encoding is 12773 completely crazy. 
*/ 12774 if (have66noF2noF3(pfx) 12775 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12776 modrm = getUChar(delta); 12777 if (epartIsReg(modrm)) { 12778 /* fall through; apparently reg-reg is not possible */ 12779 } else { 12780 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12781 delta += alen; 12782 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12783 loadLE(Ity_I64, mkexpr(addr)) ); 12784 DIP("movhpd %s,%s\n", dis_buf, 12785 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12786 goto decode_success; 12787 } 12788 } 12789 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 12790 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 12791 if (haveNo66noF2noF3(pfx) 12792 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12793 modrm = getUChar(delta); 12794 if (epartIsReg(modrm)) { 12795 delta += 1; 12796 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12797 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 12798 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12799 nameXMMReg(gregOfRexRM(pfx,modrm))); 12800 } else { 12801 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12802 delta += alen; 12803 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12804 loadLE(Ity_I64, mkexpr(addr)) ); 12805 DIP("movhps %s,%s\n", dis_buf, 12806 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12807 } 12808 goto decode_success; 12809 } 12810 break; 12811 12812 case 0x17: 12813 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. 
*/ 12814 if (haveNo66noF2noF3(pfx) 12815 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12816 modrm = getUChar(delta); 12817 if (!epartIsReg(modrm)) { 12818 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12819 delta += alen; 12820 storeLE( mkexpr(addr), 12821 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12822 1/*upper lane*/ ) ); 12823 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12824 dis_buf); 12825 goto decode_success; 12826 } 12827 /* else fall through */ 12828 } 12829 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 12830 /* Again, this seems identical to MOVHPS. */ 12831 if (have66noF2noF3(pfx) 12832 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12833 modrm = getUChar(delta); 12834 if (!epartIsReg(modrm)) { 12835 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12836 delta += alen; 12837 storeLE( mkexpr(addr), 12838 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12839 1/*upper lane*/ ) ); 12840 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12841 dis_buf); 12842 goto decode_success; 12843 } 12844 /* else fall through */ 12845 } 12846 break; 12847 12848 case 0x18: 12849 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 12850 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 12851 /* 0F 18 /2 = PREFETCH1 */ 12852 /* 0F 18 /3 = PREFETCH2 */ 12853 if (haveNo66noF2noF3(pfx) 12854 && !epartIsReg(getUChar(delta)) 12855 && gregLO3ofRM(getUChar(delta)) >= 0 12856 && gregLO3ofRM(getUChar(delta)) <= 3) { 12857 const HChar* hintstr = "??"; 12858 12859 modrm = getUChar(delta); 12860 vassert(!epartIsReg(modrm)); 12861 12862 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12863 delta += alen; 12864 12865 switch (gregLO3ofRM(modrm)) { 12866 case 0: hintstr = "nta"; break; 12867 case 1: hintstr = "t0"; break; 12868 case 2: hintstr = "t1"; break; 12869 case 3: hintstr = "t2"; break; 12870 default: vassert(0); 12871 } 12872 12873 DIP("prefetch%s %s\n", hintstr, dis_buf); 12874 goto 
decode_success; 12875 } 12876 break; 12877 12878 case 0x28: 12879 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 12880 if (have66noF2noF3(pfx) 12881 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12882 modrm = getUChar(delta); 12883 if (epartIsReg(modrm)) { 12884 putXMMReg( gregOfRexRM(pfx,modrm), 12885 getXMMReg( eregOfRexRM(pfx,modrm) )); 12886 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12887 nameXMMReg(gregOfRexRM(pfx,modrm))); 12888 delta += 1; 12889 } else { 12890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12891 gen_SEGV_if_not_16_aligned( addr ); 12892 putXMMReg( gregOfRexRM(pfx,modrm), 12893 loadLE(Ity_V128, mkexpr(addr)) ); 12894 DIP("movapd %s,%s\n", dis_buf, 12895 nameXMMReg(gregOfRexRM(pfx,modrm))); 12896 delta += alen; 12897 } 12898 goto decode_success; 12899 } 12900 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 12901 if (haveNo66noF2noF3(pfx) 12902 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12903 modrm = getUChar(delta); 12904 if (epartIsReg(modrm)) { 12905 putXMMReg( gregOfRexRM(pfx,modrm), 12906 getXMMReg( eregOfRexRM(pfx,modrm) )); 12907 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12908 nameXMMReg(gregOfRexRM(pfx,modrm))); 12909 delta += 1; 12910 } else { 12911 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12912 gen_SEGV_if_not_16_aligned( addr ); 12913 putXMMReg( gregOfRexRM(pfx,modrm), 12914 loadLE(Ity_V128, mkexpr(addr)) ); 12915 DIP("movaps %s,%s\n", dis_buf, 12916 nameXMMReg(gregOfRexRM(pfx,modrm))); 12917 delta += alen; 12918 } 12919 goto decode_success; 12920 } 12921 break; 12922 12923 case 0x29: 12924 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). 
*/ 12925 if (haveNo66noF2noF3(pfx) 12926 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12927 modrm = getUChar(delta); 12928 if (epartIsReg(modrm)) { 12929 putXMMReg( eregOfRexRM(pfx,modrm), 12930 getXMMReg( gregOfRexRM(pfx,modrm) )); 12931 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12932 nameXMMReg(eregOfRexRM(pfx,modrm))); 12933 delta += 1; 12934 } else { 12935 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12936 gen_SEGV_if_not_16_aligned( addr ); 12937 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12938 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12939 dis_buf ); 12940 delta += alen; 12941 } 12942 goto decode_success; 12943 } 12944 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 12945 if (have66noF2noF3(pfx) 12946 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12947 modrm = getUChar(delta); 12948 if (epartIsReg(modrm)) { 12949 putXMMReg( eregOfRexRM(pfx,modrm), 12950 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12951 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12952 nameXMMReg(eregOfRexRM(pfx,modrm))); 12953 delta += 1; 12954 } else { 12955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12956 gen_SEGV_if_not_16_aligned( addr ); 12957 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12958 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12959 dis_buf ); 12960 delta += alen; 12961 } 12962 goto decode_success; 12963 } 12964 break; 12965 12966 case 0x2A: 12967 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 12968 half xmm */ 12969 if (haveNo66noF2noF3(pfx) && sz == 4) { 12970 IRTemp arg64 = newTemp(Ity_I64); 12971 IRTemp rmode = newTemp(Ity_I32); 12972 12973 modrm = getUChar(delta); 12974 if (epartIsReg(modrm)) { 12975 /* Only switch to MMX mode if the source is a MMX register. 12976 See comments on CVTPI2PD for details. Fixes #357059. 
*/ 12977 do_MMX_preamble(); 12978 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12979 delta += 1; 12980 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12981 nameXMMReg(gregOfRexRM(pfx,modrm))); 12982 } else { 12983 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12984 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12985 delta += alen; 12986 DIP("cvtpi2ps %s,%s\n", dis_buf, 12987 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12988 } 12989 12990 assign( rmode, get_sse_roundingmode() ); 12991 12992 putXMMRegLane32F( 12993 gregOfRexRM(pfx,modrm), 0, 12994 binop(Iop_F64toF32, 12995 mkexpr(rmode), 12996 unop(Iop_I32StoF64, 12997 unop(Iop_64to32, mkexpr(arg64)) )) ); 12998 12999 putXMMRegLane32F( 13000 gregOfRexRM(pfx,modrm), 1, 13001 binop(Iop_F64toF32, 13002 mkexpr(rmode), 13003 unop(Iop_I32StoF64, 13004 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 13005 13006 goto decode_success; 13007 } 13008 /* F3 0F 2A = CVTSI2SS 13009 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 13010 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 13011 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 13012 IRTemp rmode = newTemp(Ity_I32); 13013 assign( rmode, get_sse_roundingmode() ); 13014 modrm = getUChar(delta); 13015 if (sz == 4) { 13016 IRTemp arg32 = newTemp(Ity_I32); 13017 if (epartIsReg(modrm)) { 13018 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 13019 delta += 1; 13020 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13021 nameXMMReg(gregOfRexRM(pfx,modrm))); 13022 } else { 13023 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13024 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 13025 delta += alen; 13026 DIP("cvtsi2ss %s,%s\n", dis_buf, 13027 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13028 } 13029 putXMMRegLane32F( 13030 gregOfRexRM(pfx,modrm), 0, 13031 binop(Iop_F64toF32, 13032 mkexpr(rmode), 13033 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 13034 } else { 13035 /* sz == 8 */ 13036 IRTemp arg64 = newTemp(Ity_I64); 13037 if 
(epartIsReg(modrm)) { 13038 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 13039 delta += 1; 13040 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13041 nameXMMReg(gregOfRexRM(pfx,modrm))); 13042 } else { 13043 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13044 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 13045 delta += alen; 13046 DIP("cvtsi2ssq %s,%s\n", dis_buf, 13047 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13048 } 13049 putXMMRegLane32F( 13050 gregOfRexRM(pfx,modrm), 0, 13051 binop(Iop_F64toF32, 13052 mkexpr(rmode), 13053 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 13054 } 13055 goto decode_success; 13056 } 13057 /* F2 0F 2A = CVTSI2SD 13058 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 13059 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 13060 */ 13061 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 13062 modrm = getUChar(delta); 13063 if (sz == 4) { 13064 IRTemp arg32 = newTemp(Ity_I32); 13065 if (epartIsReg(modrm)) { 13066 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 13067 delta += 1; 13068 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13069 nameXMMReg(gregOfRexRM(pfx,modrm))); 13070 } else { 13071 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13072 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 13073 delta += alen; 13074 DIP("cvtsi2sdl %s,%s\n", dis_buf, 13075 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13076 } 13077 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 13078 unop(Iop_I32StoF64, mkexpr(arg32)) 13079 ); 13080 } else { 13081 /* sz == 8 */ 13082 IRTemp arg64 = newTemp(Ity_I64); 13083 if (epartIsReg(modrm)) { 13084 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 13085 delta += 1; 13086 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13087 nameXMMReg(gregOfRexRM(pfx,modrm))); 13088 } else { 13089 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13090 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 13091 delta += alen; 13092 
DIP("cvtsi2sdq %s,%s\n", dis_buf, 13093 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13094 } 13095 putXMMRegLane64F( 13096 gregOfRexRM(pfx,modrm), 13097 0, 13098 binop( Iop_I64StoF64, 13099 get_sse_roundingmode(), 13100 mkexpr(arg64) 13101 ) 13102 ); 13103 } 13104 goto decode_success; 13105 } 13106 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 13107 xmm(G) */ 13108 if (have66noF2noF3(pfx) && sz == 2) { 13109 IRTemp arg64 = newTemp(Ity_I64); 13110 13111 modrm = getUChar(delta); 13112 if (epartIsReg(modrm)) { 13113 /* Only switch to MMX mode if the source is a MMX register. 13114 This is inconsistent with all other instructions which 13115 convert between XMM and (M64 or MMX), which always switch 13116 to MMX mode even if 64-bit operand is M64 and not MMX. At 13117 least, that's what the Intel docs seem to me to say. 13118 Fixes #210264. */ 13119 do_MMX_preamble(); 13120 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 13121 delta += 1; 13122 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 13123 nameXMMReg(gregOfRexRM(pfx,modrm))); 13124 } else { 13125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13126 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 13127 delta += alen; 13128 DIP("cvtpi2pd %s,%s\n", dis_buf, 13129 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13130 } 13131 13132 putXMMRegLane64F( 13133 gregOfRexRM(pfx,modrm), 0, 13134 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 13135 ); 13136 13137 putXMMRegLane64F( 13138 gregOfRexRM(pfx,modrm), 1, 13139 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 13140 ); 13141 13142 goto decode_success; 13143 } 13144 break; 13145 13146 case 0x2B: 13147 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 13148 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. 
*/ 13149 if ( (haveNo66noF2noF3(pfx) && sz == 4) 13150 || (have66noF2noF3(pfx) && sz == 2) ) { 13151 modrm = getUChar(delta); 13152 if (!epartIsReg(modrm)) { 13153 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13154 gen_SEGV_if_not_16_aligned( addr ); 13155 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13156 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 13157 dis_buf, 13158 nameXMMReg(gregOfRexRM(pfx,modrm))); 13159 delta += alen; 13160 goto decode_success; 13161 } 13162 /* else fall through */ 13163 } 13164 break; 13165 13166 case 0x2C: 13167 case 0x2D: 13168 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13169 I32 in mmx, according to prevailing SSE rounding mode */ 13170 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13171 I32 in mmx, rounding towards zero */ 13172 if (haveNo66noF2noF3(pfx) && sz == 4) { 13173 IRTemp dst64 = newTemp(Ity_I64); 13174 IRTemp rmode = newTemp(Ity_I32); 13175 IRTemp f32lo = newTemp(Ity_F32); 13176 IRTemp f32hi = newTemp(Ity_F32); 13177 Bool r2zero = toBool(opc == 0x2C); 13178 13179 do_MMX_preamble(); 13180 modrm = getUChar(delta); 13181 13182 if (epartIsReg(modrm)) { 13183 delta += 1; 13184 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13185 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 13186 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 13187 nameXMMReg(eregOfRexRM(pfx,modrm)), 13188 nameMMXReg(gregLO3ofRM(modrm))); 13189 } else { 13190 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13191 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13192 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 13193 mkexpr(addr), 13194 mkU64(4) ))); 13195 delta += alen; 13196 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 13197 dis_buf, 13198 nameMMXReg(gregLO3ofRM(modrm))); 13199 } 13200 13201 if (r2zero) { 13202 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13203 } else { 13204 assign( rmode, get_sse_roundingmode() ); 13205 } 13206 13207 assign( 13208 dst64, 13209 binop( Iop_32HLto64, 13210 binop( Iop_F64toI32S, 13211 mkexpr(rmode), 13212 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 13213 binop( Iop_F64toI32S, 13214 mkexpr(rmode), 13215 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 13216 ) 13217 ); 13218 13219 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13220 goto decode_success; 13221 } 13222 /* F3 0F 2D = CVTSS2SI 13223 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13224 according to prevailing SSE rounding mode 13225 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13226 according to prevailing SSE rounding mode 13227 */ 13228 /* F3 0F 2C = CVTTSS2SI 13229 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13230 truncating towards zero 13231 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13232 truncating towards zero 13233 */ 13234 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 13235 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13236 goto decode_success; 13237 } 13238 /* F2 0F 2D = CVTSD2SI 13239 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13240 according to prevailing SSE rounding mode 13241 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13242 according to prevailing SSE rounding mode 13243 */ 13244 /* F2 0F 2C = CVTTSD2SI 13245 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13246 truncating towards zero 13247 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13248 truncating towards zero 13249 */ 13250 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 13251 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13252 goto decode_success; 13253 } 13254 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13255 I32 
in mmx, according to prevailing SSE rounding mode */ 13256 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13257 I32 in mmx, rounding towards zero */ 13258 if (have66noF2noF3(pfx) && sz == 2) { 13259 IRTemp dst64 = newTemp(Ity_I64); 13260 IRTemp rmode = newTemp(Ity_I32); 13261 IRTemp f64lo = newTemp(Ity_F64); 13262 IRTemp f64hi = newTemp(Ity_F64); 13263 Bool r2zero = toBool(opc == 0x2C); 13264 13265 do_MMX_preamble(); 13266 modrm = getUChar(delta); 13267 13268 if (epartIsReg(modrm)) { 13269 delta += 1; 13270 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13271 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 13272 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 13273 nameXMMReg(eregOfRexRM(pfx,modrm)), 13274 nameMMXReg(gregLO3ofRM(modrm))); 13275 } else { 13276 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13277 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13278 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 13279 mkexpr(addr), 13280 mkU64(8) ))); 13281 delta += alen; 13282 DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", 13283 dis_buf, 13284 nameMMXReg(gregLO3ofRM(modrm))); 13285 } 13286 13287 if (r2zero) { 13288 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13289 } else { 13290 assign( rmode, get_sse_roundingmode() ); 13291 } 13292 13293 assign( 13294 dst64, 13295 binop( Iop_32HLto64, 13296 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 13297 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 13298 ) 13299 ); 13300 13301 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13302 goto decode_success; 13303 } 13304 break; 13305 13306 case 0x2E: 13307 case 0x2F: 13308 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 13309 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 13310 if (have66noF2noF3(pfx) && sz == 2) { 13311 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 13312 goto decode_success; 13313 } 13314 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 13315 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 13316 if (haveNo66noF2noF3(pfx) && sz == 4) { 13317 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 13318 goto decode_success; 13319 } 13320 break; 13321 13322 case 0x50: 13323 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 13324 to 4 lowest bits of ireg(G) */ 13325 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13326 && epartIsReg(getUChar(delta))) { 13327 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13328 set to 1, which has been known to happen: 13329 13330 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 13331 13332 20071106: Intel docs say that REX.W isn't redundant: when 13333 present, a 64-bit register is written; when not present, only 13334 the 32-bit half is written. However, testing on a Core2 13335 machine suggests the entire 64 bit register is written 13336 irrespective of the status of REX.W. That could be because 13337 of the default rule that says "if the lower half of a 32-bit 13338 register is written, the upper half is zeroed". 
By using 13339 putIReg32 here we inadvertantly produce the same behaviour as 13340 the Core2, for the same reason -- putIReg32 implements said 13341 rule. 13342 13343 AMD docs give no indication that REX.W is even valid for this 13344 insn. */ 13345 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13346 goto decode_success; 13347 } 13348 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 13349 2 lowest bits of ireg(G) */ 13350 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 13351 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13352 set to 1, which has been known to happen: 13353 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 13354 20071106: see further comments on MOVMSKPS implementation above. 13355 */ 13356 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13357 goto decode_success; 13358 } 13359 break; 13360 13361 case 0x51: 13362 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 13363 if (haveF3no66noF2(pfx) && sz == 4) { 13364 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13365 "sqrtss", Iop_Sqrt32F0x4 ); 13366 goto decode_success; 13367 } 13368 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 13369 if (haveNo66noF2noF3(pfx) && sz == 4) { 13370 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13371 "sqrtps", Iop_Sqrt32Fx4 ); 13372 goto decode_success; 13373 } 13374 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 13375 if (haveF2no66noF3(pfx) && sz == 4) { 13376 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta, 13377 "sqrtsd", Iop_Sqrt64F0x2 ); 13378 goto decode_success; 13379 } 13380 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 13381 if (have66noF2noF3(pfx) && sz == 2) { 13382 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13383 "sqrtpd", Iop_Sqrt64Fx2 ); 13384 goto decode_success; 13385 } 13386 break; 13387 13388 case 0x52: 13389 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 13390 if (haveF3no66noF2(pfx) && sz == 4) { 
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 
                                            "rsqrtss", Iop_RSqrtEst32F0x4 );
         goto decode_success;
      }
      /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 
                                           "rsqrtps", Iop_RSqrtEst32Fx4 );
         goto decode_success;
      }
      break;

   case 0x53:
      /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 
                                            "rcpss", Iop_RecipEst32F0x4 );
         goto decode_success;
      }
      /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 
                                           "rcpps", Iop_RecipEst32Fx4 );
         goto decode_success;
      }
      break;

   case 0x54:
      /* 0F 54 = ANDPS -- G = G and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 54 = ANDPD -- G = G and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
         goto decode_success;
      }
      break;

   case 0x55:
      /* 0F 55 = ANDNPS -- G = (not G) and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
                                          Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 55 = ANDNPD -- G = (not G) and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd", 
                                          Iop_AndV128 );
         goto decode_success;
      }
      break;

   case 0x56:
      /* 0F 56 = ORPS -- G = G or E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
         goto decode_success;
      }
      /* 66 0F 56 = ORPD -- G = G or E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
         goto decode_success;
      }
      break;

   case 0x57:
      /* 66 0F 57 = XORPD -- G = G xor E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
         goto decode_success;
      }
      /* 0F 57 = XORPS -- G = G xor E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
         goto decode_success;
      }
      break;

   case 0x58:
      /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
         goto decode_success;
      }
      /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
         goto decode_success;
      }
      /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
         goto decode_success;
      }
      /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
         goto decode_success;
      }
      break;

   case 0x59:
      /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
         goto decode_success;
      }
      /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13505 if (haveF3no66noF2(pfx) && sz == 4) { 13506 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 ); 13507 goto decode_success; 13508 } 13509 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 13510 if (haveNo66noF2noF3(pfx) && sz == 4) { 13511 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 ); 13512 goto decode_success; 13513 } 13514 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 13515 if (have66noF2noF3(pfx) 13516 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13517 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 ); 13518 goto decode_success; 13519 } 13520 break; 13521 13522 case 0x5A: 13523 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 13524 F64 in xmm(G). */ 13525 if (haveNo66noF2noF3(pfx) 13526 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13527 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13528 goto decode_success; 13529 } 13530 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 13531 low half xmm(G) */ 13532 if (haveF3no66noF2(pfx) && sz == 4) { 13533 IRTemp f32lo = newTemp(Ity_F32); 13534 13535 modrm = getUChar(delta); 13536 if (epartIsReg(modrm)) { 13537 delta += 1; 13538 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13539 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13540 nameXMMReg(gregOfRexRM(pfx,modrm))); 13541 } else { 13542 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13543 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13544 delta += alen; 13545 DIP("cvtss2sd %s,%s\n", dis_buf, 13546 nameXMMReg(gregOfRexRM(pfx,modrm))); 13547 } 13548 13549 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 13550 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 13551 13552 goto decode_success; 13553 } 13554 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 13555 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 13556 if (haveF2no66noF3(pfx) && sz == 4) { 13557 IRTemp rmode = 
newTemp(Ity_I32); 13558 IRTemp f64lo = newTemp(Ity_F64); 13559 13560 modrm = getUChar(delta); 13561 if (epartIsReg(modrm)) { 13562 delta += 1; 13563 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13564 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13565 nameXMMReg(gregOfRexRM(pfx,modrm))); 13566 } else { 13567 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13568 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13569 delta += alen; 13570 DIP("cvtsd2ss %s,%s\n", dis_buf, 13571 nameXMMReg(gregOfRexRM(pfx,modrm))); 13572 } 13573 13574 assign( rmode, get_sse_roundingmode() ); 13575 putXMMRegLane32F( 13576 gregOfRexRM(pfx,modrm), 0, 13577 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 13578 ); 13579 13580 goto decode_success; 13581 } 13582 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 13583 lo half xmm(G), rounding according to prevailing SSE rounding 13584 mode, and zero upper half */ 13585 /* Note, this is practically identical to CVTPD2DQ. It would have 13586 be nice to merge them together. */ 13587 if (have66noF2noF3(pfx) && sz == 2) { 13588 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13589 goto decode_success; 13590 } 13591 break; 13592 13593 case 0x5B: 13594 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13595 xmm(G), rounding towards zero */ 13596 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13597 xmm(G), as per the prevailing rounding mode */ 13598 if ( (have66noF2noF3(pfx) && sz == 2) 13599 || (haveF3no66noF2(pfx) && sz == 4) ) { 13600 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???) 
13601 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero ); 13602 goto decode_success; 13603 } 13604 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 13605 xmm(G) */ 13606 if (haveNo66noF2noF3(pfx) && sz == 4) { 13607 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13608 goto decode_success; 13609 } 13610 break; 13611 13612 case 0x5C: 13613 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 13614 if (haveF3no66noF2(pfx) && sz == 4) { 13615 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 ); 13616 goto decode_success; 13617 } 13618 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 13619 if (haveF2no66noF3(pfx) 13620 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13621 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 ); 13622 goto decode_success; 13623 } 13624 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 13625 if (haveNo66noF2noF3(pfx) && sz == 4) { 13626 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 ); 13627 goto decode_success; 13628 } 13629 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 13630 if (have66noF2noF3(pfx) && sz == 2) { 13631 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 ); 13632 goto decode_success; 13633 } 13634 break; 13635 13636 case 0x5D: 13637 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 13638 if (haveNo66noF2noF3(pfx) && sz == 4) { 13639 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 ); 13640 goto decode_success; 13641 } 13642 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 13643 if (haveF3no66noF2(pfx) && sz == 4) { 13644 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 ); 13645 goto decode_success; 13646 } 13647 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 13648 if (haveF2no66noF3(pfx) 13649 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13650 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 ); 13651 goto decode_success; 13652 
} 13653 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 13654 if (have66noF2noF3(pfx) && sz == 2) { 13655 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 ); 13656 goto decode_success; 13657 } 13658 break; 13659 13660 case 0x5E: 13661 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 13662 if (haveF2no66noF3(pfx) && sz == 4) { 13663 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 ); 13664 goto decode_success; 13665 } 13666 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 13667 if (haveNo66noF2noF3(pfx) && sz == 4) { 13668 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 ); 13669 goto decode_success; 13670 } 13671 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 13672 if (haveF3no66noF2(pfx) && sz == 4) { 13673 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 ); 13674 goto decode_success; 13675 } 13676 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 13677 if (have66noF2noF3(pfx) && sz == 2) { 13678 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 ); 13679 goto decode_success; 13680 } 13681 break; 13682 13683 case 0x5F: 13684 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 13685 if (haveNo66noF2noF3(pfx) && sz == 4) { 13686 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 ); 13687 goto decode_success; 13688 } 13689 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 13690 if (haveF3no66noF2(pfx) && sz == 4) { 13691 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 ); 13692 goto decode_success; 13693 } 13694 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 13695 if (haveF2no66noF3(pfx) 13696 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13697 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 ); 13698 goto decode_success; 13699 } 13700 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 13701 if (have66noF2noF3(pfx) && sz == 2) { 13702 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 ); 
13703 goto decode_success; 13704 } 13705 break; 13706 13707 case 0x60: 13708 /* 66 0F 60 = PUNPCKLBW */ 13709 if (have66noF2noF3(pfx) && sz == 2) { 13710 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13711 "punpcklbw", 13712 Iop_InterleaveLO8x16, True ); 13713 goto decode_success; 13714 } 13715 break; 13716 13717 case 0x61: 13718 /* 66 0F 61 = PUNPCKLWD */ 13719 if (have66noF2noF3(pfx) && sz == 2) { 13720 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13721 "punpcklwd", 13722 Iop_InterleaveLO16x8, True ); 13723 goto decode_success; 13724 } 13725 break; 13726 13727 case 0x62: 13728 /* 66 0F 62 = PUNPCKLDQ */ 13729 if (have66noF2noF3(pfx) && sz == 2) { 13730 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13731 "punpckldq", 13732 Iop_InterleaveLO32x4, True ); 13733 goto decode_success; 13734 } 13735 break; 13736 13737 case 0x63: 13738 /* 66 0F 63 = PACKSSWB */ 13739 if (have66noF2noF3(pfx) && sz == 2) { 13740 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13741 "packsswb", 13742 Iop_QNarrowBin16Sto8Sx16, True ); 13743 goto decode_success; 13744 } 13745 break; 13746 13747 case 0x64: 13748 /* 66 0F 64 = PCMPGTB */ 13749 if (have66noF2noF3(pfx) && sz == 2) { 13750 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13751 "pcmpgtb", Iop_CmpGT8Sx16, False ); 13752 goto decode_success; 13753 } 13754 break; 13755 13756 case 0x65: 13757 /* 66 0F 65 = PCMPGTW */ 13758 if (have66noF2noF3(pfx) && sz == 2) { 13759 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13760 "pcmpgtw", Iop_CmpGT16Sx8, False ); 13761 goto decode_success; 13762 } 13763 break; 13764 13765 case 0x66: 13766 /* 66 0F 66 = PCMPGTD */ 13767 if (have66noF2noF3(pfx) && sz == 2) { 13768 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13769 "pcmpgtd", Iop_CmpGT32Sx4, False ); 13770 goto decode_success; 13771 } 13772 break; 13773 13774 case 0x67: 13775 /* 66 0F 67 = PACKUSWB */ 13776 if (have66noF2noF3(pfx) && sz == 2) { 13777 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13778 "packuswb", 13779 Iop_QNarrowBin16Sto8Ux16, True ); 13780 goto 
decode_success; 13781 } 13782 break; 13783 13784 case 0x68: 13785 /* 66 0F 68 = PUNPCKHBW */ 13786 if (have66noF2noF3(pfx) && sz == 2) { 13787 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13788 "punpckhbw", 13789 Iop_InterleaveHI8x16, True ); 13790 goto decode_success; 13791 } 13792 break; 13793 13794 case 0x69: 13795 /* 66 0F 69 = PUNPCKHWD */ 13796 if (have66noF2noF3(pfx) && sz == 2) { 13797 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13798 "punpckhwd", 13799 Iop_InterleaveHI16x8, True ); 13800 goto decode_success; 13801 } 13802 break; 13803 13804 case 0x6A: 13805 /* 66 0F 6A = PUNPCKHDQ */ 13806 if (have66noF2noF3(pfx) && sz == 2) { 13807 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13808 "punpckhdq", 13809 Iop_InterleaveHI32x4, True ); 13810 goto decode_success; 13811 } 13812 break; 13813 13814 case 0x6B: 13815 /* 66 0F 6B = PACKSSDW */ 13816 if (have66noF2noF3(pfx) && sz == 2) { 13817 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13818 "packssdw", 13819 Iop_QNarrowBin32Sto16Sx8, True ); 13820 goto decode_success; 13821 } 13822 break; 13823 13824 case 0x6C: 13825 /* 66 0F 6C = PUNPCKLQDQ */ 13826 if (have66noF2noF3(pfx) && sz == 2) { 13827 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13828 "punpcklqdq", 13829 Iop_InterleaveLO64x2, True ); 13830 goto decode_success; 13831 } 13832 break; 13833 13834 case 0x6D: 13835 /* 66 0F 6D = PUNPCKHQDQ */ 13836 if (have66noF2noF3(pfx) && sz == 2) { 13837 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13838 "punpckhqdq", 13839 Iop_InterleaveHI64x2, True ); 13840 goto decode_success; 13841 } 13842 break; 13843 13844 case 0x6E: 13845 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 13846 zeroing high 3/4 of xmm. */ 13847 /* or from ireg64/m64 to xmm lo 1/2, 13848 zeroing high 1/2 of xmm. 
*/ 13849 if (have66noF2noF3(pfx)) { 13850 vassert(sz == 2 || sz == 8); 13851 if (sz == 2) sz = 4; 13852 modrm = getUChar(delta); 13853 if (epartIsReg(modrm)) { 13854 delta += 1; 13855 if (sz == 4) { 13856 putXMMReg( 13857 gregOfRexRM(pfx,modrm), 13858 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 13859 ); 13860 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13861 nameXMMReg(gregOfRexRM(pfx,modrm))); 13862 } else { 13863 putXMMReg( 13864 gregOfRexRM(pfx,modrm), 13865 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 13866 ); 13867 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13868 nameXMMReg(gregOfRexRM(pfx,modrm))); 13869 } 13870 } else { 13871 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13872 delta += alen; 13873 putXMMReg( 13874 gregOfRexRM(pfx,modrm), 13875 sz == 4 13876 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 13877 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 13878 ); 13879 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 13880 nameXMMReg(gregOfRexRM(pfx,modrm))); 13881 } 13882 goto decode_success; 13883 } 13884 break; 13885 13886 case 0x6F: 13887 if (have66noF2noF3(pfx) 13888 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13889 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). 
*/ 13890 modrm = getUChar(delta); 13891 if (epartIsReg(modrm)) { 13892 putXMMReg( gregOfRexRM(pfx,modrm), 13893 getXMMReg( eregOfRexRM(pfx,modrm) )); 13894 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13895 nameXMMReg(gregOfRexRM(pfx,modrm))); 13896 delta += 1; 13897 } else { 13898 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13899 gen_SEGV_if_not_16_aligned( addr ); 13900 putXMMReg( gregOfRexRM(pfx,modrm), 13901 loadLE(Ity_V128, mkexpr(addr)) ); 13902 DIP("movdqa %s,%s\n", dis_buf, 13903 nameXMMReg(gregOfRexRM(pfx,modrm))); 13904 delta += alen; 13905 } 13906 goto decode_success; 13907 } 13908 if (haveF3no66noF2(pfx) && sz == 4) { 13909 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 13910 modrm = getUChar(delta); 13911 if (epartIsReg(modrm)) { 13912 putXMMReg( gregOfRexRM(pfx,modrm), 13913 getXMMReg( eregOfRexRM(pfx,modrm) )); 13914 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13915 nameXMMReg(gregOfRexRM(pfx,modrm))); 13916 delta += 1; 13917 } else { 13918 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13919 putXMMReg( gregOfRexRM(pfx,modrm), 13920 loadLE(Ity_V128, mkexpr(addr)) ); 13921 DIP("movdqu %s,%s\n", dis_buf, 13922 nameXMMReg(gregOfRexRM(pfx,modrm))); 13923 delta += alen; 13924 } 13925 goto decode_success; 13926 } 13927 break; 13928 13929 case 0x70: 13930 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 13931 if (have66noF2noF3(pfx) && sz == 2) { 13932 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/); 13933 goto decode_success; 13934 } 13935 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13936 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 13937 if (haveNo66noF2noF3(pfx) && sz == 4) { 13938 Int order; 13939 IRTemp sV, dV, s3, s2, s1, s0; 13940 s3 = s2 = s1 = s0 = IRTemp_INVALID; 13941 sV = newTemp(Ity_I64); 13942 dV = newTemp(Ity_I64); 13943 do_MMX_preamble(); 13944 modrm = getUChar(delta); 13945 if (epartIsReg(modrm)) 
{ 13946 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13947 order = (Int)getUChar(delta+1); 13948 delta += 1+1; 13949 DIP("pshufw $%d,%s,%s\n", order, 13950 nameMMXReg(eregLO3ofRM(modrm)), 13951 nameMMXReg(gregLO3ofRM(modrm))); 13952 } else { 13953 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13954 1/*extra byte after amode*/ ); 13955 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13956 order = (Int)getUChar(delta+alen); 13957 delta += 1+alen; 13958 DIP("pshufw $%d,%s,%s\n", order, 13959 dis_buf, 13960 nameMMXReg(gregLO3ofRM(modrm))); 13961 } 13962 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 13963 # define SEL(n) \ 13964 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 13965 assign(dV, 13966 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 13967 SEL((order>>2)&3), SEL((order>>0)&3) ) 13968 ); 13969 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 13970 # undef SEL 13971 goto decode_success; 13972 } 13973 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 13974 mem) to G(xmm), and copy upper half */ 13975 if (haveF2no66noF3(pfx) && sz == 4) { 13976 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13977 False/*!isAvx*/, False/*!xIsH*/ ); 13978 goto decode_success; 13979 } 13980 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 13981 mem) to G(xmm), and copy lower half */ 13982 if (haveF3no66noF2(pfx) && sz == 4) { 13983 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13984 False/*!isAvx*/, True/*xIsH*/ ); 13985 goto decode_success; 13986 } 13987 break; 13988 13989 case 0x71: 13990 /* 66 0F 71 /2 ib = PSRLW by immediate */ 13991 if (have66noF2noF3(pfx) && sz == 2 13992 && epartIsReg(getUChar(delta)) 13993 && gregLO3ofRM(getUChar(delta)) == 2) { 13994 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 ); 13995 goto decode_success; 13996 } 13997 /* 66 0F 71 /4 ib = PSRAW by immediate */ 13998 if (have66noF2noF3(pfx) && sz == 2 13999 && epartIsReg(getUChar(delta)) 14000 && gregLO3ofRM(getUChar(delta)) == 4) { 14001 delta = dis_SSE_shiftE_imm( 
pfx, delta, "psraw", Iop_SarN16x8 ); 14002 goto decode_success; 14003 } 14004 /* 66 0F 71 /6 ib = PSLLW by immediate */ 14005 if (have66noF2noF3(pfx) && sz == 2 14006 && epartIsReg(getUChar(delta)) 14007 && gregLO3ofRM(getUChar(delta)) == 6) { 14008 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 ); 14009 goto decode_success; 14010 } 14011 break; 14012 14013 case 0x72: 14014 /* 66 0F 72 /2 ib = PSRLD by immediate */ 14015 if (have66noF2noF3(pfx) && sz == 2 14016 && epartIsReg(getUChar(delta)) 14017 && gregLO3ofRM(getUChar(delta)) == 2) { 14018 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 ); 14019 goto decode_success; 14020 } 14021 /* 66 0F 72 /4 ib = PSRAD by immediate */ 14022 if (have66noF2noF3(pfx) && sz == 2 14023 && epartIsReg(getUChar(delta)) 14024 && gregLO3ofRM(getUChar(delta)) == 4) { 14025 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 ); 14026 goto decode_success; 14027 } 14028 /* 66 0F 72 /6 ib = PSLLD by immediate */ 14029 if (have66noF2noF3(pfx) && sz == 2 14030 && epartIsReg(getUChar(delta)) 14031 && gregLO3ofRM(getUChar(delta)) == 6) { 14032 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 ); 14033 goto decode_success; 14034 } 14035 break; 14036 14037 case 0x73: 14038 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 14039 /* note, if mem case ever filled in, 1 byte after amode */ 14040 if (have66noF2noF3(pfx) && sz == 2 14041 && epartIsReg(getUChar(delta)) 14042 && gregLO3ofRM(getUChar(delta)) == 3) { 14043 Int imm = (Int)getUChar(delta+1); 14044 Int reg = eregOfRexRM(pfx,getUChar(delta)); 14045 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 14046 delta += 2; 14047 IRTemp sV = newTemp(Ity_V128); 14048 assign( sV, getXMMReg(reg) ); 14049 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm ))); 14050 goto decode_success; 14051 } 14052 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 14053 /* note, if mem case ever filled in, 1 byte after amode */ 14054 if (have66noF2noF3(pfx) && sz == 2 14055 && 
epartIsReg(getUChar(delta)) 14056 && gregLO3ofRM(getUChar(delta)) == 7) { 14057 Int imm = (Int)getUChar(delta+1); 14058 Int reg = eregOfRexRM(pfx,getUChar(delta)); 14059 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 14060 vassert(imm >= 0 && imm <= 255); 14061 delta += 2; 14062 IRTemp sV = newTemp(Ity_V128); 14063 assign( sV, getXMMReg(reg) ); 14064 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm ))); 14065 goto decode_success; 14066 } 14067 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 14068 if (have66noF2noF3(pfx) && sz == 2 14069 && epartIsReg(getUChar(delta)) 14070 && gregLO3ofRM(getUChar(delta)) == 2) { 14071 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 ); 14072 goto decode_success; 14073 } 14074 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 14075 if (have66noF2noF3(pfx) && sz == 2 14076 && epartIsReg(getUChar(delta)) 14077 && gregLO3ofRM(getUChar(delta)) == 6) { 14078 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 ); 14079 goto decode_success; 14080 } 14081 break; 14082 14083 case 0x74: 14084 /* 66 0F 74 = PCMPEQB */ 14085 if (have66noF2noF3(pfx) && sz == 2) { 14086 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14087 "pcmpeqb", Iop_CmpEQ8x16, False ); 14088 goto decode_success; 14089 } 14090 break; 14091 14092 case 0x75: 14093 /* 66 0F 75 = PCMPEQW */ 14094 if (have66noF2noF3(pfx) && sz == 2) { 14095 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14096 "pcmpeqw", Iop_CmpEQ16x8, False ); 14097 goto decode_success; 14098 } 14099 break; 14100 14101 case 0x76: 14102 /* 66 0F 76 = PCMPEQD */ 14103 if (have66noF2noF3(pfx) && sz == 2) { 14104 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14105 "pcmpeqd", Iop_CmpEQ32x4, False ); 14106 goto decode_success; 14107 } 14108 break; 14109 14110 case 0x7E: 14111 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 14112 G (lo half xmm). Upper half of G is zeroed out. 
*/ 14113 if (haveF3no66noF2(pfx) 14114 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14115 modrm = getUChar(delta); 14116 if (epartIsReg(modrm)) { 14117 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 14118 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14119 /* zero bits 127:64 */ 14120 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 14121 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14122 nameXMMReg(gregOfRexRM(pfx,modrm))); 14123 delta += 1; 14124 } else { 14125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14126 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 14127 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 14128 loadLE(Ity_I64, mkexpr(addr)) ); 14129 DIP("movsd %s,%s\n", dis_buf, 14130 nameXMMReg(gregOfRexRM(pfx,modrm))); 14131 delta += alen; 14132 } 14133 goto decode_success; 14134 } 14135 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 14136 /* or from xmm low 1/2 to ireg64 or m64. */ 14137 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 14138 if (sz == 2) sz = 4; 14139 modrm = getUChar(delta); 14140 if (epartIsReg(modrm)) { 14141 delta += 1; 14142 if (sz == 4) { 14143 putIReg32( eregOfRexRM(pfx,modrm), 14144 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 14145 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14146 nameIReg32(eregOfRexRM(pfx,modrm))); 14147 } else { 14148 putIReg64( eregOfRexRM(pfx,modrm), 14149 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 14150 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14151 nameIReg64(eregOfRexRM(pfx,modrm))); 14152 } 14153 } else { 14154 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14155 delta += alen; 14156 storeLE( mkexpr(addr), 14157 sz == 4 14158 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 14159 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 14160 DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', 14161 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14162 } 14163 goto decode_success; 14164 } 14165 break; 14166 14167 case 0x7F: 14168 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 14169 if (haveF3no66noF2(pfx) && sz == 4) { 14170 modrm = getUChar(delta); 14171 if (epartIsReg(modrm)) { 14172 goto decode_failure; /* awaiting test case */ 14173 delta += 1; 14174 putXMMReg( eregOfRexRM(pfx,modrm), 14175 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14176 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14177 nameXMMReg(eregOfRexRM(pfx,modrm))); 14178 } else { 14179 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14180 delta += alen; 14181 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14182 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14183 } 14184 goto decode_success; 14185 } 14186 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 14187 if (have66noF2noF3(pfx) && sz == 2) { 14188 modrm = getUChar(delta); 14189 if (epartIsReg(modrm)) { 14190 delta += 1; 14191 putXMMReg( eregOfRexRM(pfx,modrm), 14192 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14193 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14194 nameXMMReg(eregOfRexRM(pfx,modrm))); 14195 } else { 14196 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14197 gen_SEGV_if_not_16_aligned( addr ); 14198 delta += alen; 14199 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14200 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14201 } 14202 goto decode_success; 14203 } 14204 break; 14205 14206 case 0xAE: 14207 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 14208 if (haveNo66noF2noF3(pfx) 14209 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14210 && sz == 4) { 14211 delta += 1; 14212 /* Insert a memory fence. It's sometimes important that these 14213 are carried through to the generated code. 
*/ 14214 stmt( IRStmt_MBE(Imbe_Fence) ); 14215 DIP("sfence\n"); 14216 goto decode_success; 14217 } 14218 /* mindless duplication follows .. */ 14219 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 14220 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 14221 if (haveNo66noF2noF3(pfx) 14222 && epartIsReg(getUChar(delta)) 14223 && (gregLO3ofRM(getUChar(delta)) == 5 14224 || gregLO3ofRM(getUChar(delta)) == 6) 14225 && sz == 4) { 14226 delta += 1; 14227 /* Insert a memory fence. It's sometimes important that these 14228 are carried through to the generated code. */ 14229 stmt( IRStmt_MBE(Imbe_Fence) ); 14230 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m"); 14231 goto decode_success; 14232 } 14233 14234 /* 0F AE /7 = CLFLUSH -- flush cache line */ 14235 if (haveNo66noF2noF3(pfx) 14236 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14237 && sz == 4) { 14238 14239 /* This is something of a hack. We need to know the size of 14240 the cache line containing addr. Since we don't (easily), 14241 assume 256 on the basis that no real cache would have a 14242 line that big. It's safe to invalidate more stuff than we 14243 need, just inefficient. */ 14244 ULong lineszB = 256ULL; 14245 14246 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14247 delta += alen; 14248 14249 /* Round addr down to the start of the containing block. 
*/ 14250 stmt( IRStmt_Put( 14251 OFFB_CMSTART, 14252 binop( Iop_And64, 14253 mkexpr(addr), 14254 mkU64( ~(lineszB-1) ))) ); 14255 14256 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); 14257 14258 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); 14259 14260 DIP("clflush %s\n", dis_buf); 14261 goto decode_success; 14262 } 14263 14264 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 14265 if (haveNo66noF2noF3(pfx) 14266 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 14267 && sz == 4) { 14268 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14269 goto decode_success; 14270 } 14271 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 14272 if (haveNo66noF2noF3(pfx) 14273 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 14274 && sz == 4) { 14275 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14276 goto decode_success; 14277 } 14278 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 14279 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14280 && !epartIsReg(getUChar(delta)) 14281 && gregOfRexRM(pfx,getUChar(delta)) == 0) { 14282 delta = dis_FXSAVE(vbi, pfx, delta, sz); 14283 goto decode_success; 14284 } 14285 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 14286 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14287 && !epartIsReg(getUChar(delta)) 14288 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 14289 delta = dis_FXRSTOR(vbi, pfx, delta, sz); 14290 goto decode_success; 14291 } 14292 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */ 14293 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14294 && !epartIsReg(getUChar(delta)) 14295 && gregOfRexRM(pfx,getUChar(delta)) == 4 14296 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14297 delta = dis_XSAVE(vbi, pfx, delta, sz); 14298 goto decode_success; 14299 } 14300 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */ 14301 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14302 && 
!epartIsReg(getUChar(delta)) 14303 && gregOfRexRM(pfx,getUChar(delta)) == 5 14304 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14305 delta = dis_XRSTOR(vbi, pfx, delta, sz); 14306 goto decode_success; 14307 } 14308 break; 14309 14310 case 0xC2: 14311 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 14312 if (haveNo66noF2noF3(pfx) && sz == 4) { 14313 Long delta0 = delta; 14314 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 14315 if (delta > delta0) goto decode_success; 14316 } 14317 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 14318 if (haveF3no66noF2(pfx) && sz == 4) { 14319 Long delta0 = delta; 14320 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 14321 if (delta > delta0) goto decode_success; 14322 } 14323 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 14324 if (haveF2no66noF3(pfx) && sz == 4) { 14325 Long delta0 = delta; 14326 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 14327 if (delta > delta0) goto decode_success; 14328 } 14329 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 14330 if (have66noF2noF3(pfx) && sz == 2) { 14331 Long delta0 = delta; 14332 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 14333 if (delta > delta0) goto decode_success; 14334 } 14335 break; 14336 14337 case 0xC3: 14338 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. 
*/ 14339 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14340 modrm = getUChar(delta); 14341 if (!epartIsReg(modrm)) { 14342 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14343 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 14344 DIP("movnti %s,%s\n", dis_buf, 14345 nameIRegG(sz, pfx, modrm)); 14346 delta += alen; 14347 goto decode_success; 14348 } 14349 /* else fall through */ 14350 } 14351 break; 14352 14353 case 0xC4: 14354 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14355 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14356 put it into the specified lane of mmx(G). */ 14357 if (haveNo66noF2noF3(pfx) 14358 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14359 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 14360 mmx reg. t4 is the new lane value. t5 is the original 14361 mmx value. t6 is the new mmx value. */ 14362 Int lane; 14363 t4 = newTemp(Ity_I16); 14364 t5 = newTemp(Ity_I64); 14365 t6 = newTemp(Ity_I64); 14366 modrm = getUChar(delta); 14367 do_MMX_preamble(); 14368 14369 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 14370 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 14371 14372 if (epartIsReg(modrm)) { 14373 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 14374 delta += 1+1; 14375 lane = getUChar(delta-1); 14376 DIP("pinsrw $%d,%s,%s\n", lane, 14377 nameIReg16(eregOfRexRM(pfx,modrm)), 14378 nameMMXReg(gregLO3ofRM(modrm))); 14379 } else { 14380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14381 delta += 1+alen; 14382 lane = getUChar(delta-1); 14383 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14384 DIP("pinsrw $%d,%s,%s\n", lane, 14385 dis_buf, 14386 nameMMXReg(gregLO3ofRM(modrm))); 14387 } 14388 14389 switch (lane & 3) { 14390 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 14391 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 14392 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 14393 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 14394 default: 
vassert(0); 14395 } 14396 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 14397 goto decode_success; 14398 } 14399 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14400 put it into the specified lane of xmm(G). */ 14401 if (have66noF2noF3(pfx) 14402 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14403 Int lane; 14404 t4 = newTemp(Ity_I16); 14405 modrm = getUChar(delta); 14406 UInt rG = gregOfRexRM(pfx,modrm); 14407 if (epartIsReg(modrm)) { 14408 UInt rE = eregOfRexRM(pfx,modrm); 14409 assign(t4, getIReg16(rE)); 14410 delta += 1+1; 14411 lane = getUChar(delta-1); 14412 DIP("pinsrw $%d,%s,%s\n", 14413 lane, nameIReg16(rE), nameXMMReg(rG)); 14414 } else { 14415 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 14416 1/*byte after the amode*/ ); 14417 delta += 1+alen; 14418 lane = getUChar(delta-1); 14419 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14420 DIP("pinsrw $%d,%s,%s\n", 14421 lane, dis_buf, nameXMMReg(rG)); 14422 } 14423 IRTemp src_vec = newTemp(Ity_V128); 14424 assign(src_vec, getXMMReg(rG)); 14425 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 14426 putXMMReg(rG, mkexpr(res_vec)); 14427 goto decode_success; 14428 } 14429 break; 14430 14431 case 0xC5: 14432 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14433 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 14434 zero-extend of it in ireg(G). 
*/ 14435 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14436 modrm = getUChar(delta); 14437 if (epartIsReg(modrm)) { 14438 IRTemp sV = newTemp(Ity_I64); 14439 t5 = newTemp(Ity_I16); 14440 do_MMX_preamble(); 14441 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 14442 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 14443 switch (getUChar(delta+1) & 3) { 14444 case 0: assign(t5, mkexpr(t0)); break; 14445 case 1: assign(t5, mkexpr(t1)); break; 14446 case 2: assign(t5, mkexpr(t2)); break; 14447 case 3: assign(t5, mkexpr(t3)); break; 14448 default: vassert(0); 14449 } 14450 if (sz == 8) 14451 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 14452 else 14453 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 14454 DIP("pextrw $%d,%s,%s\n", 14455 (Int)getUChar(delta+1), 14456 nameMMXReg(eregLO3ofRM(modrm)), 14457 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 14458 : nameIReg32(gregOfRexRM(pfx,modrm)) 14459 ); 14460 delta += 2; 14461 goto decode_success; 14462 } 14463 /* else fall through */ 14464 /* note, for anyone filling in the mem case: this insn has one 14465 byte after the amode and therefore you must pass 1 as the 14466 last arg to disAMode */ 14467 } 14468 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 14469 zero-extend of it in ireg(G). 
*/ 14470 if (have66noF2noF3(pfx) 14471 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14472 Long delta0 = delta; 14473 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 14474 False/*!isAvx*/ ); 14475 if (delta > delta0) goto decode_success; 14476 /* else fall through -- decoding has failed */ 14477 } 14478 break; 14479 14480 case 0xC6: 14481 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 14482 if (haveNo66noF2noF3(pfx) && sz == 4) { 14483 Int imm8 = 0; 14484 IRTemp sV = newTemp(Ity_V128); 14485 IRTemp dV = newTemp(Ity_V128); 14486 modrm = getUChar(delta); 14487 UInt rG = gregOfRexRM(pfx,modrm); 14488 assign( dV, getXMMReg(rG) ); 14489 if (epartIsReg(modrm)) { 14490 UInt rE = eregOfRexRM(pfx,modrm); 14491 assign( sV, getXMMReg(rE) ); 14492 imm8 = (Int)getUChar(delta+1); 14493 delta += 1+1; 14494 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 14495 } else { 14496 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14497 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14498 imm8 = (Int)getUChar(delta+alen); 14499 delta += 1+alen; 14500 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 14501 } 14502 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 14503 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14504 goto decode_success; 14505 } 14506 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 14507 if (have66noF2noF3(pfx) && sz == 2) { 14508 Int select; 14509 IRTemp sV = newTemp(Ity_V128); 14510 IRTemp dV = newTemp(Ity_V128); 14511 14512 modrm = getUChar(delta); 14513 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 14514 14515 if (epartIsReg(modrm)) { 14516 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 14517 select = (Int)getUChar(delta+1); 14518 delta += 1+1; 14519 DIP("shufpd $%d,%s,%s\n", select, 14520 nameXMMReg(eregOfRexRM(pfx,modrm)), 14521 nameXMMReg(gregOfRexRM(pfx,modrm))); 14522 } else { 14523 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14524 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14525 select 
= getUChar(delta+alen); 14526 delta += 1+alen; 14527 DIP("shufpd $%d,%s,%s\n", select, 14528 dis_buf, 14529 nameXMMReg(gregOfRexRM(pfx,modrm))); 14530 } 14531 14532 IRTemp res = math_SHUFPD_128( sV, dV, select ); 14533 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14534 goto decode_success; 14535 } 14536 break; 14537 14538 case 0xD1: 14539 /* 66 0F D1 = PSRLW by E */ 14540 if (have66noF2noF3(pfx) && sz == 2) { 14541 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 14542 goto decode_success; 14543 } 14544 break; 14545 14546 case 0xD2: 14547 /* 66 0F D2 = PSRLD by E */ 14548 if (have66noF2noF3(pfx) && sz == 2) { 14549 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 14550 goto decode_success; 14551 } 14552 break; 14553 14554 case 0xD3: 14555 /* 66 0F D3 = PSRLQ by E */ 14556 if (have66noF2noF3(pfx) && sz == 2) { 14557 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 14558 goto decode_success; 14559 } 14560 break; 14561 14562 case 0xD4: 14563 /* 66 0F D4 = PADDQ */ 14564 if (have66noF2noF3(pfx) && sz == 2) { 14565 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14566 "paddq", Iop_Add64x2, False ); 14567 goto decode_success; 14568 } 14569 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14570 /* 0F D4 = PADDQ -- add 64x1 */ 14571 if (haveNo66noF2noF3(pfx) && sz == 4) { 14572 do_MMX_preamble(); 14573 delta = dis_MMXop_regmem_to_reg ( 14574 vbi, pfx, delta, opc, "paddq", False ); 14575 goto decode_success; 14576 } 14577 break; 14578 14579 case 0xD5: 14580 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 14581 if (have66noF2noF3(pfx) && sz == 2) { 14582 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14583 "pmullw", Iop_Mul16x8, False ); 14584 goto decode_success; 14585 } 14586 break; 14587 14588 case 0xD6: 14589 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 14590 hi half). 
*/ 14591 if (haveF3no66noF2(pfx) && sz == 4) { 14592 modrm = getUChar(delta); 14593 if (epartIsReg(modrm)) { 14594 do_MMX_preamble(); 14595 putXMMReg( gregOfRexRM(pfx,modrm), 14596 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 14597 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14598 nameXMMReg(gregOfRexRM(pfx,modrm))); 14599 delta += 1; 14600 goto decode_success; 14601 } 14602 /* apparently no mem case for this insn */ 14603 } 14604 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 14605 or lo half xmm). */ 14606 if (have66noF2noF3(pfx) 14607 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14608 modrm = getUChar(delta); 14609 if (epartIsReg(modrm)) { 14610 /* fall through, awaiting test case */ 14611 /* dst: lo half copied, hi half zeroed */ 14612 } else { 14613 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14614 storeLE( mkexpr(addr), 14615 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 14616 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 14617 delta += alen; 14618 goto decode_success; 14619 } 14620 } 14621 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 14622 if (haveF2no66noF3(pfx) && sz == 4) { 14623 modrm = getUChar(delta); 14624 if (epartIsReg(modrm)) { 14625 do_MMX_preamble(); 14626 putMMXReg( gregLO3ofRM(modrm), 14627 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14628 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14629 nameMMXReg(gregLO3ofRM(modrm))); 14630 delta += 1; 14631 goto decode_success; 14632 } 14633 /* apparently no mem case for this insn */ 14634 } 14635 break; 14636 14637 case 0xD7: 14638 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 14639 lanes in xmm(E), turn them into a byte, and put 14640 zero-extend of it in ireg(G). Doing this directly is just 14641 too cumbersome; give up therefore and call a helper. 
*/ 14642 if (have66noF2noF3(pfx) 14643 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14644 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 14645 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 14646 goto decode_success; 14647 } 14648 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14649 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 14650 mmx(E), turn them into a byte, and put zero-extend of it in 14651 ireg(G). */ 14652 if (haveNo66noF2noF3(pfx) 14653 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14654 modrm = getUChar(delta); 14655 if (epartIsReg(modrm)) { 14656 do_MMX_preamble(); 14657 t0 = newTemp(Ity_I64); 14658 t1 = newTemp(Ity_I32); 14659 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 14660 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); 14661 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); 14662 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14663 nameIReg32(gregOfRexRM(pfx,modrm))); 14664 delta += 1; 14665 goto decode_success; 14666 } 14667 /* else fall through */ 14668 } 14669 break; 14670 14671 case 0xD8: 14672 /* 66 0F D8 = PSUBUSB */ 14673 if (have66noF2noF3(pfx) && sz == 2) { 14674 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14675 "psubusb", Iop_QSub8Ux16, False ); 14676 goto decode_success; 14677 } 14678 break; 14679 14680 case 0xD9: 14681 /* 66 0F D9 = PSUBUSW */ 14682 if (have66noF2noF3(pfx) && sz == 2) { 14683 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14684 "psubusw", Iop_QSub16Ux8, False ); 14685 goto decode_success; 14686 } 14687 break; 14688 14689 case 0xDA: 14690 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14691 /* 0F DA = PMINUB -- 8x8 unsigned min */ 14692 if (haveNo66noF2noF3(pfx) && sz == 4) { 14693 do_MMX_preamble(); 14694 delta = dis_MMXop_regmem_to_reg ( 14695 vbi, pfx, delta, opc, "pminub", False ); 14696 goto decode_success; 14697 } 14698 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 14699 if 
(have66noF2noF3(pfx) && sz == 2) { 14700 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14701 "pminub", Iop_Min8Ux16, False ); 14702 goto decode_success; 14703 } 14704 break; 14705 14706 case 0xDB: 14707 /* 66 0F DB = PAND */ 14708 if (have66noF2noF3(pfx) && sz == 2) { 14709 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 14710 goto decode_success; 14711 } 14712 break; 14713 14714 case 0xDC: 14715 /* 66 0F DC = PADDUSB */ 14716 if (have66noF2noF3(pfx) && sz == 2) { 14717 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14718 "paddusb", Iop_QAdd8Ux16, False ); 14719 goto decode_success; 14720 } 14721 break; 14722 14723 case 0xDD: 14724 /* 66 0F DD = PADDUSW */ 14725 if (have66noF2noF3(pfx) && sz == 2) { 14726 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14727 "paddusw", Iop_QAdd16Ux8, False ); 14728 goto decode_success; 14729 } 14730 break; 14731 14732 case 0xDE: 14733 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14734 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 14735 if (haveNo66noF2noF3(pfx) && sz == 4) { 14736 do_MMX_preamble(); 14737 delta = dis_MMXop_regmem_to_reg ( 14738 vbi, pfx, delta, opc, "pmaxub", False ); 14739 goto decode_success; 14740 } 14741 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 14742 if (have66noF2noF3(pfx) && sz == 2) { 14743 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14744 "pmaxub", Iop_Max8Ux16, False ); 14745 goto decode_success; 14746 } 14747 break; 14748 14749 case 0xDF: 14750 /* 66 0F DF = PANDN */ 14751 if (have66noF2noF3(pfx) && sz == 2) { 14752 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 ); 14753 goto decode_success; 14754 } 14755 break; 14756 14757 case 0xE0: 14758 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14759 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 14760 if (haveNo66noF2noF3(pfx) && sz == 4) { 14761 do_MMX_preamble(); 14762 delta = dis_MMXop_regmem_to_reg ( 14763 vbi, pfx, delta, opc, "pavgb", False ); 14764 goto 
decode_success; 14765 } 14766 /* 66 0F E0 = PAVGB */ 14767 if (have66noF2noF3(pfx) && sz == 2) { 14768 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14769 "pavgb", Iop_Avg8Ux16, False ); 14770 goto decode_success; 14771 } 14772 break; 14773 14774 case 0xE1: 14775 /* 66 0F E1 = PSRAW by E */ 14776 if (have66noF2noF3(pfx) && sz == 2) { 14777 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 14778 goto decode_success; 14779 } 14780 break; 14781 14782 case 0xE2: 14783 /* 66 0F E2 = PSRAD by E */ 14784 if (have66noF2noF3(pfx) && sz == 2) { 14785 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 14786 goto decode_success; 14787 } 14788 break; 14789 14790 case 0xE3: 14791 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14792 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 14793 if (haveNo66noF2noF3(pfx) && sz == 4) { 14794 do_MMX_preamble(); 14795 delta = dis_MMXop_regmem_to_reg ( 14796 vbi, pfx, delta, opc, "pavgw", False ); 14797 goto decode_success; 14798 } 14799 /* 66 0F E3 = PAVGW */ 14800 if (have66noF2noF3(pfx) && sz == 2) { 14801 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14802 "pavgw", Iop_Avg16Ux8, False ); 14803 goto decode_success; 14804 } 14805 break; 14806 14807 case 0xE4: 14808 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14809 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 14810 if (haveNo66noF2noF3(pfx) && sz == 4) { 14811 do_MMX_preamble(); 14812 delta = dis_MMXop_regmem_to_reg ( 14813 vbi, pfx, delta, opc, "pmuluh", False ); 14814 goto decode_success; 14815 } 14816 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 14817 if (have66noF2noF3(pfx) && sz == 2) { 14818 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14819 "pmulhuw", Iop_MulHi16Ux8, False ); 14820 goto decode_success; 14821 } 14822 break; 14823 14824 case 0xE5: 14825 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 14826 if (have66noF2noF3(pfx) 
&& sz == 2) { 14827 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14828 "pmulhw", Iop_MulHi16Sx8, False ); 14829 goto decode_success; 14830 } 14831 break; 14832 14833 case 0xE6: 14834 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14835 lo half xmm(G), and zero upper half, rounding towards zero */ 14836 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14837 lo half xmm(G), according to prevailing rounding mode, and zero 14838 upper half */ 14839 if ( (haveF2no66noF3(pfx) && sz == 4) 14840 || (have66noF2noF3(pfx) && sz == 2) ) { 14841 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 14842 toBool(sz == 2)/*r2zero*/); 14843 goto decode_success; 14844 } 14845 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 14846 F64 in xmm(G) */ 14847 if (haveF3no66noF2(pfx) && sz == 4) { 14848 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 14849 goto decode_success; 14850 } 14851 break; 14852 14853 case 0xE7: 14854 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14855 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 14856 Intel manual does not say anything about the usual business of 14857 the FP reg tags getting trashed whenever an MMX insn happens. 14858 So we just leave them alone. 14859 */ 14860 if (haveNo66noF2noF3(pfx) && sz == 4) { 14861 modrm = getUChar(delta); 14862 if (!epartIsReg(modrm)) { 14863 /* do_MMX_preamble(); Intel docs don't specify this */ 14864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14865 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 14866 DIP("movntq %s,%s\n", dis_buf, 14867 nameMMXReg(gregLO3ofRM(modrm))); 14868 delta += alen; 14869 goto decode_success; 14870 } 14871 /* else fall through */ 14872 } 14873 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. 
*/ 14874 if (have66noF2noF3(pfx) && sz == 2) { 14875 modrm = getUChar(delta); 14876 if (!epartIsReg(modrm)) { 14877 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14878 gen_SEGV_if_not_16_aligned( addr ); 14879 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14880 DIP("movntdq %s,%s\n", dis_buf, 14881 nameXMMReg(gregOfRexRM(pfx,modrm))); 14882 delta += alen; 14883 goto decode_success; 14884 } 14885 /* else fall through */ 14886 } 14887 break; 14888 14889 case 0xE8: 14890 /* 66 0F E8 = PSUBSB */ 14891 if (have66noF2noF3(pfx) && sz == 2) { 14892 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14893 "psubsb", Iop_QSub8Sx16, False ); 14894 goto decode_success; 14895 } 14896 break; 14897 14898 case 0xE9: 14899 /* 66 0F E9 = PSUBSW */ 14900 if (have66noF2noF3(pfx) && sz == 2) { 14901 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14902 "psubsw", Iop_QSub16Sx8, False ); 14903 goto decode_success; 14904 } 14905 break; 14906 14907 case 0xEA: 14908 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14909 /* 0F EA = PMINSW -- 16x4 signed min */ 14910 if (haveNo66noF2noF3(pfx) && sz == 4) { 14911 do_MMX_preamble(); 14912 delta = dis_MMXop_regmem_to_reg ( 14913 vbi, pfx, delta, opc, "pminsw", False ); 14914 goto decode_success; 14915 } 14916 /* 66 0F EA = PMINSW -- 16x8 signed min */ 14917 if (have66noF2noF3(pfx) && sz == 2) { 14918 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14919 "pminsw", Iop_Min16Sx8, False ); 14920 goto decode_success; 14921 } 14922 break; 14923 14924 case 0xEB: 14925 /* 66 0F EB = POR */ 14926 if (have66noF2noF3(pfx) && sz == 2) { 14927 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 14928 goto decode_success; 14929 } 14930 break; 14931 14932 case 0xEC: 14933 /* 66 0F EC = PADDSB */ 14934 if (have66noF2noF3(pfx) && sz == 2) { 14935 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14936 "paddsb", Iop_QAdd8Sx16, False ); 14937 goto decode_success; 14938 } 14939 break; 14940 14941 case 0xED: 14942 /* 66 0F ED = PADDSW 
*/ 14943 if (have66noF2noF3(pfx) && sz == 2) { 14944 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14945 "paddsw", Iop_QAdd16Sx8, False ); 14946 goto decode_success; 14947 } 14948 break; 14949 14950 case 0xEE: 14951 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14952 /* 0F EE = PMAXSW -- 16x4 signed max */ 14953 if (haveNo66noF2noF3(pfx) && sz == 4) { 14954 do_MMX_preamble(); 14955 delta = dis_MMXop_regmem_to_reg ( 14956 vbi, pfx, delta, opc, "pmaxsw", False ); 14957 goto decode_success; 14958 } 14959 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 14960 if (have66noF2noF3(pfx) && sz == 2) { 14961 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14962 "pmaxsw", Iop_Max16Sx8, False ); 14963 goto decode_success; 14964 } 14965 break; 14966 14967 case 0xEF: 14968 /* 66 0F EF = PXOR */ 14969 if (have66noF2noF3(pfx) && sz == 2) { 14970 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 ); 14971 goto decode_success; 14972 } 14973 break; 14974 14975 case 0xF1: 14976 /* 66 0F F1 = PSLLW by E */ 14977 if (have66noF2noF3(pfx) && sz == 2) { 14978 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 ); 14979 goto decode_success; 14980 } 14981 break; 14982 14983 case 0xF2: 14984 /* 66 0F F2 = PSLLD by E */ 14985 if (have66noF2noF3(pfx) && sz == 2) { 14986 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 ); 14987 goto decode_success; 14988 } 14989 break; 14990 14991 case 0xF3: 14992 /* 66 0F F3 = PSLLQ by E */ 14993 if (have66noF2noF3(pfx) && sz == 2) { 14994 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 ); 14995 goto decode_success; 14996 } 14997 break; 14998 14999 case 0xF4: 15000 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 15001 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 15002 half */ 15003 if (have66noF2noF3(pfx) && sz == 2) { 15004 IRTemp sV = newTemp(Ity_V128); 15005 IRTemp dV = newTemp(Ity_V128); 15006 modrm = getUChar(delta); 15007 UInt rG = 
gregOfRexRM(pfx,modrm); 15008 assign( dV, getXMMReg(rG) ); 15009 if (epartIsReg(modrm)) { 15010 UInt rE = eregOfRexRM(pfx,modrm); 15011 assign( sV, getXMMReg(rE) ); 15012 delta += 1; 15013 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15014 } else { 15015 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15016 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15017 delta += alen; 15018 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG)); 15019 } 15020 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) ); 15021 goto decode_success; 15022 } 15023 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 15024 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 15025 0 to form 64-bit result */ 15026 if (haveNo66noF2noF3(pfx) && sz == 4) { 15027 IRTemp sV = newTemp(Ity_I64); 15028 IRTemp dV = newTemp(Ity_I64); 15029 t1 = newTemp(Ity_I32); 15030 t0 = newTemp(Ity_I32); 15031 modrm = getUChar(delta); 15032 15033 do_MMX_preamble(); 15034 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15035 15036 if (epartIsReg(modrm)) { 15037 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15038 delta += 1; 15039 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15040 nameMMXReg(gregLO3ofRM(modrm))); 15041 } else { 15042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15043 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15044 delta += alen; 15045 DIP("pmuludq %s,%s\n", dis_buf, 15046 nameMMXReg(gregLO3ofRM(modrm))); 15047 } 15048 15049 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 15050 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 15051 putMMXReg( gregLO3ofRM(modrm), 15052 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 15053 goto decode_success; 15054 } 15055 break; 15056 15057 case 0xF5: 15058 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 15059 E(xmm or mem) to G(xmm) */ 15060 if (have66noF2noF3(pfx) && sz == 2) { 15061 IRTemp sV = newTemp(Ity_V128); 15062 IRTemp dV = newTemp(Ity_V128); 15063 modrm = getUChar(delta); 15064 UInt rG = 
gregOfRexRM(pfx,modrm); 15065 if (epartIsReg(modrm)) { 15066 UInt rE = eregOfRexRM(pfx,modrm); 15067 assign( sV, getXMMReg(rE) ); 15068 delta += 1; 15069 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15070 } else { 15071 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15072 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15073 delta += alen; 15074 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG)); 15075 } 15076 assign( dV, getXMMReg(rG) ); 15077 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) ); 15078 goto decode_success; 15079 } 15080 break; 15081 15082 case 0xF6: 15083 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 15084 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 15085 if (haveNo66noF2noF3(pfx) && sz == 4) { 15086 do_MMX_preamble(); 15087 delta = dis_MMXop_regmem_to_reg ( 15088 vbi, pfx, delta, opc, "psadbw", False ); 15089 goto decode_success; 15090 } 15091 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 15092 from E(xmm or mem) to G(xmm) */ 15093 if (have66noF2noF3(pfx) && sz == 2) { 15094 IRTemp sV = newTemp(Ity_V128); 15095 IRTemp dV = newTemp(Ity_V128); 15096 modrm = getUChar(delta); 15097 UInt rG = gregOfRexRM(pfx,modrm); 15098 if (epartIsReg(modrm)) { 15099 UInt rE = eregOfRexRM(pfx,modrm); 15100 assign( sV, getXMMReg(rE) ); 15101 delta += 1; 15102 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15103 } else { 15104 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15105 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15106 delta += alen; 15107 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG)); 15108 } 15109 assign( dV, getXMMReg(rG) ); 15110 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) ); 15111 15112 goto decode_success; 15113 } 15114 break; 15115 15116 case 0xF7: 15117 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 15118 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 15119 if (haveNo66noF2noF3(pfx) && sz == 4) { 15120 Bool ok = False; 15121 delta = dis_MMX( 
&ok, vbi, pfx, sz, delta-1 ); 15122 if (ok) goto decode_success; 15123 } 15124 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 15125 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) { 15126 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ ); 15127 goto decode_success; 15128 } 15129 break; 15130 15131 case 0xF8: 15132 /* 66 0F F8 = PSUBB */ 15133 if (have66noF2noF3(pfx) && sz == 2) { 15134 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15135 "psubb", Iop_Sub8x16, False ); 15136 goto decode_success; 15137 } 15138 break; 15139 15140 case 0xF9: 15141 /* 66 0F F9 = PSUBW */ 15142 if (have66noF2noF3(pfx) && sz == 2) { 15143 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15144 "psubw", Iop_Sub16x8, False ); 15145 goto decode_success; 15146 } 15147 break; 15148 15149 case 0xFA: 15150 /* 66 0F FA = PSUBD */ 15151 if (have66noF2noF3(pfx) && sz == 2) { 15152 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15153 "psubd", Iop_Sub32x4, False ); 15154 goto decode_success; 15155 } 15156 break; 15157 15158 case 0xFB: 15159 /* 66 0F FB = PSUBQ */ 15160 if (have66noF2noF3(pfx) && sz == 2) { 15161 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15162 "psubq", Iop_Sub64x2, False ); 15163 goto decode_success; 15164 } 15165 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 15166 /* 0F FB = PSUBQ -- sub 64x1 */ 15167 if (haveNo66noF2noF3(pfx) && sz == 4) { 15168 do_MMX_preamble(); 15169 delta = dis_MMXop_regmem_to_reg ( 15170 vbi, pfx, delta, opc, "psubq", False ); 15171 goto decode_success; 15172 } 15173 break; 15174 15175 case 0xFC: 15176 /* 66 0F FC = PADDB */ 15177 if (have66noF2noF3(pfx) && sz == 2) { 15178 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15179 "paddb", Iop_Add8x16, False ); 15180 goto decode_success; 15181 } 15182 break; 15183 15184 case 0xFD: 15185 /* 66 0F FD = PADDW */ 15186 if (have66noF2noF3(pfx) && sz == 2) { 15187 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15188 "paddw", Iop_Add16x8, False ); 15189 goto 
decode_success;
      }
      break;

   case 0xFE:
      /* 66 0F FE = PADDD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddd", Iop_Add32x4, False );
         goto decode_success;
      }
      break;

   default:
      goto decode_failure;

   }

  decode_failure:
   /* Decode failed: report failure and leave the guest IP where it
      was on entry, so the caller can try another decoder. */
   *decode_OK = False;
   return deltaIN;

  decode_success:
   /* Decode succeeded: hand back the advanced instruction offset. */
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* MOVDDUP, 128-bit form: fetch a 64-bit value (the low lane of
   xmm(E), or 64 bits from memory) and write it to both 64-bit lanes
   of xmm(G).  For the AVX encoding (isAvx) the upper half of the
   destination YMM register is zeroed.  Returns the updated
   instruction offset. */
static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp d0    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
      assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
   } else {
      /* Memory source: only 64 bits are read. */
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
   return delta;
}


/* VMOVDDUP, 256-bit form: duplicate source lane 0 into result lanes
   0 and 1, and source lane 2 into result lanes 2 and 3.  Note the
   memory form reads the two 64-bit values at offsets 0 and 16; the
   source bytes 8..15 and 24..31 are not used. */
static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp d0    = newTemp(Ity_I64);
   IRTemp d1    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
      assign ( d0, getYMMRegLane64(rE, 0) );
      assign ( d1, getYMMRegLane64(rE, 2) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
                                        mkexpr(addr), mkU64(16))) );
      DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   putYMMRegLane64( rG, 0, mkexpr(d0) );
   putYMMRegLane64( rG, 1, mkexpr(d0) );
   putYMMRegLane64( rG, 2, mkexpr(d1) );
   putYMMRegLane64( rG, 3, mkexpr(d1) );
   return delta;
}


/* MOVSLDUP (isL) / MOVSHDUP (!isL), 128-bit form: duplicate the
   even-numbered (isL) or odd-numbered (!isL) 32-bit lanes of E into
   adjacent lane pairs of G. */
static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V128);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ?
'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only the non-AVX (SSE3) encoding traps on a misaligned
         16-byte memory operand. */
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


/* VMOVSLDUP (isL) / VMOVSHDUP (!isL), 256-bit form: as the 128-bit
   version, but the lane duplication is applied independently to each
   128-bit half of the source. */
static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V256);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
   putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
                                : mkV128from32s( s7, s7, s5, s5 ) );
   putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
                                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


/* HADDPS (isAdd) / HSUBPS helper: horizontal add/sub of packed F32s.
   leftV gathers the even-numbered lanes of s:d and rightV the
   odd-numbered lanes; the result is leftV op rightV lanewise.  The
   rounding mode is faked -- see get_FAKE_roundingmode. */
static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;

   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );

   assign( leftV,  mkV128from32s( s2, s0, d2, d0 ) );
   assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}


/* HADDPD (isAdd) / HSUBPD helper: as math_HADDPS_128, but for
   2 x F64 lanes. */
static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s1, s0, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   assign( leftV,  binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ?
Iop_Add64Fx2 : Iop_Sub64Fx2,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}


/* Top-level decoder for 0F-escape SSE3 opcodes.  On failure sets
   *decode_OK to False and returns deltaIN unchanged; on success sets
   it to True and returns the advanced offset. */
__attribute__((noinline))
static
Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long delta = deltaIN;
   UChar opc = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x12:
      /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (2:2:0:0). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                   True/*isL*/ );
         goto decode_success;
      }
      /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),