1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_amd64_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2015 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 /* Translates AMD64 code to IR. */ 37 38 /* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 64-bit value is being written. 42 43 x87 FP Limitations: 44 45 * all arithmetic done at 64 bits 46 47 * no FP exceptions, except for handling stack over/underflow 48 49 * FP rounding mode observed only for float->int conversions and 50 int->float conversions which could lose accuracy, and for 51 float-to-float rounding. For all other operations, 52 round-to-nearest is used, regardless. 53 54 * some of the FCOM cases could do with testing -- not convinced 55 that the args are the right way round. 56 57 * FSAVE does not re-initialise the FPU; it should do 58 59 * FINIT not only initialises the FPU environment, it also zeroes 60 all the FP registers. It should leave the registers unchanged. 61 62 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 63 per Intel docs this bit has no meaning anyway. Since PUSHF is the 64 only way to observe eflags[1], a proper fix would be to make that 65 bit be set by PUSHF. 66 67 This module uses global variables and so is not MT-safe (if that 68 should ever become relevant). 69 */ 70 71 /* Notes re address size overrides (0x67). 72 73 According to the AMD documentation (24594 Rev 3.09, Sept 2003, 74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose 75 and System Instructions"), Section 1.2.3 ("Address-Size Override 76 Prefix"): 77 78 0x67 applies to all explicit memory references, causing the top 79 32 bits of the effective address to become zero. 80 81 0x67 has no effect on stack references (push/pop); these always 82 use a 64-bit address. 83 84 0x67 changes the interpretation of instructions which implicitly 85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used 86 instead. These are: 87 88 cmp{s,sb,sw,sd,sq} 89 in{s,sb,sw,sd} 90 jcxz, jecxz, jrcxz 91 lod{s,sb,sw,sd,sq} 92 loop{,e,bz,be,z} 93 mov{s,sb,sw,sd,sq} 94 out{s,sb,sw,sd} 95 rep{,e,ne,nz} 96 sca{s,sb,sw,sd,sq} 97 sto{s,sb,sw,sd,sq} 98 xlat{,b} */ 99 100 /* "Special" instructions. 

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 4 is allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rdi,%rdi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly.
*/ 186 187 /* We need to know this to do sub-register accesses correctly. */ 188 static VexEndness host_endness; 189 190 /* Pointer to the guest code area (points to start of BB, not to the 191 insn being processed). */ 192 static const UChar* guest_code; 193 194 /* The guest address corresponding to guest_code[0]. */ 195 static Addr64 guest_RIP_bbstart; 196 197 /* The guest address for the instruction currently being 198 translated. */ 199 static Addr64 guest_RIP_curr_instr; 200 201 /* The IRSB* into which we're generating code. */ 202 static IRSB* irsb; 203 204 /* For ensuring that %rip-relative addressing is done right. A read 205 of %rip generates the address of the next instruction. It may be 206 that we don't conveniently know that inside disAMode(). For sanity 207 checking, if the next insn %rip is needed, we make a guess at what 208 it is, record that guess here, and set the accompanying Bool to 209 indicate that -- after this insn's decode is finished -- that guess 210 needs to be checked. */ 211 212 /* At the start of each insn decode, is set to (0, False). 213 After the decode, if _mustcheck is now True, _assumed is 214 checked. */ 215 216 static Addr64 guest_RIP_next_assumed; 217 static Bool guest_RIP_next_mustcheck; 218 219 220 /*------------------------------------------------------------*/ 221 /*--- Helpers for constructing IR. ---*/ 222 /*------------------------------------------------------------*/ 223 224 /* Generate a new temporary of the given type. */ 225 static IRTemp newTemp ( IRType ty ) 226 { 227 vassert(isPlausibleIRType(ty)); 228 return newIRTemp( irsb->tyenv, ty ); 229 } 230 231 /* Add a statement to the list held by "irsb". */ 232 static void stmt ( IRStmt* st ) 233 { 234 addStmtToIRSB( irsb, st ); 235 } 236 237 /* Generate a statement "dst := e". 
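   As an illustrative sketch of how these small helpers compose (the
   temp 't' here is hypothetical, not part of any real decode path):

      IRTemp t = newTemp(Ity_I64);
      assign( t, binop(Iop_Add64, getIReg64(R_RAX), mkU64(1)) );
      stmt( IRStmt_Put( OFFB_RAX, mkexpr(t) ) );

   which adds 1 to the guest RAX and writes the result back.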
*/ 238 static void assign ( IRTemp dst, IRExpr* e ) 239 { 240 stmt( IRStmt_WrTmp(dst, e) ); 241 } 242 243 static IRExpr* unop ( IROp op, IRExpr* a ) 244 { 245 return IRExpr_Unop(op, a); 246 } 247 248 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 249 { 250 return IRExpr_Binop(op, a1, a2); 251 } 252 253 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 254 { 255 return IRExpr_Triop(op, a1, a2, a3); 256 } 257 258 static IRExpr* mkexpr ( IRTemp tmp ) 259 { 260 return IRExpr_RdTmp(tmp); 261 } 262 263 static IRExpr* mkU8 ( ULong i ) 264 { 265 vassert(i < 256); 266 return IRExpr_Const(IRConst_U8( (UChar)i )); 267 } 268 269 static IRExpr* mkU16 ( ULong i ) 270 { 271 vassert(i < 0x10000ULL); 272 return IRExpr_Const(IRConst_U16( (UShort)i )); 273 } 274 275 static IRExpr* mkU32 ( ULong i ) 276 { 277 vassert(i < 0x100000000ULL); 278 return IRExpr_Const(IRConst_U32( (UInt)i )); 279 } 280 281 static IRExpr* mkU64 ( ULong i ) 282 { 283 return IRExpr_Const(IRConst_U64(i)); 284 } 285 286 static IRExpr* mkU ( IRType ty, ULong i ) 287 { 288 switch (ty) { 289 case Ity_I8: return mkU8(i); 290 case Ity_I16: return mkU16(i); 291 case Ity_I32: return mkU32(i); 292 case Ity_I64: return mkU64(i); 293 default: vpanic("mkU(amd64)"); 294 } 295 } 296 297 static void storeLE ( IRExpr* addr, IRExpr* data ) 298 { 299 stmt( IRStmt_Store(Iend_LE, addr, data) ); 300 } 301 302 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 303 { 304 return IRExpr_Load(Iend_LE, ty, addr); 305 } 306 307 static IROp mkSizedOp ( IRType ty, IROp op8 ) 308 { 309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 310 || op8 == Iop_Mul8 311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 314 || op8 == Iop_CasCmpNE8 315 || op8 == Iop_Not8 ); 316 switch (ty) { 317 case Ity_I8: return 0 +op8; 318 case Ity_I16: return 1 +op8; 319 case Ity_I32: return 2 +op8; 320 case Ity_I64: return 3 +op8; 321 default: vpanic("mkSizedOp(amd64)"); 322 } 323 } 324 325 static 326 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) 327 { 328 if (szSmall == 1 && szBig == 4) { 329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); 330 } 331 if (szSmall == 1 && szBig == 2) { 332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); 333 } 334 if (szSmall == 2 && szBig == 4) { 335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); 336 } 337 if (szSmall == 1 && szBig == 8 && !signd) { 338 return unop(Iop_8Uto64, src); 339 } 340 if (szSmall == 1 && szBig == 8 && signd) { 341 return unop(Iop_8Sto64, src); 342 } 343 if (szSmall == 2 && szBig == 8 && !signd) { 344 return unop(Iop_16Uto64, src); 345 } 346 if (szSmall == 2 && szBig == 8 && signd) { 347 return unop(Iop_16Sto64, src); 348 } 349 vpanic("doScalarWidening(amd64)"); 350 } 351 352 static 353 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value ) 354 { 355 IRType ty = typeOfIRExpr(irsb->tyenv, value); 356 stmt( IRStmt_Put(gstOffB, 357 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) ); 358 } 359 360 361 /*------------------------------------------------------------*/ 362 /*--- Debugging output ---*/ 363 /*------------------------------------------------------------*/ 364 365 /* Bomb out if we can't handle something. */ 366 __attribute__ ((noreturn)) 367 static void unimplemented ( const HChar* str ) 368 { 369 vex_printf("amd64toIR: unimplemented feature\n"); 370 vpanic(str); 371 } 372 373 #define DIP(format, args...) 
\ 374 if (vex_traceflags & VEX_TRACE_FE) \ 375 vex_printf(format, ## args) 376 377 #define DIS(buf, format, args...) \ 378 if (vex_traceflags & VEX_TRACE_FE) \ 379 vex_sprintf(buf, format, ## args) 380 381 382 /*------------------------------------------------------------*/ 383 /*--- Offsets of various parts of the amd64 guest state. ---*/ 384 /*------------------------------------------------------------*/ 385 386 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX) 387 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX) 388 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX) 389 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX) 390 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP) 391 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP) 392 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI) 393 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI) 394 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8) 395 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9) 396 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10) 397 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11) 398 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12) 399 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13) 400 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14) 401 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15) 402 403 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP) 404 405 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST) 406 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST) 407 408 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP) 409 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1) 410 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2) 411 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP) 412 413 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0]) 414 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0]) 415 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) 416 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG) 417 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) 418 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) 419 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) 420 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND) 421 422 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND) 423 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0) 424 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1) 425 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2) 426 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3) 427 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4) 428 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5) 429 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6) 430 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7) 431 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8) 432 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9) 433 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10) 434 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11) 435 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12) 436 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13) 437 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14) 438 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15) 439 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16) 440 441 #define OFFB_EMNOTE 
offsetof(VexGuestAMD64State,guest_EMNOTE) 442 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART) 443 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN) 444 445 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) 446 447 448 /*------------------------------------------------------------*/ 449 /*--- Helper bits and pieces for deconstructing the ---*/ 450 /*--- amd64 insn stream. ---*/ 451 /*------------------------------------------------------------*/ 452 453 /* This is the AMD64 register encoding -- integer regs. */ 454 #define R_RAX 0 455 #define R_RCX 1 456 #define R_RDX 2 457 #define R_RBX 3 458 #define R_RSP 4 459 #define R_RBP 5 460 #define R_RSI 6 461 #define R_RDI 7 462 #define R_R8 8 463 #define R_R9 9 464 #define R_R10 10 465 #define R_R11 11 466 #define R_R12 12 467 #define R_R13 13 468 #define R_R14 14 469 #define R_R15 15 470 471 /* This is the Intel register encoding -- segment regs. */ 472 #define R_ES 0 473 #define R_CS 1 474 #define R_SS 2 475 #define R_DS 3 476 #define R_FS 4 477 #define R_GS 5 478 479 480 /* Various simple conversions */ 481 482 static ULong extend_s_8to64 ( UChar x ) 483 { 484 return (ULong)((Long)(((ULong)x) << 56) >> 56); 485 } 486 487 static ULong extend_s_16to64 ( UShort x ) 488 { 489 return (ULong)((Long)(((ULong)x) << 48) >> 48); 490 } 491 492 static ULong extend_s_32to64 ( UInt x ) 493 { 494 return (ULong)((Long)(((ULong)x) << 32) >> 32); 495 } 496 497 /* Figure out whether the mod and rm parts of a modRM byte refer to a 498 register or memory. If so, the byte will have the form 11XXXYYY, 499 where YYY is the register number. */ 500 inline 501 static Bool epartIsReg ( UChar mod_reg_rm ) 502 { 503 return toBool(0xC0 == (mod_reg_rm & 0xC0)); 504 } 505 506 /* Extract the 'g' field from a modRM byte. This only produces 3 507 bits, which is not a complete register number. You should avoid 508 this function if at all possible. */ 509 inline 510 static Int gregLO3ofRM ( UChar mod_reg_rm ) 511 { 512 return (Int)( (mod_reg_rm >> 3) & 7 ); 513 } 514 515 /* Ditto the 'e' field of a modRM byte. */ 516 inline 517 static Int eregLO3ofRM ( UChar mod_reg_rm ) 518 { 519 return (Int)(mod_reg_rm & 0x7); 520 } 521 522 /* Get a 8/16/32-bit unsigned value out of the insn stream. */ 523 524 static inline UChar getUChar ( Long delta ) 525 { 526 UChar v = guest_code[delta+0]; 527 return v; 528 } 529 530 static UInt getUDisp16 ( Long delta ) 531 { 532 UInt v = guest_code[delta+1]; v <<= 8; 533 v |= guest_code[delta+0]; 534 return v & 0xFFFF; 535 } 536 537 //.. static UInt getUDisp ( Int size, Long delta ) 538 //.. { 539 //.. switch (size) { 540 //.. case 4: return getUDisp32(delta); 541 //.. case 2: return getUDisp16(delta); 542 //.. case 1: return getUChar(delta); 543 //.. default: vpanic("getUDisp(x86)"); 544 //.. } 545 //.. return 0; /*notreached*/ 546 //.. } 547 548 549 /* Get a byte value out of the insn stream and sign-extend to 64 550 bits. */ 551 static Long getSDisp8 ( Long delta ) 552 { 553 return extend_s_8to64( guest_code[delta] ); 554 } 555 556 /* Get a 16-bit value out of the insn stream and sign-extend to 64 557 bits. */ 558 static Long getSDisp16 ( Long delta ) 559 { 560 UInt v = guest_code[delta+1]; v <<= 8; 561 v |= guest_code[delta+0]; 562 return extend_s_16to64( (UShort)v ); 563 } 564 565 /* Get a 32-bit value out of the insn stream and sign-extend to 64 566 bits. 
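   For example, the insn-stream bytes FC FF FF FF assemble
   little-endianly to 0xFFFFFFFC, which sign-extends to the 64-bit
   value 0xFFFFFFFFFFFFFFFC (-4).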
*/ 567 static Long getSDisp32 ( Long delta ) 568 { 569 UInt v = guest_code[delta+3]; v <<= 8; 570 v |= guest_code[delta+2]; v <<= 8; 571 v |= guest_code[delta+1]; v <<= 8; 572 v |= guest_code[delta+0]; 573 return extend_s_32to64( v ); 574 } 575 576 /* Get a 64-bit value out of the insn stream. */ 577 static Long getDisp64 ( Long delta ) 578 { 579 ULong v = 0; 580 v |= guest_code[delta+7]; v <<= 8; 581 v |= guest_code[delta+6]; v <<= 8; 582 v |= guest_code[delta+5]; v <<= 8; 583 v |= guest_code[delta+4]; v <<= 8; 584 v |= guest_code[delta+3]; v <<= 8; 585 v |= guest_code[delta+2]; v <<= 8; 586 v |= guest_code[delta+1]; v <<= 8; 587 v |= guest_code[delta+0]; 588 return v; 589 } 590 591 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error 592 if this is called with size==8. Should not happen. */ 593 static Long getSDisp ( Int size, Long delta ) 594 { 595 switch (size) { 596 case 4: return getSDisp32(delta); 597 case 2: return getSDisp16(delta); 598 case 1: return getSDisp8(delta); 599 default: vpanic("getSDisp(amd64)"); 600 } 601 } 602 603 static ULong mkSizeMask ( Int sz ) 604 { 605 switch (sz) { 606 case 1: return 0x00000000000000FFULL; 607 case 2: return 0x000000000000FFFFULL; 608 case 4: return 0x00000000FFFFFFFFULL; 609 case 8: return 0xFFFFFFFFFFFFFFFFULL; 610 default: vpanic("mkSzMask(amd64)"); 611 } 612 } 613 614 static Int imin ( Int a, Int b ) 615 { 616 return (a < b) ? a : b; 617 } 618 619 static IRType szToITy ( Int n ) 620 { 621 switch (n) { 622 case 1: return Ity_I8; 623 case 2: return Ity_I16; 624 case 4: return Ity_I32; 625 case 8: return Ity_I64; 626 default: vex_printf("\nszToITy(%d)\n", n); 627 vpanic("szToITy(amd64)"); 628 } 629 } 630 631 632 /*------------------------------------------------------------*/ 633 /*--- For dealing with prefixes. ---*/ 634 /*------------------------------------------------------------*/ 635 636 /* The idea is to pass around an int holding a bitmask summarising 637 info from the prefixes seen on the current instruction, including 638 info from the REX byte. This info is used in various places, but 639 most especially when making sense of register fields in 640 instructions. 641 642 The top 8 bits of the prefix are 0x55, just as a hacky way to 643 ensure it really is a valid prefix. 644 645 Things you can safely assume about a well-formed prefix: 646 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. 647 * if REX is not present then REXW,REXR,REXX,REXB will read 648 as zero. 649 * F2 and F3 will not both be 1. 
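
   As an illustration (a sketch only; the decoder builds the value up
   incrementally later in this file): for an insn starting with the
   bytes 66 48, i.e. an operand-size override followed by a REX.W
   byte, the summary word would be
   PFX_EMPTY | PFX_66 | PFX_REX | PFX_REXW.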
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


#define PFX_EMPTY 0x55000000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is.
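   (For example, 0F 58 is ADDPS, but F3 0F 58 is ADDSS and F2 0F 58 is
   ADDSD.)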
*/ 706 static Bool haveF2orF3 ( Prefix pfx ) { 707 return toBool((pfx & (PFX_F2|PFX_F3)) > 0); 708 } 709 static Bool haveF2andF3 ( Prefix pfx ) { 710 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3)); 711 } 712 static Bool haveF2 ( Prefix pfx ) { 713 return toBool((pfx & PFX_F2) > 0); 714 } 715 static Bool haveF3 ( Prefix pfx ) { 716 return toBool((pfx & PFX_F3) > 0); 717 } 718 719 static Bool have66 ( Prefix pfx ) { 720 return toBool((pfx & PFX_66) > 0); 721 } 722 static Bool haveASO ( Prefix pfx ) { 723 return toBool((pfx & PFX_ASO) > 0); 724 } 725 static Bool haveLOCK ( Prefix pfx ) { 726 return toBool((pfx & PFX_LOCK) > 0); 727 } 728 729 /* Return True iff pfx has 66 set and F2 and F3 clear */ 730 static Bool have66noF2noF3 ( Prefix pfx ) 731 { 732 return 733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); 734 } 735 736 /* Return True iff pfx has F2 set and 66 and F3 clear */ 737 static Bool haveF2no66noF3 ( Prefix pfx ) 738 { 739 return 740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2); 741 } 742 743 /* Return True iff pfx has F3 set and 66 and F2 clear */ 744 static Bool haveF3no66noF2 ( Prefix pfx ) 745 { 746 return 747 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); 748 } 749 750 /* Return True iff pfx has F3 set and F2 clear */ 751 static Bool haveF3noF2 ( Prefix pfx ) 752 { 753 return 754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); 755 } 756 757 /* Return True iff pfx has F2 set and F3 clear */ 758 static Bool haveF2noF3 ( Prefix pfx ) 759 { 760 return 761 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); 762 } 763 764 /* Return True iff pfx has 66, F2 and F3 clear */ 765 static Bool haveNo66noF2noF3 ( Prefix pfx ) 766 { 767 return 768 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); 769 } 770 771 /* Return True iff pfx has any of 66, F2 and F3 set */ 772 static Bool have66orF2orF3 ( Prefix pfx ) 773 { 774 return toBool( ! haveNo66noF2noF3(pfx) ); 775 } 776 777 /* Return True iff pfx has 66 or F3 set */ 778 static Bool have66orF3 ( Prefix pfx ) 779 { 780 return toBool((pfx & (PFX_66|PFX_F3)) > 0); 781 } 782 783 /* Clear all the segment-override bits in a prefix. */ 784 static Prefix clearSegBits ( Prefix p ) 785 { 786 return 787 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); 788 } 789 790 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */ 791 static UInt getVexNvvvv ( Prefix pfx ) { 792 UInt r = (UInt)pfx; 793 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */ 794 return r & 0xF; 795 } 796 797 static Bool haveVEX ( Prefix pfx ) { 798 return toBool(pfx & PFX_VEX); 799 } 800 801 static Int getVexL ( Prefix pfx ) { 802 return (pfx & PFX_VEXL) ? 1 : 0; 803 } 804 805 806 /*------------------------------------------------------------*/ 807 /*--- For dealing with escapes ---*/ 808 /*------------------------------------------------------------*/ 809 810 811 /* Escapes come after the prefixes, but before the primary opcode 812 byte. They escape the primary opcode byte into a bigger space. 813 The 0xF0000000 isn't significant, except so as to make it not 814 overlap valid Prefix values, for sanity checking. 815 */ 816 817 typedef 818 enum { 819 ESC_NONE=0xF0000000, // none 820 ESC_0F, // 0F 821 ESC_0F38, // 0F 38 822 ESC_0F3A // 0F 3A 823 } 824 Escape; 825 826 827 /*------------------------------------------------------------*/ 828 /*--- For dealing with integer registers ---*/ 829 /*------------------------------------------------------------*/ 830 831 /* This is somewhat complex. 
The rules are: 832 833 For 64, 32 and 16 bit register references, the e or g fields in the 834 modrm bytes supply the low 3 bits of the register number. The 835 fourth (most-significant) bit of the register number is supplied by 836 the REX byte, if it is present; else that bit is taken to be zero. 837 838 The REX.R bit supplies the high bit corresponding to the g register 839 field, and the REX.B bit supplies the high bit corresponding to the 840 e register field (when the mod part of modrm indicates that modrm's 841 e component refers to a register and not to memory). 842 843 The REX.X bit supplies a high register bit for certain registers 844 in SIB address modes, and is generally rarely used. 845 846 For 8 bit register references, the presence of the REX byte itself 847 has significance. If there is no REX present, then the 3-bit 848 number extracted from the modrm e or g field is treated as an index 849 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the 850 old x86 encoding scheme. 851 852 But if there is a REX present, the register reference is 853 interpreted in the same way as for 64/32/16-bit references: a high 854 bit is extracted from REX, giving a 4-bit number, and the denoted 855 register is the lowest 8 bits of the 16 integer registers denoted 856 by the number. In particular, values 3 through 7 of this sequence 857 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of 858 %rsp %rbp %rsi %rdi. 859 860 The REX.W bit has no bearing at all on register numbers. Instead 861 its presence indicates that the operand size is to be overridden 862 from its default value (32 bits) to 64 bits instead. This is in 863 the same fashion that an 0x66 prefix indicates the operand size is 864 to be overridden from 32 bits down to 16 bits. When both REX.W and 865 0x66 are present there is a conflict, and REX.W takes precedence. 866 867 Rather than try to handle this complexity using a single huge 868 function, several smaller ones are provided. The aim is to make it 869 as difficult as possible to screw up register decoding in a subtle 870 and hard-to-track-down way. 871 872 Because these routines fish around in the host's memory (that is, 873 in the guest state area) for sub-parts of guest registers, their 874 correctness depends on the host's endianness. So far these 875 routines only work for little-endian hosts. Those for which 876 endianness is important have assertions to ensure sanity. 877 */ 878 879 880 /* About the simplest question you can ask: where do the 64-bit 881 integer registers live (in the guest state) ? */ 882 883 static Int integerGuestReg64Offset ( UInt reg ) 884 { 885 switch (reg) { 886 case R_RAX: return OFFB_RAX; 887 case R_RCX: return OFFB_RCX; 888 case R_RDX: return OFFB_RDX; 889 case R_RBX: return OFFB_RBX; 890 case R_RSP: return OFFB_RSP; 891 case R_RBP: return OFFB_RBP; 892 case R_RSI: return OFFB_RSI; 893 case R_RDI: return OFFB_RDI; 894 case R_R8: return OFFB_R8; 895 case R_R9: return OFFB_R9; 896 case R_R10: return OFFB_R10; 897 case R_R11: return OFFB_R11; 898 case R_R12: return OFFB_R12; 899 case R_R13: return OFFB_R13; 900 case R_R14: return OFFB_R14; 901 case R_R15: return OFFB_R15; 902 default: vpanic("integerGuestReg64Offset(amd64)"); 903 } 904 } 905 906 907 /* Produce the name of an integer register, for printing purposes. 908 reg is a number in the range 0 .. 15 that has been generated from a 909 3-bit reg-field number and a REX extension bit. irregular denotes 910 the case where sz==1 and no REX byte is present. 
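   For example (values taken from the tables below): nameIReg(1, 6, True)
   is "%dh", whereas nameIReg(1, 6, False) is "%sil", and
   nameIReg(8, 13, False) is "%r13".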
*/ 911 912 static 913 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) 914 { 915 static const HChar* ireg64_names[16] 916 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 917 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 918 static const HChar* ireg32_names[16] 919 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 920 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; 921 static const HChar* ireg16_names[16] 922 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", 923 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; 924 static const HChar* ireg8_names[16] 925 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", 926 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; 927 static const HChar* ireg8_irregular[8] 928 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; 929 930 vassert(reg < 16); 931 if (sz == 1) { 932 if (irregular) 933 vassert(reg < 8); 934 } else { 935 vassert(irregular == False); 936 } 937 938 switch (sz) { 939 case 8: return ireg64_names[reg]; 940 case 4: return ireg32_names[reg]; 941 case 2: return ireg16_names[reg]; 942 case 1: if (irregular) { 943 return ireg8_irregular[reg]; 944 } else { 945 return ireg8_names[reg]; 946 } 947 default: vpanic("nameIReg(amd64)"); 948 } 949 } 950 951 /* Using the same argument conventions as nameIReg, produce the 952 guest state offset of an integer register. */ 953 954 static 955 Int offsetIReg ( Int sz, UInt reg, Bool irregular ) 956 { 957 vassert(reg < 16); 958 if (sz == 1) { 959 if (irregular) 960 vassert(reg < 8); 961 } else { 962 vassert(irregular == False); 963 } 964 965 /* Deal with irregular case -- sz==1 and no REX present */ 966 if (sz == 1 && irregular) { 967 switch (reg) { 968 case R_RSP: return 1+ OFFB_RAX; 969 case R_RBP: return 1+ OFFB_RCX; 970 case R_RSI: return 1+ OFFB_RDX; 971 case R_RDI: return 1+ OFFB_RBX; 972 default: break; /* use the normal case */ 973 } 974 } 975 976 /* Normal case */ 977 return integerGuestReg64Offset(reg); 978 } 979 980 981 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */ 982 983 static IRExpr* getIRegCL ( void ) 984 { 985 vassert(host_endness == VexEndnessLE); 986 return IRExpr_Get( OFFB_RCX, Ity_I8 ); 987 } 988 989 990 /* Write to the %AH register. */ 991 992 static void putIRegAH ( IRExpr* e ) 993 { 994 vassert(host_endness == VexEndnessLE); 995 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); 996 stmt( IRStmt_Put( OFFB_RAX+1, e ) ); 997 } 998 999 1000 /* Read/write various widths of %RAX, as it has various 1001 special-purpose uses. 
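   For example (a sketch): putIRegRAX(4, mkU32(0)) clears all 64 bits
   of the guest RAX, because the 32-bit value is widened with
   Iop_32Uto64 before the Put -- mirroring the hardware rule that a
   write to %eax zeroes the upper half of %rax.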
*/ 1002 1003 static const HChar* nameIRegRAX ( Int sz ) 1004 { 1005 switch (sz) { 1006 case 1: return "%al"; 1007 case 2: return "%ax"; 1008 case 4: return "%eax"; 1009 case 8: return "%rax"; 1010 default: vpanic("nameIRegRAX(amd64)"); 1011 } 1012 } 1013 1014 static IRExpr* getIRegRAX ( Int sz ) 1015 { 1016 vassert(host_endness == VexEndnessLE); 1017 switch (sz) { 1018 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 ); 1019 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 ); 1020 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 )); 1021 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 ); 1022 default: vpanic("getIRegRAX(amd64)"); 1023 } 1024 } 1025 1026 static void putIRegRAX ( Int sz, IRExpr* e ) 1027 { 1028 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1029 vassert(host_endness == VexEndnessLE); 1030 switch (sz) { 1031 case 8: vassert(ty == Ity_I64); 1032 stmt( IRStmt_Put( OFFB_RAX, e )); 1033 break; 1034 case 4: vassert(ty == Ity_I32); 1035 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) )); 1036 break; 1037 case 2: vassert(ty == Ity_I16); 1038 stmt( IRStmt_Put( OFFB_RAX, e )); 1039 break; 1040 case 1: vassert(ty == Ity_I8); 1041 stmt( IRStmt_Put( OFFB_RAX, e )); 1042 break; 1043 default: vpanic("putIRegRAX(amd64)"); 1044 } 1045 } 1046 1047 1048 /* Read/write various widths of %RDX, as it has various 1049 special-purpose uses. */ 1050 1051 static const HChar* nameIRegRDX ( Int sz ) 1052 { 1053 switch (sz) { 1054 case 1: return "%dl"; 1055 case 2: return "%dx"; 1056 case 4: return "%edx"; 1057 case 8: return "%rdx"; 1058 default: vpanic("nameIRegRDX(amd64)"); 1059 } 1060 } 1061 1062 static IRExpr* getIRegRDX ( Int sz ) 1063 { 1064 vassert(host_endness == VexEndnessLE); 1065 switch (sz) { 1066 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 ); 1067 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 ); 1068 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 )); 1069 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 ); 1070 default: vpanic("getIRegRDX(amd64)"); 1071 } 1072 } 1073 1074 static void putIRegRDX ( Int sz, IRExpr* e ) 1075 { 1076 vassert(host_endness == VexEndnessLE); 1077 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1078 switch (sz) { 1079 case 8: stmt( IRStmt_Put( OFFB_RDX, e )); 1080 break; 1081 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) )); 1082 break; 1083 case 2: stmt( IRStmt_Put( OFFB_RDX, e )); 1084 break; 1085 case 1: stmt( IRStmt_Put( OFFB_RDX, e )); 1086 break; 1087 default: vpanic("putIRegRDX(amd64)"); 1088 } 1089 } 1090 1091 1092 /* Simplistic functions to deal with the integer registers as a 1093 straightforward bank of 16 64-bit regs. */ 1094 1095 static IRExpr* getIReg64 ( UInt regno ) 1096 { 1097 return IRExpr_Get( integerGuestReg64Offset(regno), 1098 Ity_I64 ); 1099 } 1100 1101 static void putIReg64 ( UInt regno, IRExpr* e ) 1102 { 1103 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1104 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) ); 1105 } 1106 1107 static const HChar* nameIReg64 ( UInt regno ) 1108 { 1109 return nameIReg( 8, regno, False ); 1110 } 1111 1112 1113 /* Simplistic functions to deal with the lower halves of integer 1114 registers as a straightforward bank of 16 32-bit regs. 
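   (A sketch of typical use of the 64-bit bank defined above:
      putIReg64( R_RSP, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)) );
   decrements the guest stack pointer by 8, much as a quadword push
   would.)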
*/ 1115 1116 static IRExpr* getIReg32 ( UInt regno ) 1117 { 1118 vassert(host_endness == VexEndnessLE); 1119 return unop(Iop_64to32, 1120 IRExpr_Get( integerGuestReg64Offset(regno), 1121 Ity_I64 )); 1122 } 1123 1124 static void putIReg32 ( UInt regno, IRExpr* e ) 1125 { 1126 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1127 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1128 unop(Iop_32Uto64,e) ) ); 1129 } 1130 1131 static const HChar* nameIReg32 ( UInt regno ) 1132 { 1133 return nameIReg( 4, regno, False ); 1134 } 1135 1136 1137 /* Simplistic functions to deal with the lower quarters of integer 1138 registers as a straightforward bank of 16 16-bit regs. */ 1139 1140 static IRExpr* getIReg16 ( UInt regno ) 1141 { 1142 vassert(host_endness == VexEndnessLE); 1143 return IRExpr_Get( integerGuestReg64Offset(regno), 1144 Ity_I16 ); 1145 } 1146 1147 static void putIReg16 ( UInt regno, IRExpr* e ) 1148 { 1149 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1150 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1151 unop(Iop_16Uto64,e) ) ); 1152 } 1153 1154 static const HChar* nameIReg16 ( UInt regno ) 1155 { 1156 return nameIReg( 2, regno, False ); 1157 } 1158 1159 1160 /* Sometimes what we know is a 3-bit register number, a REX byte, and 1161 which field of the REX byte is to be used to extend to a 4-bit 1162 number. These functions cater for that situation. 1163 */ 1164 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits ) 1165 { 1166 vassert(lo3bits < 8); 1167 vassert(IS_VALID_PFX(pfx)); 1168 return getIReg64( lo3bits | (getRexX(pfx) << 3) ); 1169 } 1170 1171 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits ) 1172 { 1173 vassert(lo3bits < 8); 1174 vassert(IS_VALID_PFX(pfx)); 1175 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False ); 1176 } 1177 1178 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1179 { 1180 vassert(lo3bits < 8); 1181 vassert(IS_VALID_PFX(pfx)); 1182 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1183 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3), 1184 toBool(sz==1 && !haveREX(pfx)) ); 1185 } 1186 1187 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1188 { 1189 vassert(lo3bits < 8); 1190 vassert(IS_VALID_PFX(pfx)); 1191 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1192 if (sz == 4) { 1193 sz = 8; 1194 return unop(Iop_64to32, 1195 IRExpr_Get( 1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1197 False/*!irregular*/ ), 1198 szToITy(sz) 1199 ) 1200 ); 1201 } else { 1202 return IRExpr_Get( 1203 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1204 toBool(sz==1 && !haveREX(pfx)) ), 1205 szToITy(sz) 1206 ); 1207 } 1208 } 1209 1210 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e ) 1211 { 1212 vassert(lo3bits < 8); 1213 vassert(IS_VALID_PFX(pfx)); 1214 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1215 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1216 stmt( IRStmt_Put( 1217 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1218 toBool(sz==1 && !haveREX(pfx)) ), 1219 sz==4 ? unop(Iop_32Uto64,e) : e 1220 )); 1221 } 1222 1223 1224 /* Functions for getting register numbers from modrm bytes and REX 1225 when we don't have to consider the complexities of integer subreg 1226 accesses. 1227 */ 1228 /* Extract the g reg field from a modRM byte, and augment it using the 1229 REX.R bit from the supplied REX byte. The R bit usually is 1230 associated with the g register field. 
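   For example, for the modRM byte 0xD8 (binary 11 011 000) the g field
   is 3; if REX.R is set, the combined register number is 11, i.e.
   %r11 in a 64-bit context.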
1231 */ 1232 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1233 { 1234 Int reg = (Int)( (mod_reg_rm >> 3) & 7 ); 1235 reg += (pfx & PFX_REXR) ? 8 : 0; 1236 return reg; 1237 } 1238 1239 /* Extract the e reg field from a modRM byte, and augment it using the 1240 REX.B bit from the supplied REX byte. The B bit usually is 1241 associated with the e register field (when modrm indicates e is a 1242 register, that is). 1243 */ 1244 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1245 { 1246 Int rm; 1247 vassert(epartIsReg(mod_reg_rm)); 1248 rm = (Int)(mod_reg_rm & 0x7); 1249 rm += (pfx & PFX_REXB) ? 8 : 0; 1250 return rm; 1251 } 1252 1253 1254 /* General functions for dealing with integer register access. */ 1255 1256 /* Produce the guest state offset for a reference to the 'g' register 1257 field in a modrm byte, taking into account REX (or its absence), 1258 and the size of the access. 1259 */ 1260 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1261 { 1262 UInt reg; 1263 vassert(host_endness == VexEndnessLE); 1264 vassert(IS_VALID_PFX(pfx)); 1265 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1266 reg = gregOfRexRM( pfx, mod_reg_rm ); 1267 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1268 } 1269 1270 static 1271 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1272 { 1273 if (sz == 4) { 1274 sz = 8; 1275 return unop(Iop_64to32, 1276 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1277 szToITy(sz) )); 1278 } else { 1279 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1280 szToITy(sz) ); 1281 } 1282 } 1283 1284 static 1285 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1286 { 1287 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1288 if (sz == 4) { 1289 e = unop(Iop_32Uto64,e); 1290 } 1291 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) ); 1292 } 1293 1294 static 1295 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1296 { 1297 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), 1298 toBool(sz==1 && !haveREX(pfx)) ); 1299 } 1300 1301 1302 static 1303 IRExpr* getIRegV ( Int sz, Prefix pfx ) 1304 { 1305 if (sz == 4) { 1306 sz = 8; 1307 return unop(Iop_64to32, 1308 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1309 szToITy(sz) )); 1310 } else { 1311 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1312 szToITy(sz) ); 1313 } 1314 } 1315 1316 static 1317 void putIRegV ( Int sz, Prefix pfx, IRExpr* e ) 1318 { 1319 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1320 if (sz == 4) { 1321 e = unop(Iop_32Uto64,e); 1322 } 1323 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) ); 1324 } 1325 1326 static 1327 const HChar* nameIRegV ( Int sz, Prefix pfx ) 1328 { 1329 return nameIReg( sz, getVexNvvvv(pfx), False ); 1330 } 1331 1332 1333 1334 /* Produce the guest state offset for a reference to the 'e' register 1335 field in a modrm byte, taking into account REX (or its absence), 1336 and the size of the access. eregOfRexRM will assert if mod_reg_rm 1337 denotes a memory access rather than a register access. 
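   For example (a sketch): with no REX present, sz==8 and modRM byte
   0xC1 (mod=11, rm=001), offsetIRegE returns the guest state offset
   of %rcx, and getIRegE reads it as an Ity_I64.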
1338 */ 1339 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1340 { 1341 UInt reg; 1342 vassert(host_endness == VexEndnessLE); 1343 vassert(IS_VALID_PFX(pfx)); 1344 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1345 reg = eregOfRexRM( pfx, mod_reg_rm ); 1346 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1347 } 1348 1349 static 1350 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1351 { 1352 if (sz == 4) { 1353 sz = 8; 1354 return unop(Iop_64to32, 1355 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1356 szToITy(sz) )); 1357 } else { 1358 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1359 szToITy(sz) ); 1360 } 1361 } 1362 1363 static 1364 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1365 { 1366 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1367 if (sz == 4) { 1368 e = unop(Iop_32Uto64,e); 1369 } 1370 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) ); 1371 } 1372 1373 static 1374 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1375 { 1376 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), 1377 toBool(sz==1 && !haveREX(pfx)) ); 1378 } 1379 1380 1381 /*------------------------------------------------------------*/ 1382 /*--- For dealing with XMM registers ---*/ 1383 /*------------------------------------------------------------*/ 1384 1385 static Int ymmGuestRegOffset ( UInt ymmreg ) 1386 { 1387 switch (ymmreg) { 1388 case 0: return OFFB_YMM0; 1389 case 1: return OFFB_YMM1; 1390 case 2: return OFFB_YMM2; 1391 case 3: return OFFB_YMM3; 1392 case 4: return OFFB_YMM4; 1393 case 5: return OFFB_YMM5; 1394 case 6: return OFFB_YMM6; 1395 case 7: return OFFB_YMM7; 1396 case 8: return OFFB_YMM8; 1397 case 9: return OFFB_YMM9; 1398 case 10: return OFFB_YMM10; 1399 case 11: return OFFB_YMM11; 1400 case 12: return OFFB_YMM12; 1401 case 13: return OFFB_YMM13; 1402 case 14: return OFFB_YMM14; 1403 case 15: return OFFB_YMM15; 1404 default: vpanic("ymmGuestRegOffset(amd64)"); 1405 } 1406 } 1407 1408 static Int xmmGuestRegOffset ( UInt xmmreg ) 1409 { 1410 /* Correct for little-endian host only. */ 1411 vassert(host_endness == VexEndnessLE); 1412 return ymmGuestRegOffset( xmmreg ); 1413 } 1414 1415 /* Lanes of vector registers are always numbered from zero being the 1416 least significant lane (rightmost in the register). */ 1417 1418 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) 1419 { 1420 /* Correct for little-endian host only. */ 1421 vassert(host_endness == VexEndnessLE); 1422 vassert(laneno >= 0 && laneno < 8); 1423 return xmmGuestRegOffset( xmmreg ) + 2 * laneno; 1424 } 1425 1426 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) 1427 { 1428 /* Correct for little-endian host only. */ 1429 vassert(host_endness == VexEndnessLE); 1430 vassert(laneno >= 0 && laneno < 4); 1431 return xmmGuestRegOffset( xmmreg ) + 4 * laneno; 1432 } 1433 1434 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) 1435 { 1436 /* Correct for little-endian host only. */ 1437 vassert(host_endness == VexEndnessLE); 1438 vassert(laneno >= 0 && laneno < 2); 1439 return xmmGuestRegOffset( xmmreg ) + 8 * laneno; 1440 } 1441 1442 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno ) 1443 { 1444 /* Correct for little-endian host only. 
*/ 1445 vassert(host_endness == VexEndnessLE); 1446 vassert(laneno >= 0 && laneno < 2); 1447 return ymmGuestRegOffset( ymmreg ) + 16 * laneno; 1448 } 1449 1450 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno ) 1451 { 1452 /* Correct for little-endian host only. */ 1453 vassert(host_endness == VexEndnessLE); 1454 vassert(laneno >= 0 && laneno < 4); 1455 return ymmGuestRegOffset( ymmreg ) + 8 * laneno; 1456 } 1457 1458 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno ) 1459 { 1460 /* Correct for little-endian host only. */ 1461 vassert(host_endness == VexEndnessLE); 1462 vassert(laneno >= 0 && laneno < 8); 1463 return ymmGuestRegOffset( ymmreg ) + 4 * laneno; 1464 } 1465 1466 static IRExpr* getXMMReg ( UInt xmmreg ) 1467 { 1468 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); 1469 } 1470 1471 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) 1472 { 1473 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); 1474 } 1475 1476 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) 1477 { 1478 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); 1479 } 1480 1481 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) 1482 { 1483 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); 1484 } 1485 1486 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) 1487 { 1488 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); 1489 } 1490 1491 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno ) 1492 { 1493 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 ); 1494 } 1495 1496 static void putXMMReg ( UInt xmmreg, IRExpr* e ) 1497 { 1498 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1499 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); 1500 } 1501 1502 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) 1503 { 1504 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1505 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1506 } 1507 1508 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) 1509 { 1510 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1511 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1512 } 1513 1514 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) 1515 { 1516 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1517 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1518 } 1519 1520 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) 1521 { 1522 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1523 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1524 } 1525 1526 static IRExpr* getYMMReg ( UInt xmmreg ) 1527 { 1528 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 ); 1529 } 1530 1531 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno ) 1532 { 1533 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 ); 1534 } 1535 1536 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno ) 1537 { 1538 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 ); 1539 } 1540 1541 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno ) 1542 { 1543 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 ); 1544 } 1545 1546 static void putYMMReg ( UInt ymmreg, IRExpr* e ) 1547 { 1548 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256); 1549 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) ); 1550 } 1551 1552 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e ) 1553 { 1554 
vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1555 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) ); 1556 } 1557 1558 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e ) 1559 { 1560 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1561 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1562 } 1563 1564 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e ) 1565 { 1566 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1567 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1568 } 1569 1570 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e ) 1571 { 1572 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1573 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1574 } 1575 1576 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e ) 1577 { 1578 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1579 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1580 } 1581 1582 static IRExpr* mkV128 ( UShort mask ) 1583 { 1584 return IRExpr_Const(IRConst_V128(mask)); 1585 } 1586 1587 /* Write the low half of a YMM reg and zero out the upper half. */ 1588 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e ) 1589 { 1590 putYMMRegLane128( ymmreg, 0, e ); 1591 putYMMRegLane128( ymmreg, 1, mkV128(0) ); 1592 } 1593 1594 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) 1595 { 1596 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); 1597 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); 1598 return unop(Iop_64to1, 1599 binop(Iop_And64, 1600 unop(Iop_1Uto64,x), 1601 unop(Iop_1Uto64,y))); 1602 } 1603 1604 /* Generate a compare-and-swap operation, operating on memory at 1605 'addr'. The expected value is 'expVal' and the new value is 1606 'newVal'. If the operation fails, then transfer control (with a 1607 no-redir jump (XXX no -- see comment at top of this file)) to 1608 'restart_point', which is presumably the address of the guest 1609 instruction again -- retrying, essentially. */ 1610 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, 1611 Addr64 restart_point ) 1612 { 1613 IRCAS* cas; 1614 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); 1615 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); 1616 IRTemp oldTmp = newTemp(tyE); 1617 IRTemp expTmp = newTemp(tyE); 1618 vassert(tyE == tyN); 1619 vassert(tyE == Ity_I64 || tyE == Ity_I32 1620 || tyE == Ity_I16 || tyE == Ity_I8); 1621 assign(expTmp, expVal); 1622 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 1623 NULL, mkexpr(expTmp), NULL, newVal ); 1624 stmt( IRStmt_CAS(cas) ); 1625 stmt( IRStmt_Exit( 1626 binop( mkSizedOp(tyE,Iop_CasCmpNE8), 1627 mkexpr(oldTmp), mkexpr(expTmp) ), 1628 Ijk_Boring, /*Ijk_NoRedir*/ 1629 IRConst_U64( restart_point ), 1630 OFFB_RIP 1631 )); 1632 } 1633 1634 1635 /*------------------------------------------------------------*/ 1636 /*--- Helpers for %rflags. ---*/ 1637 /*------------------------------------------------------------*/ 1638 1639 /* -------------- Evaluating the flags-thunk. -------------- */ 1640 1641 /* Build IR to calculate all the eflags from stored 1642 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1643 Ity_I64. 
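   A typical use (a sketch; the temp name is illustrative):

      IRTemp flags = newTemp(Ity_I64);
      assign( flags, mk_amd64g_calculate_rflags_all() );

   after which mkexpr(flags) can be stored or pushed; the PUSHF
   translation materialises %rflags in essentially this way.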
*/
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 1/8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.
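   For example, narrowTo(Ity_I32, e) applied to an Ity_I64-typed e
   yields unop(Iop_64to32, e).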
Clearly only some 1753 of these combinations make sense. */ 1754 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e ) 1755 { 1756 IRType src_ty = typeOfIRExpr(irsb->tyenv,e); 1757 if (src_ty == dst_ty) 1758 return e; 1759 if (src_ty == Ity_I32 && dst_ty == Ity_I16) 1760 return unop(Iop_32to16, e); 1761 if (src_ty == Ity_I32 && dst_ty == Ity_I8) 1762 return unop(Iop_32to8, e); 1763 if (src_ty == Ity_I64 && dst_ty == Ity_I32) 1764 return unop(Iop_64to32, e); 1765 if (src_ty == Ity_I64 && dst_ty == Ity_I16) 1766 return unop(Iop_64to16, e); 1767 if (src_ty == Ity_I64 && dst_ty == Ity_I8) 1768 return unop(Iop_64to8, e); 1769 1770 vex_printf("\nsrc, dst tys are: "); 1771 ppIRType(src_ty); 1772 vex_printf(", "); 1773 ppIRType(dst_ty); 1774 vex_printf("\n"); 1775 vpanic("narrowTo(amd64)"); 1776 } 1777 1778 1779 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is 1780 auto-sized up to the real op. */ 1781 1782 static 1783 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty ) 1784 { 1785 Int ccOp = 0; 1786 switch (ty) { 1787 case Ity_I8: ccOp = 0; break; 1788 case Ity_I16: ccOp = 1; break; 1789 case Ity_I32: ccOp = 2; break; 1790 case Ity_I64: ccOp = 3; break; 1791 default: vassert(0); 1792 } 1793 switch (op8) { 1794 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break; 1795 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break; 1796 default: ppIROp(op8); 1797 vpanic("setFlags_DEP1_DEP2(amd64)"); 1798 } 1799 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1800 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1801 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) ); 1802 } 1803 1804 1805 /* Set the OP and DEP1 fields only, and write zero to DEP2. */ 1806 1807 static 1808 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) 1809 { 1810 Int ccOp = 0; 1811 switch (ty) { 1812 case Ity_I8: ccOp = 0; break; 1813 case Ity_I16: ccOp = 1; break; 1814 case Ity_I32: ccOp = 2; break; 1815 case Ity_I64: ccOp = 3; break; 1816 default: vassert(0); 1817 } 1818 switch (op8) { 1819 case Iop_Or8: 1820 case Iop_And8: 1821 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break; 1822 default: ppIROp(op8); 1823 vpanic("setFlags_DEP1(amd64)"); 1824 } 1825 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1826 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1827 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1828 } 1829 1830 1831 /* For shift operations, we put in the result and the undershifted 1832 result. Except if the shift amount is zero, the thunk is left 1833 unchanged. */ 1834 1835 static void setFlags_DEP1_DEP2_shift ( IROp op64, 1836 IRTemp res, 1837 IRTemp resUS, 1838 IRType ty, 1839 IRTemp guard ) 1840 { 1841 Int ccOp = 0; 1842 switch (ty) { 1843 case Ity_I8: ccOp = 0; break; 1844 case Ity_I16: ccOp = 1; break; 1845 case Ity_I32: ccOp = 2; break; 1846 case Ity_I64: ccOp = 3; break; 1847 default: vassert(0); 1848 } 1849 1850 vassert(guard); 1851 1852 /* Both kinds of right shifts are handled by the same thunk 1853 operation. */ 1854 switch (op64) { 1855 case Iop_Shr64: 1856 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break; 1857 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break; 1858 default: ppIROp(op64); 1859 vpanic("setFlags_DEP1_DEP2_shift(amd64)"); 1860 } 1861 1862 /* guard :: Ity_I8. We need to convert it to I1. */ 1863 IRTemp guardB = newTemp(Ity_I1); 1864 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) ); 1865 1866 /* DEP1 contains the result, DEP2 contains the undershifted value. 
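   ('Undershifted' means shifted by one place less than the requested
   amount, so that the flags helper can still see the last bit shifted
   out: its top bit, within the operand width, for a left shift, and
   its bottom bit for a right shift, while e.g. ZF and SF are taken
   from the result itself.)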
*/ 1867 stmt( IRStmt_Put( OFFB_CC_OP, 1868 IRExpr_ITE( mkexpr(guardB), 1869 mkU64(ccOp), 1870 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 1871 stmt( IRStmt_Put( OFFB_CC_DEP1, 1872 IRExpr_ITE( mkexpr(guardB), 1873 widenUto64(mkexpr(res)), 1874 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 1875 stmt( IRStmt_Put( OFFB_CC_DEP2, 1876 IRExpr_ITE( mkexpr(guardB), 1877 widenUto64(mkexpr(resUS)), 1878 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 1879 } 1880 1881 1882 /* For the inc/dec case, we store in DEP1 the result value and in NDEP 1883 the former value of the carry flag, which unfortunately we have to 1884 compute. */ 1885 1886 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) 1887 { 1888 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB; 1889 1890 switch (ty) { 1891 case Ity_I8: ccOp += 0; break; 1892 case Ity_I16: ccOp += 1; break; 1893 case Ity_I32: ccOp += 2; break; 1894 case Ity_I64: ccOp += 3; break; 1895 default: vassert(0); 1896 } 1897 1898 /* This has to come first, because calculating the C flag 1899 may require reading all four thunk fields. */ 1900 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) ); 1901 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1902 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) ); 1903 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1904 } 1905 1906 1907 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the 1908 two arguments. */ 1909 1910 static 1911 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1912 { 1913 switch (ty) { 1914 case Ity_I8: 1915 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1916 break; 1917 case Ity_I16: 1918 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1919 break; 1920 case Ity_I32: 1921 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1922 break; 1923 case Ity_I64: 1924 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1925 break; 1926 default: 1927 vpanic("setFlags_MUL(amd64)"); 1928 } 1929 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1930 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1931 } 1932 1933 1934 /* -------------- Condition codes. -------------- */ 1935 1936 /* Condition codes, using the AMD encoding. */ 1937 1938 static const HChar* name_AMD64Condcode ( AMD64Condcode cond ) 1939 { 1940 switch (cond) { 1941 case AMD64CondO: return "o"; 1942 case AMD64CondNO: return "no"; 1943 case AMD64CondB: return "b"; 1944 case AMD64CondNB: return "ae"; /*"nb";*/ 1945 case AMD64CondZ: return "e"; /*"z";*/ 1946 case AMD64CondNZ: return "ne"; /*"nz";*/ 1947 case AMD64CondBE: return "be"; 1948 case AMD64CondNBE: return "a"; /*"nbe";*/ 1949 case AMD64CondS: return "s"; 1950 case AMD64CondNS: return "ns"; 1951 case AMD64CondP: return "p"; 1952 case AMD64CondNP: return "np"; 1953 case AMD64CondL: return "l"; 1954 case AMD64CondNL: return "ge"; /*"nl";*/ 1955 case AMD64CondLE: return "le"; 1956 case AMD64CondNLE: return "g"; /*"nle";*/ 1957 case AMD64CondAlways: return "ALWAYS"; 1958 default: vpanic("name_AMD64Condcode"); 1959 } 1960 } 1961 1962 static 1963 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond, 1964 /*OUT*/Bool* needInvert ) 1965 { 1966 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE); 1967 if (cond & 1) { 1968 *needInvert = True; 1969 return cond-1; 1970 } else { 1971 *needInvert = False; 1972 return cond; 1973 } 1974 } 1975 1976 1977 /* -------------- Helpers for ADD/SUB with carry. -------------- */ 1978 1979 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1980 appropriately. 
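   The thunk is set up with CC_OP = one of the ADCB/W/L/Q ops,
   DEP1 = ta1, DEP2 = ta2 ^ oldCarry and NDEP = oldCarry.  Folding the
   old carry into DEP2, rather than leaving it only in NDEP, lets the
   flags helper recover ta2 and -- presumably the point of the
   encoding -- keeps the old carry's definedness visible to Memcheck
   through DEP2, since NDEP is excluded from definedness checking.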
1981 1982 Optionally, generate a store for the 'tres' value. This can either 1983 be a normal store, or it can be a cas-with-possible-failure style 1984 store: 1985 1986 if taddr is IRTemp_INVALID, then no store is generated. 1987 1988 if taddr is not IRTemp_INVALID, then a store (using taddr as 1989 the address) is generated: 1990 1991 if texpVal is IRTemp_INVALID then a normal store is 1992 generated, and restart_point must be zero (it is irrelevant). 1993 1994 if texpVal is not IRTemp_INVALID then a cas-style store is 1995 generated. texpVal is the expected value, restart_point 1996 is the restart point if the store fails, and texpVal must 1997 have the same type as tres. 1998 1999 */ 2000 static void helper_ADC ( Int sz, 2001 IRTemp tres, IRTemp ta1, IRTemp ta2, 2002 /* info about optional store: */ 2003 IRTemp taddr, IRTemp texpVal, Addr64 restart_point ) 2004 { 2005 UInt thunkOp; 2006 IRType ty = szToITy(sz); 2007 IRTemp oldc = newTemp(Ity_I64); 2008 IRTemp oldcn = newTemp(ty); 2009 IROp plus = mkSizedOp(ty, Iop_Add8); 2010 IROp xor = mkSizedOp(ty, Iop_Xor8); 2011 2012 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 2013 2014 switch (sz) { 2015 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break; 2016 case 4: thunkOp = AMD64G_CC_OP_ADCL; break; 2017 case 2: thunkOp = AMD64G_CC_OP_ADCW; break; 2018 case 1: thunkOp = AMD64G_CC_OP_ADCB; break; 2019 default: vassert(0); 2020 } 2021 2022 /* oldc = old carry flag, 0 or 1 */ 2023 assign( oldc, binop(Iop_And64, 2024 mk_amd64g_calculate_rflags_c(), 2025 mkU64(1)) ); 2026 2027 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 2028 2029 assign( tres, binop(plus, 2030 binop(plus,mkexpr(ta1),mkexpr(ta2)), 2031 mkexpr(oldcn)) ); 2032 2033 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 2034 start of this function. */ 2035 if (taddr != IRTemp_INVALID) { 2036 if (texpVal == IRTemp_INVALID) { 2037 vassert(restart_point == 0); 2038 storeLE( mkexpr(taddr), mkexpr(tres) ); 2039 } else { 2040 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 2041 /* .. and hence 'texpVal' has the same type as 'tres'. */ 2042 casLE( mkexpr(taddr), 2043 mkexpr(texpVal), mkexpr(tres), restart_point ); 2044 } 2045 } 2046 2047 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2048 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) )); 2049 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2050 mkexpr(oldcn)) )) ); 2051 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2052 } 2053 2054 2055 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags 2056 appropriately. As with helper_ADC, possibly generate a store of 2057 the result -- see comments on helper_ADC for details. 
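   (The thunk layout is the same as for ADC -- DEP1 = ta1,
   DEP2 = ta2 ^ oldCarry, NDEP = oldCarry -- only the subtraction and
   the SBBB/W/L/Q thunk opcodes differ.)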
2058 */ 2059 static void helper_SBB ( Int sz, 2060 IRTemp tres, IRTemp ta1, IRTemp ta2, 2061 /* info about optional store: */ 2062 IRTemp taddr, IRTemp texpVal, Addr64 restart_point ) 2063 { 2064 UInt thunkOp; 2065 IRType ty = szToITy(sz); 2066 IRTemp oldc = newTemp(Ity_I64); 2067 IRTemp oldcn = newTemp(ty); 2068 IROp minus = mkSizedOp(ty, Iop_Sub8); 2069 IROp xor = mkSizedOp(ty, Iop_Xor8); 2070 2071 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 2072 2073 switch (sz) { 2074 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break; 2075 case 4: thunkOp = AMD64G_CC_OP_SBBL; break; 2076 case 2: thunkOp = AMD64G_CC_OP_SBBW; break; 2077 case 1: thunkOp = AMD64G_CC_OP_SBBB; break; 2078 default: vassert(0); 2079 } 2080 2081 /* oldc = old carry flag, 0 or 1 */ 2082 assign( oldc, binop(Iop_And64, 2083 mk_amd64g_calculate_rflags_c(), 2084 mkU64(1)) ); 2085 2086 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 2087 2088 assign( tres, binop(minus, 2089 binop(minus,mkexpr(ta1),mkexpr(ta2)), 2090 mkexpr(oldcn)) ); 2091 2092 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 2093 start of this function. */ 2094 if (taddr != IRTemp_INVALID) { 2095 if (texpVal == IRTemp_INVALID) { 2096 vassert(restart_point == 0); 2097 storeLE( mkexpr(taddr), mkexpr(tres) ); 2098 } else { 2099 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 2100 /* .. and hence 'texpVal' has the same type as 'tres'. */ 2101 casLE( mkexpr(taddr), 2102 mkexpr(texpVal), mkexpr(tres), restart_point ); 2103 } 2104 } 2105 2106 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2107 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) ); 2108 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2109 mkexpr(oldcn)) )) ); 2110 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2111 } 2112 2113 2114 /* -------------- Helpers for disassembly printing. -------------- */ 2115 2116 static const HChar* nameGrp1 ( Int opc_aux ) 2117 { 2118 static const HChar* grp1_names[8] 2119 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; 2120 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)"); 2121 return grp1_names[opc_aux]; 2122 } 2123 2124 static const HChar* nameGrp2 ( Int opc_aux ) 2125 { 2126 static const HChar* grp2_names[8] 2127 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; 2128 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)"); 2129 return grp2_names[opc_aux]; 2130 } 2131 2132 static const HChar* nameGrp4 ( Int opc_aux ) 2133 { 2134 static const HChar* grp4_names[8] 2135 = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; 2136 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)"); 2137 return grp4_names[opc_aux]; 2138 } 2139 2140 static const HChar* nameGrp5 ( Int opc_aux ) 2141 { 2142 static const HChar* grp5_names[8] 2143 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" 
}; 2144 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)"); 2145 return grp5_names[opc_aux]; 2146 } 2147 2148 static const HChar* nameGrp8 ( Int opc_aux ) 2149 { 2150 static const HChar* grp8_names[8] 2151 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; 2152 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)"); 2153 return grp8_names[opc_aux]; 2154 } 2155 2156 static const HChar* nameSReg ( UInt sreg ) 2157 { 2158 switch (sreg) { 2159 case R_ES: return "%es"; 2160 case R_CS: return "%cs"; 2161 case R_SS: return "%ss"; 2162 case R_DS: return "%ds"; 2163 case R_FS: return "%fs"; 2164 case R_GS: return "%gs"; 2165 default: vpanic("nameSReg(amd64)"); 2166 } 2167 } 2168 2169 static const HChar* nameMMXReg ( Int mmxreg ) 2170 { 2171 static const HChar* mmx_names[8] 2172 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; 2173 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)"); 2174 return mmx_names[mmxreg]; 2175 } 2176 2177 static const HChar* nameXMMReg ( Int xmmreg ) 2178 { 2179 static const HChar* xmm_names[16] 2180 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 2181 "%xmm4", "%xmm5", "%xmm6", "%xmm7", 2182 "%xmm8", "%xmm9", "%xmm10", "%xmm11", 2183 "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; 2184 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)"); 2185 return xmm_names[xmmreg]; 2186 } 2187 2188 static const HChar* nameMMXGran ( Int gran ) 2189 { 2190 switch (gran) { 2191 case 0: return "b"; 2192 case 1: return "w"; 2193 case 2: return "d"; 2194 case 3: return "q"; 2195 default: vpanic("nameMMXGran(amd64,guest)"); 2196 } 2197 } 2198 2199 static HChar nameISize ( Int size ) 2200 { 2201 switch (size) { 2202 case 8: return 'q'; 2203 case 4: return 'l'; 2204 case 2: return 'w'; 2205 case 1: return 'b'; 2206 default: vpanic("nameISize(amd64)"); 2207 } 2208 } 2209 2210 static const HChar* nameYMMReg ( Int ymmreg ) 2211 { 2212 static const HChar* ymm_names[16] 2213 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3", 2214 "%ymm4", "%ymm5", "%ymm6", "%ymm7", 2215 "%ymm8", "%ymm9", "%ymm10", "%ymm11", 2216 "%ymm12", "%ymm13", "%ymm14", "%ymm15" }; 2217 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)"); 2218 return ymm_names[ymmreg]; 2219 } 2220 2221 2222 /*------------------------------------------------------------*/ 2223 /*--- JMP helpers ---*/ 2224 /*------------------------------------------------------------*/ 2225 2226 static void jmp_lit( /*MOD*/DisResult* dres, 2227 IRJumpKind kind, Addr64 d64 ) 2228 { 2229 vassert(dres->whatNext == Dis_Continue); 2230 vassert(dres->len == 0); 2231 vassert(dres->continueAt == 0); 2232 vassert(dres->jk_StopHere == Ijk_INVALID); 2233 dres->whatNext = Dis_StopHere; 2234 dres->jk_StopHere = kind; 2235 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) ); 2236 } 2237 2238 static void jmp_treg( /*MOD*/DisResult* dres, 2239 IRJumpKind kind, IRTemp t ) 2240 { 2241 vassert(dres->whatNext == Dis_Continue); 2242 vassert(dres->len == 0); 2243 vassert(dres->continueAt == 0); 2244 vassert(dres->jk_StopHere == Ijk_INVALID); 2245 dres->whatNext = Dis_StopHere; 2246 dres->jk_StopHere = kind; 2247 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) ); 2248 } 2249 2250 static 2251 void jcc_01 ( /*MOD*/DisResult* dres, 2252 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) 2253 { 2254 Bool invert; 2255 AMD64Condcode condPos; 2256 vassert(dres->whatNext == Dis_Continue); 2257 vassert(dres->len == 0); 2258 vassert(dres->continueAt == 0); 2259 vassert(dres->jk_StopHere == Ijk_INVALID); 2260 dres->whatNext = Dis_StopHere; 2261 dres->jk_StopHere = 
Ijk_Boring; 2262 condPos = positiveIse_AMD64Condcode ( cond, &invert ); 2263 if (invert) { 2264 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2265 Ijk_Boring, 2266 IRConst_U64(d64_false), 2267 OFFB_RIP ) ); 2268 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) ); 2269 } else { 2270 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2271 Ijk_Boring, 2272 IRConst_U64(d64_true), 2273 OFFB_RIP ) ); 2274 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) ); 2275 } 2276 } 2277 2278 /* Let new_rsp be the %rsp value after a call/return. Let nia be the 2279 guest address of the next instruction to be executed. 2280 2281 This function generates an AbiHint to say that -128(%rsp) 2282 .. -1(%rsp) should now be regarded as uninitialised. 2283 */ 2284 static 2285 void make_redzone_AbiHint ( const VexAbiInfo* vbi, 2286 IRTemp new_rsp, IRTemp nia, const HChar* who ) 2287 { 2288 Int szB = vbi->guest_stack_redzone_size; 2289 vassert(szB >= 0); 2290 2291 /* A bit of a kludge. Currently the only AbI we've guested AMD64 2292 for is ELF. So just check it's the expected 128 value 2293 (paranoia). */ 2294 vassert(szB == 128); 2295 2296 if (0) vex_printf("AbiHint: %s\n", who); 2297 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64); 2298 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); 2299 if (szB > 0) 2300 stmt( IRStmt_AbiHint( 2301 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 2302 szB, 2303 mkexpr(nia) 2304 )); 2305 } 2306 2307 2308 /*------------------------------------------------------------*/ 2309 /*--- Disassembling addressing modes ---*/ 2310 /*------------------------------------------------------------*/ 2311 2312 static 2313 const HChar* segRegTxt ( Prefix pfx ) 2314 { 2315 if (pfx & PFX_CS) return "%cs:"; 2316 if (pfx & PFX_DS) return "%ds:"; 2317 if (pfx & PFX_ES) return "%es:"; 2318 if (pfx & PFX_FS) return "%fs:"; 2319 if (pfx & PFX_GS) return "%gs:"; 2320 if (pfx & PFX_SS) return "%ss:"; 2321 return ""; /* no override */ 2322 } 2323 2324 2325 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 2326 linear address by adding any required segment override as indicated 2327 by sorb, and also dealing with any address size override 2328 present. */ 2329 static 2330 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi, 2331 Prefix pfx, IRExpr* virtual ) 2332 { 2333 /* Note that the below are hacks that relies on the assumption 2334 that %fs or %gs are constant. 2335 Typically, %fs is always 0x63 on linux (in the main thread, it 2336 stays at value 0), %gs always 0x60 on Darwin, ... */ 2337 /* --- segment overrides --- */ 2338 if (pfx & PFX_FS) { 2339 if (vbi->guest_amd64_assume_fs_is_const) { 2340 /* return virtual + guest_FS_CONST. */ 2341 virtual = binop(Iop_Add64, virtual, 2342 IRExpr_Get(OFFB_FS_CONST, Ity_I64)); 2343 } else { 2344 unimplemented("amd64 %fs segment override"); 2345 } 2346 } 2347 2348 if (pfx & PFX_GS) { 2349 if (vbi->guest_amd64_assume_gs_is_const) { 2350 /* return virtual + guest_GS_CONST. */ 2351 virtual = binop(Iop_Add64, virtual, 2352 IRExpr_Get(OFFB_GS_CONST, Ity_I64)); 2353 } else { 2354 unimplemented("amd64 %gs segment override"); 2355 } 2356 } 2357 2358 /* cs, ds, es and ss are simply ignored in 64-bit mode. */ 2359 2360 /* --- address size override --- */ 2361 if (haveASO(pfx)) 2362 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual)); 2363 2364 return virtual; 2365 } 2366 2367 //.. { 2368 //.. Int sreg; 2369 //.. IRType hWordTy; 2370 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 2371 //.. 2372 //.. if (sorb == 0) 2373 //.. 
/* the common case - no override */ 2374 //.. return virtual; 2375 //.. 2376 //.. switch (sorb) { 2377 //.. case 0x3E: sreg = R_DS; break; 2378 //.. case 0x26: sreg = R_ES; break; 2379 //.. case 0x64: sreg = R_FS; break; 2380 //.. case 0x65: sreg = R_GS; break; 2381 //.. default: vpanic("handleAddrOverrides(x86,guest)"); 2382 //.. } 2383 //.. 2384 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 2385 //.. 2386 //.. seg_selector = newTemp(Ity_I32); 2387 //.. ldt_ptr = newTemp(hWordTy); 2388 //.. gdt_ptr = newTemp(hWordTy); 2389 //.. r64 = newTemp(Ity_I64); 2390 //.. 2391 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 2392 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 2393 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 2394 //.. 2395 //.. /* 2396 //.. Call this to do the translation and limit checks: 2397 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2398 //.. UInt seg_selector, UInt virtual_addr ) 2399 //.. */ 2400 //.. assign( 2401 //.. r64, 2402 //.. mkIRExprCCall( 2403 //.. Ity_I64, 2404 //.. 0/*regparms*/, 2405 //.. "x86g_use_seg_selector", 2406 //.. &x86g_use_seg_selector, 2407 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 2408 //.. mkexpr(seg_selector), virtual) 2409 //.. ) 2410 //.. ); 2411 //.. 2412 //.. /* If the high 32 of the result are non-zero, there was a 2413 //.. failure in address translation. In which case, make a 2414 //.. quick exit. 2415 //.. */ 2416 //.. stmt( 2417 //.. IRStmt_Exit( 2418 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), 2419 //.. Ijk_MapFail, 2420 //.. IRConst_U32( guest_eip_curr_instr ) 2421 //.. ) 2422 //.. ); 2423 //.. 2424 //.. /* otherwise, here's the translated result. */ 2425 //.. return unop(Iop_64to32, mkexpr(r64)); 2426 //.. } 2427 2428 2429 /* Generate IR to calculate an address indicated by a ModRM and 2430 following SIB bytes. The expression, and the number of bytes in 2431 the address mode, are returned (the latter in *len). Note that 2432 this fn should not be called if the R/M part of the address denotes 2433 a register instead of memory. If print_codegen is true, text of 2434 the addressing mode is placed in buf. 2435 2436 The computed address is stored in a new tempreg, and the 2437 identity of the tempreg is returned. 2438 2439 extra_bytes holds the number of bytes after the amode, as supplied 2440 by the caller. This is needed to make sense of %rip-relative 2441 addresses. Note that the value that *len is set to is only the 2442 length of the amode itself and does not include the value supplied 2443 in extra_bytes. 2444 */ 2445 2446 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 ) 2447 { 2448 IRTemp tmp = newTemp(Ity_I64); 2449 assign( tmp, addr64 ); 2450 return tmp; 2451 } 2452 2453 static 2454 IRTemp disAMode ( /*OUT*/Int* len, 2455 const VexAbiInfo* vbi, Prefix pfx, Long delta, 2456 /*OUT*/HChar* buf, Int extra_bytes ) 2457 { 2458 UChar mod_reg_rm = getUChar(delta); 2459 delta++; 2460 2461 buf[0] = (UChar)0; 2462 vassert(extra_bytes >= 0 && extra_bytes < 10); 2463 2464 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2465 jump table seems a bit excessive. 2466 */ 2467 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2468 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2469 /* is now XX0XXYYY */ 2470 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2471 switch (mod_reg_rm) { 2472 2473 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2474 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 
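   In other words, mod=00 with a plain base register: the effective
   address is simply the register's value.  E.g. a modrm byte of 0x03
   denotes (%rbx), or (%r11) when REX.B is set.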
2475 */ 2476 case 0x00: case 0x01: case 0x02: case 0x03: 2477 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2478 { UChar rm = toUChar(mod_reg_rm & 7); 2479 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2480 *len = 1; 2481 return disAMode_copy2tmp( 2482 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm))); 2483 } 2484 2485 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2486 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2487 */ 2488 case 0x08: case 0x09: case 0x0A: case 0x0B: 2489 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2490 { UChar rm = toUChar(mod_reg_rm & 7); 2491 Long d = getSDisp8(delta); 2492 if (d == 0) { 2493 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2494 } else { 2495 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2496 } 2497 *len = 2; 2498 return disAMode_copy2tmp( 2499 handleAddrOverrides(vbi, pfx, 2500 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2501 } 2502 2503 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2504 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2505 */ 2506 case 0x10: case 0x11: case 0x12: case 0x13: 2507 /* ! 14 */ case 0x15: case 0x16: case 0x17: 2508 { UChar rm = toUChar(mod_reg_rm & 7); 2509 Long d = getSDisp32(delta); 2510 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2511 *len = 5; 2512 return disAMode_copy2tmp( 2513 handleAddrOverrides(vbi, pfx, 2514 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2515 } 2516 2517 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2518 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ 2519 case 0x18: case 0x19: case 0x1A: case 0x1B: 2520 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2521 vpanic("disAMode(amd64): not an addr!"); 2522 2523 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set 2524 correctly at the start of handling each instruction. */ 2525 case 0x05: 2526 { Long d = getSDisp32(delta); 2527 *len = 5; 2528 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); 2529 /* We need to know the next instruction's start address. 2530 Try and figure out what it is, record the guess, and ask 2531 the top-level driver logic (bbToIR_AMD64) to check we 2532 guessed right, after the instruction is completely 2533 decoded. */ 2534 guest_RIP_next_mustcheck = True; 2535 guest_RIP_next_assumed = guest_RIP_bbstart 2536 + delta+4 + extra_bytes; 2537 return disAMode_copy2tmp( 2538 handleAddrOverrides(vbi, pfx, 2539 binop(Iop_Add64, mkU64(guest_RIP_next_assumed), 2540 mkU64(d)))); 2541 } 2542 2543 case 0x04: { 2544 /* SIB, with no displacement. Special cases: 2545 -- %rsp cannot act as an index value. 2546 If index_r indicates %rsp, zero is used for the index. 2547 -- when mod is zero and base indicates RBP or R13, base is 2548 instead a 32-bit sign-extended literal. 2549 It's all madness, I tell you. Extract %index, %base and 2550 scale from the SIB byte. 
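   (scale is bits 7:6 of the SIB byte, index bits 5:3 and base bits
   2:0, with REX.X and REX.B supplying the fourth bit of index and
   base respectively.)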
The value denoted is then: 2551 | %index == %RSP && (%base == %RBP || %base == %R13) 2552 = d32 following SIB byte 2553 | %index == %RSP && !(%base == %RBP || %base == %R13) 2554 = %base 2555 | %index != %RSP && (%base == %RBP || %base == %R13) 2556 = d32 following SIB byte + (%index << scale) 2557 | %index != %RSP && !(%base == %RBP || %base == %R13) 2558 = %base + (%index << scale) 2559 */ 2560 UChar sib = getUChar(delta); 2561 UChar scale = toUChar((sib >> 6) & 3); 2562 UChar index_r = toUChar((sib >> 3) & 7); 2563 UChar base_r = toUChar(sib & 7); 2564 /* correct since #(R13) == 8 + #(RBP) */ 2565 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2566 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx)); 2567 delta++; 2568 2569 if ((!index_is_SP) && (!base_is_BPor13)) { 2570 if (scale == 0) { 2571 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2572 nameIRegRexB(8,pfx,base_r), 2573 nameIReg64rexX(pfx,index_r)); 2574 } else { 2575 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2576 nameIRegRexB(8,pfx,base_r), 2577 nameIReg64rexX(pfx,index_r), 1<<scale); 2578 } 2579 *len = 2; 2580 return 2581 disAMode_copy2tmp( 2582 handleAddrOverrides(vbi, pfx, 2583 binop(Iop_Add64, 2584 getIRegRexB(8,pfx,base_r), 2585 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2586 mkU8(scale))))); 2587 } 2588 2589 if ((!index_is_SP) && base_is_BPor13) { 2590 Long d = getSDisp32(delta); 2591 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, 2592 nameIReg64rexX(pfx,index_r), 1<<scale); 2593 *len = 6; 2594 return 2595 disAMode_copy2tmp( 2596 handleAddrOverrides(vbi, pfx, 2597 binop(Iop_Add64, 2598 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2599 mkU8(scale)), 2600 mkU64(d)))); 2601 } 2602 2603 if (index_is_SP && (!base_is_BPor13)) { 2604 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r)); 2605 *len = 2; 2606 return disAMode_copy2tmp( 2607 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r))); 2608 } 2609 2610 if (index_is_SP && base_is_BPor13) { 2611 Long d = getSDisp32(delta); 2612 DIS(buf, "%s%lld", segRegTxt(pfx), d); 2613 *len = 6; 2614 return disAMode_copy2tmp( 2615 handleAddrOverrides(vbi, pfx, mkU64(d))); 2616 } 2617 2618 vassert(0); 2619 } 2620 2621 /* SIB, with 8-bit displacement. Special cases: 2622 -- %esp cannot act as an index value. 2623 If index_r indicates %esp, zero is used for the index. 
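   ('%esp' here really means index_r == 4 with REX.X clear, as in the
   no-displacement case above.  For example, 0x8(%rax,%rcx,4) arrives
   here as mod=01 rm=100, SIB byte 0x88 -- scale=2, index=%rcx,
   base=%rax -- followed by the d8 byte 0x08.)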
2624 Denoted value is: 2625 | %index == %ESP 2626 = d8 + %base 2627 | %index != %ESP 2628 = d8 + %base + (%index << scale) 2629 */ 2630 case 0x0C: { 2631 UChar sib = getUChar(delta); 2632 UChar scale = toUChar((sib >> 6) & 3); 2633 UChar index_r = toUChar((sib >> 3) & 7); 2634 UChar base_r = toUChar(sib & 7); 2635 Long d = getSDisp8(delta+1); 2636 2637 if (index_r == R_RSP && 0==getRexX(pfx)) { 2638 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2639 d, nameIRegRexB(8,pfx,base_r)); 2640 *len = 3; 2641 return disAMode_copy2tmp( 2642 handleAddrOverrides(vbi, pfx, 2643 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2644 } else { 2645 if (scale == 0) { 2646 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2647 nameIRegRexB(8,pfx,base_r), 2648 nameIReg64rexX(pfx,index_r)); 2649 } else { 2650 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2651 nameIRegRexB(8,pfx,base_r), 2652 nameIReg64rexX(pfx,index_r), 1<<scale); 2653 } 2654 *len = 3; 2655 return 2656 disAMode_copy2tmp( 2657 handleAddrOverrides(vbi, pfx, 2658 binop(Iop_Add64, 2659 binop(Iop_Add64, 2660 getIRegRexB(8,pfx,base_r), 2661 binop(Iop_Shl64, 2662 getIReg64rexX(pfx,index_r), mkU8(scale))), 2663 mkU64(d)))); 2664 } 2665 vassert(0); /*NOTREACHED*/ 2666 } 2667 2668 /* SIB, with 32-bit displacement. Special cases: 2669 -- %rsp cannot act as an index value. 2670 If index_r indicates %rsp, zero is used for the index. 2671 Denoted value is: 2672 | %index == %RSP 2673 = d32 + %base 2674 | %index != %RSP 2675 = d32 + %base + (%index << scale) 2676 */ 2677 case 0x14: { 2678 UChar sib = getUChar(delta); 2679 UChar scale = toUChar((sib >> 6) & 3); 2680 UChar index_r = toUChar((sib >> 3) & 7); 2681 UChar base_r = toUChar(sib & 7); 2682 Long d = getSDisp32(delta+1); 2683 2684 if (index_r == R_RSP && 0==getRexX(pfx)) { 2685 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2686 d, nameIRegRexB(8,pfx,base_r)); 2687 *len = 6; 2688 return disAMode_copy2tmp( 2689 handleAddrOverrides(vbi, pfx, 2690 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2691 } else { 2692 if (scale == 0) { 2693 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2694 nameIRegRexB(8,pfx,base_r), 2695 nameIReg64rexX(pfx,index_r)); 2696 } else { 2697 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2698 nameIRegRexB(8,pfx,base_r), 2699 nameIReg64rexX(pfx,index_r), 1<<scale); 2700 } 2701 *len = 6; 2702 return 2703 disAMode_copy2tmp( 2704 handleAddrOverrides(vbi, pfx, 2705 binop(Iop_Add64, 2706 binop(Iop_Add64, 2707 getIRegRexB(8,pfx,base_r), 2708 binop(Iop_Shl64, 2709 getIReg64rexX(pfx,index_r), mkU8(scale))), 2710 mkU64(d)))); 2711 } 2712 vassert(0); /*NOTREACHED*/ 2713 } 2714 2715 default: 2716 vpanic("disAMode(amd64)"); 2717 return 0; /*notreached*/ 2718 } 2719 } 2720 2721 2722 /* Similarly for VSIB addressing. This returns just the addend, 2723 and fills in *rI and *vscale with the register number of the vector 2724 index and its multiplicand. 
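   For example, a gather operand written 8(%rdi,%ymm3,4) comes back as
   the addend %rdi + 8, with *rI = 3 and *vscale = 4; adding in the
   scaled vector-index lanes is left to the caller.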
*/ 2725 static 2726 IRTemp disAVSIBMode ( /*OUT*/Int* len, 2727 const VexAbiInfo* vbi, Prefix pfx, Long delta, 2728 /*OUT*/HChar* buf, /*OUT*/UInt* rI, 2729 IRType ty, /*OUT*/Int* vscale ) 2730 { 2731 UChar mod_reg_rm = getUChar(delta); 2732 const HChar *vindex; 2733 2734 *len = 0; 2735 *rI = 0; 2736 *vscale = 0; 2737 buf[0] = (UChar)0; 2738 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm)) 2739 return IRTemp_INVALID; 2740 2741 UChar sib = getUChar(delta+1); 2742 UChar scale = toUChar((sib >> 6) & 3); 2743 UChar index_r = toUChar((sib >> 3) & 7); 2744 UChar base_r = toUChar(sib & 7); 2745 Long d = 0; 2746 /* correct since #(R13) == 8 + #(RBP) */ 2747 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2748 delta += 2; 2749 *len = 2; 2750 2751 *rI = index_r | (getRexX(pfx) << 3); 2752 if (ty == Ity_V128) 2753 vindex = nameXMMReg(*rI); 2754 else 2755 vindex = nameYMMReg(*rI); 2756 *vscale = 1<<scale; 2757 2758 switch (mod_reg_rm >> 6) { 2759 case 0: 2760 if (base_is_BPor13) { 2761 d = getSDisp32(delta); 2762 *len += 4; 2763 if (scale == 0) { 2764 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex); 2765 } else { 2766 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale); 2767 } 2768 return disAMode_copy2tmp( mkU64(d) ); 2769 } else { 2770 if (scale == 0) { 2771 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2772 nameIRegRexB(8,pfx,base_r), vindex); 2773 } else { 2774 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2775 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale); 2776 } 2777 } 2778 break; 2779 case 1: 2780 d = getSDisp8(delta); 2781 *len += 1; 2782 goto have_disp; 2783 case 2: 2784 d = getSDisp32(delta); 2785 *len += 4; 2786 have_disp: 2787 if (scale == 0) { 2788 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2789 nameIRegRexB(8,pfx,base_r), vindex); 2790 } else { 2791 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2792 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale); 2793 } 2794 break; 2795 } 2796 2797 if (!d) 2798 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) ); 2799 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r), 2800 mkU64(d)) ); 2801 } 2802 2803 2804 /* Figure out the number of (insn-stream) bytes constituting the amode 2805 beginning at delta. Is useful for getting hold of literals beyond 2806 the end of the amode before it has been disassembled. */ 2807 2808 static UInt lengthAMode ( Prefix pfx, Long delta ) 2809 { 2810 UChar mod_reg_rm = getUChar(delta); 2811 delta++; 2812 2813 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2814 jump table seems a bit excessive. 2815 */ 2816 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2817 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2818 /* is now XX0XXYYY */ 2819 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2820 switch (mod_reg_rm) { 2821 2822 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2823 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2824 */ 2825 case 0x00: case 0x01: case 0x02: case 0x03: 2826 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2827 return 1; 2828 2829 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2830 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2831 */ 2832 case 0x08: case 0x09: case 0x0A: case 0x0B: 2833 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2834 return 2; 2835 2836 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2837 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2838 */ 2839 case 0x10: case 0x11: case 0x12: case 0x13: 2840 /* ! 
14 */ case 0x15: case 0x16: case 0x17: 2841 return 5; 2842 2843 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2844 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ 2845 /* Not an address, but still handled. */ 2846 case 0x18: case 0x19: case 0x1A: case 0x1B: 2847 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2848 return 1; 2849 2850 /* RIP + disp32. */ 2851 case 0x05: 2852 return 5; 2853 2854 case 0x04: { 2855 /* SIB, with no displacement. */ 2856 UChar sib = getUChar(delta); 2857 UChar base_r = toUChar(sib & 7); 2858 /* correct since #(R13) == 8 + #(RBP) */ 2859 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2860 2861 if (base_is_BPor13) { 2862 return 6; 2863 } else { 2864 return 2; 2865 } 2866 } 2867 2868 /* SIB, with 8-bit displacement. */ 2869 case 0x0C: 2870 return 3; 2871 2872 /* SIB, with 32-bit displacement. */ 2873 case 0x14: 2874 return 6; 2875 2876 default: 2877 vpanic("lengthAMode(amd64)"); 2878 return 0; /*notreached*/ 2879 } 2880 } 2881 2882 2883 /*------------------------------------------------------------*/ 2884 /*--- Disassembling common idioms ---*/ 2885 /*------------------------------------------------------------*/ 2886 2887 /* Handle binary integer instructions of the form 2888 op E, G meaning 2889 op reg-or-mem, reg 2890 Is passed the a ptr to the modRM byte, the actual operation, and the 2891 data size. Returns the address advanced completely over this 2892 instruction. 2893 2894 E(src) is reg-or-mem 2895 G(dst) is reg. 2896 2897 If E is reg, --> GET %G, tmp 2898 OP %E, tmp 2899 PUT tmp, %G 2900 2901 If E is mem and OP is not reversible, 2902 --> (getAddr E) -> tmpa 2903 LD (tmpa), tmpa 2904 GET %G, tmp2 2905 OP tmpa, tmp2 2906 PUT tmp2, %G 2907 2908 If E is mem and OP is reversible 2909 --> (getAddr E) -> tmpa 2910 LD (tmpa), tmpa 2911 OP %G, tmpa 2912 PUT tmpa, %G 2913 */ 2914 static 2915 ULong dis_op2_E_G ( const VexAbiInfo* vbi, 2916 Prefix pfx, 2917 Bool addSubCarry, 2918 IROp op8, 2919 Bool keep, 2920 Int size, 2921 Long delta0, 2922 const HChar* t_amd64opc ) 2923 { 2924 HChar dis_buf[50]; 2925 Int len; 2926 IRType ty = szToITy(size); 2927 IRTemp dst1 = newTemp(ty); 2928 IRTemp src = newTemp(ty); 2929 IRTemp dst0 = newTemp(ty); 2930 UChar rm = getUChar(delta0); 2931 IRTemp addr = IRTemp_INVALID; 2932 2933 /* addSubCarry == True indicates the intended operation is 2934 add-with-carry or subtract-with-borrow. */ 2935 if (addSubCarry) { 2936 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2937 vassert(keep); 2938 } 2939 2940 if (epartIsReg(rm)) { 2941 /* Specially handle XOR reg,reg, because that doesn't really 2942 depend on reg, and doing the obvious thing potentially 2943 generates a spurious value check failure due to the bogus 2944 dependency. 
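   (xor %reg,%reg is the standard idiom for zeroing a register.  If
   the register previously held undefined data, Memcheck would
   otherwise consider the result -- and any flags computed from it --
   to depend on that data, even though architecturally it is always
   zero.  The same applies to SBB reg,reg, whose result depends only
   on the carry flag.)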
*/ 2945 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2946 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2947 if (False && op8 == Iop_Sub8) 2948 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n"); 2949 putIRegG(size,pfx,rm, mkU(ty,0)); 2950 } 2951 2952 assign( dst0, getIRegG(size,pfx,rm) ); 2953 assign( src, getIRegE(size,pfx,rm) ); 2954 2955 if (addSubCarry && op8 == Iop_Add8) { 2956 helper_ADC( size, dst1, dst0, src, 2957 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2958 putIRegG(size, pfx, rm, mkexpr(dst1)); 2959 } else 2960 if (addSubCarry && op8 == Iop_Sub8) { 2961 helper_SBB( size, dst1, dst0, src, 2962 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2963 putIRegG(size, pfx, rm, mkexpr(dst1)); 2964 } else { 2965 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2966 if (isAddSub(op8)) 2967 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2968 else 2969 setFlags_DEP1(op8, dst1, ty); 2970 if (keep) 2971 putIRegG(size, pfx, rm, mkexpr(dst1)); 2972 } 2973 2974 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2975 nameIRegE(size,pfx,rm), 2976 nameIRegG(size,pfx,rm)); 2977 return 1+delta0; 2978 } else { 2979 /* E refers to memory */ 2980 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2981 assign( dst0, getIRegG(size,pfx,rm) ); 2982 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 2983 2984 if (addSubCarry && op8 == Iop_Add8) { 2985 helper_ADC( size, dst1, dst0, src, 2986 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2987 putIRegG(size, pfx, rm, mkexpr(dst1)); 2988 } else 2989 if (addSubCarry && op8 == Iop_Sub8) { 2990 helper_SBB( size, dst1, dst0, src, 2991 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2992 putIRegG(size, pfx, rm, mkexpr(dst1)); 2993 } else { 2994 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2995 if (isAddSub(op8)) 2996 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2997 else 2998 setFlags_DEP1(op8, dst1, ty); 2999 if (keep) 3000 putIRegG(size, pfx, rm, mkexpr(dst1)); 3001 } 3002 3003 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3004 dis_buf, nameIRegG(size, pfx, rm)); 3005 return len+delta0; 3006 } 3007 } 3008 3009 3010 3011 /* Handle binary integer instructions of the form 3012 op G, E meaning 3013 op reg, reg-or-mem 3014 Is passed the a ptr to the modRM byte, the actual operation, and the 3015 data size. Returns the address advanced completely over this 3016 instruction. 3017 3018 G(src) is reg. 3019 E(dst) is reg-or-mem 3020 3021 If E is reg, --> GET %E, tmp 3022 OP %G, tmp 3023 PUT tmp, %E 3024 3025 If E is mem, --> (getAddr E) -> tmpa 3026 LD (tmpa), tmpv 3027 OP %G, tmpv 3028 ST tmpv, (tmpa) 3029 */ 3030 static 3031 ULong dis_op2_G_E ( const VexAbiInfo* vbi, 3032 Prefix pfx, 3033 Bool addSubCarry, 3034 IROp op8, 3035 Bool keep, 3036 Int size, 3037 Long delta0, 3038 const HChar* t_amd64opc ) 3039 { 3040 HChar dis_buf[50]; 3041 Int len; 3042 IRType ty = szToITy(size); 3043 IRTemp dst1 = newTemp(ty); 3044 IRTemp src = newTemp(ty); 3045 IRTemp dst0 = newTemp(ty); 3046 UChar rm = getUChar(delta0); 3047 IRTemp addr = IRTemp_INVALID; 3048 3049 /* addSubCarry == True indicates the intended operation is 3050 add-with-carry or subtract-with-borrow. */ 3051 if (addSubCarry) { 3052 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 3053 vassert(keep); 3054 } 3055 3056 if (epartIsReg(rm)) { 3057 /* Specially handle XOR reg,reg, because that doesn't really 3058 depend on reg, and doing the obvious thing potentially 3059 generates a spurious value check failure due to the bogus 3060 dependency. 
Ditto SBB reg,reg. */ 3061 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 3062 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 3063 putIRegE(size,pfx,rm, mkU(ty,0)); 3064 } 3065 3066 assign(dst0, getIRegE(size,pfx,rm)); 3067 assign(src, getIRegG(size,pfx,rm)); 3068 3069 if (addSubCarry && op8 == Iop_Add8) { 3070 helper_ADC( size, dst1, dst0, src, 3071 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3072 putIRegE(size, pfx, rm, mkexpr(dst1)); 3073 } else 3074 if (addSubCarry && op8 == Iop_Sub8) { 3075 helper_SBB( size, dst1, dst0, src, 3076 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3077 putIRegE(size, pfx, rm, mkexpr(dst1)); 3078 } else { 3079 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3080 if (isAddSub(op8)) 3081 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3082 else 3083 setFlags_DEP1(op8, dst1, ty); 3084 if (keep) 3085 putIRegE(size, pfx, rm, mkexpr(dst1)); 3086 } 3087 3088 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3089 nameIRegG(size,pfx,rm), 3090 nameIRegE(size,pfx,rm)); 3091 return 1+delta0; 3092 } 3093 3094 /* E refers to memory */ 3095 { 3096 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3097 assign(dst0, loadLE(ty,mkexpr(addr))); 3098 assign(src, getIRegG(size,pfx,rm)); 3099 3100 if (addSubCarry && op8 == Iop_Add8) { 3101 if (haveLOCK(pfx)) { 3102 /* cas-style store */ 3103 helper_ADC( size, dst1, dst0, src, 3104 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3105 } else { 3106 /* normal store */ 3107 helper_ADC( size, dst1, dst0, src, 3108 /*store*/addr, IRTemp_INVALID, 0 ); 3109 } 3110 } else 3111 if (addSubCarry && op8 == Iop_Sub8) { 3112 if (haveLOCK(pfx)) { 3113 /* cas-style store */ 3114 helper_SBB( size, dst1, dst0, src, 3115 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3116 } else { 3117 /* normal store */ 3118 helper_SBB( size, dst1, dst0, src, 3119 /*store*/addr, IRTemp_INVALID, 0 ); 3120 } 3121 } else { 3122 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3123 if (keep) { 3124 if (haveLOCK(pfx)) { 3125 if (0) vex_printf("locked case\n" ); 3126 casLE( mkexpr(addr), 3127 mkexpr(dst0)/*expval*/, 3128 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 3129 } else { 3130 if (0) vex_printf("nonlocked case\n"); 3131 storeLE(mkexpr(addr), mkexpr(dst1)); 3132 } 3133 } 3134 if (isAddSub(op8)) 3135 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3136 else 3137 setFlags_DEP1(op8, dst1, ty); 3138 } 3139 3140 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3141 nameIRegG(size,pfx,rm), dis_buf); 3142 return len+delta0; 3143 } 3144 } 3145 3146 3147 /* Handle move instructions of the form 3148 mov E, G meaning 3149 mov reg-or-mem, reg 3150 Is passed the a ptr to the modRM byte, and the data size. Returns 3151 the address advanced completely over this instruction. 3152 3153 E(src) is reg-or-mem 3154 G(dst) is reg. 
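   (In the one-byte opcode map these are the 0x8A and 0x8B forms of
   MOV, i.e. the ones with the register as destination.)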
3155 3156 If E is reg, --> GET %E, tmpv 3157 PUT tmpv, %G 3158 3159 If E is mem --> (getAddr E) -> tmpa 3160 LD (tmpa), tmpb 3161 PUT tmpb, %G 3162 */ 3163 static 3164 ULong dis_mov_E_G ( const VexAbiInfo* vbi, 3165 Prefix pfx, 3166 Int size, 3167 Long delta0 ) 3168 { 3169 Int len; 3170 UChar rm = getUChar(delta0); 3171 HChar dis_buf[50]; 3172 3173 if (epartIsReg(rm)) { 3174 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 3175 DIP("mov%c %s,%s\n", nameISize(size), 3176 nameIRegE(size,pfx,rm), 3177 nameIRegG(size,pfx,rm)); 3178 return 1+delta0; 3179 } 3180 3181 /* E refers to memory */ 3182 { 3183 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3184 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 3185 DIP("mov%c %s,%s\n", nameISize(size), 3186 dis_buf, 3187 nameIRegG(size,pfx,rm)); 3188 return delta0+len; 3189 } 3190 } 3191 3192 3193 /* Handle move instructions of the form 3194 mov G, E meaning 3195 mov reg, reg-or-mem 3196 Is passed the a ptr to the modRM byte, and the data size. Returns 3197 the address advanced completely over this instruction. 3198 We have to decide here whether F2 or F3 are acceptable. F2 never is. 3199 3200 G(src) is reg. 3201 E(dst) is reg-or-mem 3202 3203 If E is reg, --> GET %G, tmp 3204 PUT tmp, %E 3205 3206 If E is mem, --> (getAddr E) -> tmpa 3207 GET %G, tmpv 3208 ST tmpv, (tmpa) 3209 */ 3210 static 3211 ULong dis_mov_G_E ( const VexAbiInfo* vbi, 3212 Prefix pfx, 3213 Int size, 3214 Long delta0, 3215 /*OUT*/Bool* ok ) 3216 { 3217 Int len; 3218 UChar rm = getUChar(delta0); 3219 HChar dis_buf[50]; 3220 3221 *ok = True; 3222 3223 if (epartIsReg(rm)) { 3224 if (haveF2orF3(pfx)) { *ok = False; return delta0; } 3225 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm)); 3226 DIP("mov%c %s,%s\n", nameISize(size), 3227 nameIRegG(size,pfx,rm), 3228 nameIRegE(size,pfx,rm)); 3229 return 1+delta0; 3230 } 3231 3232 /* E refers to memory */ 3233 { 3234 if (haveF2(pfx)) { *ok = False; return delta0; } 3235 /* F3(XRELEASE) is acceptable, though. */ 3236 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3237 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) ); 3238 DIP("mov%c %s,%s\n", nameISize(size), 3239 nameIRegG(size,pfx,rm), 3240 dis_buf); 3241 return len+delta0; 3242 } 3243 } 3244 3245 3246 /* op $immediate, AL/AX/EAX/RAX. 
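   These are the short accumulator encodings (e.g. addl $123, %eax):
   there is no ModRM byte, the immediate follows the opcode directly,
   and even for the 64-bit forms the immediate is at most 32 bits wide
   and gets sign-extended -- hence the imin(size,4) below.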
*/ 3247 static 3248 ULong dis_op_imm_A ( Int size, 3249 Bool carrying, 3250 IROp op8, 3251 Bool keep, 3252 Long delta, 3253 const HChar* t_amd64opc ) 3254 { 3255 Int size4 = imin(size,4); 3256 IRType ty = szToITy(size); 3257 IRTemp dst0 = newTemp(ty); 3258 IRTemp src = newTemp(ty); 3259 IRTemp dst1 = newTemp(ty); 3260 Long lit = getSDisp(size4,delta); 3261 assign(dst0, getIRegRAX(size)); 3262 assign(src, mkU(ty,lit & mkSizeMask(size))); 3263 3264 if (isAddSub(op8) && !carrying) { 3265 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3266 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3267 } 3268 else 3269 if (isLogic(op8)) { 3270 vassert(!carrying); 3271 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3272 setFlags_DEP1(op8, dst1, ty); 3273 } 3274 else 3275 if (op8 == Iop_Add8 && carrying) { 3276 helper_ADC( size, dst1, dst0, src, 3277 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3278 } 3279 else 3280 if (op8 == Iop_Sub8 && carrying) { 3281 helper_SBB( size, dst1, dst0, src, 3282 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3283 } 3284 else 3285 vpanic("dis_op_imm_A(amd64,guest)"); 3286 3287 if (keep) 3288 putIRegRAX(size, mkexpr(dst1)); 3289 3290 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size), 3291 lit, nameIRegRAX(size)); 3292 return delta+size4; 3293 } 3294 3295 3296 /* Sign- and Zero-extending moves. */ 3297 static 3298 ULong dis_movx_E_G ( const VexAbiInfo* vbi, 3299 Prefix pfx, 3300 Long delta, Int szs, Int szd, Bool sign_extend ) 3301 { 3302 UChar rm = getUChar(delta); 3303 if (epartIsReg(rm)) { 3304 putIRegG(szd, pfx, rm, 3305 doScalarWidening( 3306 szs,szd,sign_extend, 3307 getIRegE(szs,pfx,rm))); 3308 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3309 nameISize(szs), 3310 nameISize(szd), 3311 nameIRegE(szs,pfx,rm), 3312 nameIRegG(szd,pfx,rm)); 3313 return 1+delta; 3314 } 3315 3316 /* E refers to memory */ 3317 { 3318 Int len; 3319 HChar dis_buf[50]; 3320 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 3321 putIRegG(szd, pfx, rm, 3322 doScalarWidening( 3323 szs,szd,sign_extend, 3324 loadLE(szToITy(szs),mkexpr(addr)))); 3325 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3326 nameISize(szs), 3327 nameISize(szd), 3328 dis_buf, 3329 nameIRegG(szd,pfx,rm)); 3330 return len+delta; 3331 } 3332 } 3333 3334 3335 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by 3336 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */ 3337 static 3338 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 3339 { 3340 /* special-case the 64-bit case */ 3341 if (sz == 8) { 3342 IROp op = signed_divide ? Iop_DivModS128to64 3343 : Iop_DivModU128to64; 3344 IRTemp src128 = newTemp(Ity_I128); 3345 IRTemp dst128 = newTemp(Ity_I128); 3346 assign( src128, binop(Iop_64HLto128, 3347 getIReg64(R_RDX), 3348 getIReg64(R_RAX)) ); 3349 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) ); 3350 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) ); 3351 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); 3352 } else { 3353 IROp op = signed_divide ? Iop_DivModS64to32 3354 : Iop_DivModU64to32; 3355 IRTemp src64 = newTemp(Ity_I64); 3356 IRTemp dst64 = newTemp(Ity_I64); 3357 switch (sz) { 3358 case 4: 3359 assign( src64, 3360 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3361 assign( dst64, 3362 binop(op, mkexpr(src64), mkexpr(t)) ); 3363 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3364 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3365 break; 3366 case 2: { 3367 IROp widen3264 = signed_divide ? 
Iop_32Sto64 : Iop_32Uto64; 3368 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3369 assign( src64, unop(widen3264, 3370 binop(Iop_16HLto32, 3371 getIRegRDX(2), 3372 getIRegRAX(2))) ); 3373 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3374 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3375 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3376 break; 3377 } 3378 case 1: { 3379 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3380 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3381 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 3382 assign( src64, unop(widen3264, 3383 unop(widen1632, getIRegRAX(2))) ); 3384 assign( dst64, 3385 binop(op, mkexpr(src64), 3386 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3387 putIRegRAX( 1, unop(Iop_16to8, 3388 unop(Iop_32to16, 3389 unop(Iop_64to32,mkexpr(dst64)))) ); 3390 putIRegAH( unop(Iop_16to8, 3391 unop(Iop_32to16, 3392 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3393 break; 3394 } 3395 default: 3396 vpanic("codegen_div(amd64)"); 3397 } 3398 } 3399 } 3400 3401 static 3402 ULong dis_Grp1 ( const VexAbiInfo* vbi, 3403 Prefix pfx, 3404 Long delta, UChar modrm, 3405 Int am_sz, Int d_sz, Int sz, Long d64 ) 3406 { 3407 Int len; 3408 HChar dis_buf[50]; 3409 IRType ty = szToITy(sz); 3410 IRTemp dst1 = newTemp(ty); 3411 IRTemp src = newTemp(ty); 3412 IRTemp dst0 = newTemp(ty); 3413 IRTemp addr = IRTemp_INVALID; 3414 IROp op8 = Iop_INVALID; 3415 ULong mask = mkSizeMask(sz); 3416 3417 switch (gregLO3ofRM(modrm)) { 3418 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3419 case 2: break; // ADC 3420 case 3: break; // SBB 3421 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3422 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3423 /*NOTREACHED*/ 3424 default: vpanic("dis_Grp1(amd64): unhandled case"); 3425 } 3426 3427 if (epartIsReg(modrm)) { 3428 vassert(am_sz == 1); 3429 3430 assign(dst0, getIRegE(sz,pfx,modrm)); 3431 assign(src, mkU(ty,d64 & mask)); 3432 3433 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3434 helper_ADC( sz, dst1, dst0, src, 3435 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3436 } else 3437 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3438 helper_SBB( sz, dst1, dst0, src, 3439 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3440 } else { 3441 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3442 if (isAddSub(op8)) 3443 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3444 else 3445 setFlags_DEP1(op8, dst1, ty); 3446 } 3447 3448 if (gregLO3ofRM(modrm) < 7) 3449 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3450 3451 delta += (am_sz + d_sz); 3452 DIP("%s%c $%lld, %s\n", 3453 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3454 nameIRegE(sz,pfx,modrm)); 3455 } else { 3456 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3457 3458 assign(dst0, loadLE(ty,mkexpr(addr))); 3459 assign(src, mkU(ty,d64 & mask)); 3460 3461 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3462 if (haveLOCK(pfx)) { 3463 /* cas-style store */ 3464 helper_ADC( sz, dst1, dst0, src, 3465 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3466 } else { 3467 /* normal store */ 3468 helper_ADC( sz, dst1, dst0, src, 3469 /*store*/addr, IRTemp_INVALID, 0 ); 3470 } 3471 } else 3472 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3473 if (haveLOCK(pfx)) { 3474 /* cas-style store */ 3475 helper_SBB( sz, dst1, dst0, src, 3476 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3477 } else { 3478 /* normal store */ 3479 helper_SBB( sz, 
dst1, dst0, src, 3480 /*store*/addr, IRTemp_INVALID, 0 ); 3481 } 3482 } else { 3483 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3484 if (gregLO3ofRM(modrm) < 7) { 3485 if (haveLOCK(pfx)) { 3486 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3487 mkexpr(dst1)/*newVal*/, 3488 guest_RIP_curr_instr ); 3489 } else { 3490 storeLE(mkexpr(addr), mkexpr(dst1)); 3491 } 3492 } 3493 if (isAddSub(op8)) 3494 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3495 else 3496 setFlags_DEP1(op8, dst1, ty); 3497 } 3498 3499 delta += (len+d_sz); 3500 DIP("%s%c $%lld, %s\n", 3501 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 3502 d64, dis_buf); 3503 } 3504 return delta; 3505 } 3506 3507 3508 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3509 expression. */ 3510 3511 static 3512 ULong dis_Grp2 ( const VexAbiInfo* vbi, 3513 Prefix pfx, 3514 Long delta, UChar modrm, 3515 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3516 const HChar* shift_expr_txt, Bool* decode_OK ) 3517 { 3518 /* delta on entry points at the modrm byte. */ 3519 HChar dis_buf[50]; 3520 Int len; 3521 Bool isShift, isRotate, isRotateC; 3522 IRType ty = szToITy(sz); 3523 IRTemp dst0 = newTemp(ty); 3524 IRTemp dst1 = newTemp(ty); 3525 IRTemp addr = IRTemp_INVALID; 3526 3527 *decode_OK = True; 3528 3529 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3530 3531 /* Put value to shift/rotate in dst0. */ 3532 if (epartIsReg(modrm)) { 3533 assign(dst0, getIRegE(sz, pfx, modrm)); 3534 delta += (am_sz + d_sz); 3535 } else { 3536 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3537 assign(dst0, loadLE(ty,mkexpr(addr))); 3538 delta += len + d_sz; 3539 } 3540 3541 isShift = False; 3542 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3543 3544 isRotate = False; 3545 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3546 3547 isRotateC = False; 3548 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3549 3550 if (!isShift && !isRotate && !isRotateC) { 3551 /*NOTREACHED*/ 3552 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3553 } 3554 3555 if (isRotateC) { 3556 /* Call a helper; this insn is so ridiculous it does not deserve 3557 better. One problem is, the helper has to calculate both the 3558 new value and the new flags. This is more than 64 bits, and 3559 there is no way to return more than 64 bits from the helper. 3560 Hence the crude and obvious solution is to call it twice, 3561 using the sign of the sz field to indicate whether it is the 3562 value or rflags result we want. 3563 */ 3564 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3565 IRExpr** argsVALUE; 3566 IRExpr** argsRFLAGS; 3567 3568 IRTemp new_value = newTemp(Ity_I64); 3569 IRTemp new_rflags = newTemp(Ity_I64); 3570 IRTemp old_rflags = newTemp(Ity_I64); 3571 3572 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3573 3574 argsVALUE 3575 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3576 widenUto64(shift_expr), /* rotate amount */ 3577 mkexpr(old_rflags), 3578 mkU64(sz) ); 3579 assign( new_value, 3580 mkIRExprCCall( 3581 Ity_I64, 3582 0/*regparm*/, 3583 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3584 left ? 
&amd64g_calculate_RCL : &amd64g_calculate_RCR, 3585 argsVALUE 3586 ) 3587 ); 3588 3589 argsRFLAGS 3590 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3591 widenUto64(shift_expr), /* rotate amount */ 3592 mkexpr(old_rflags), 3593 mkU64(-sz) ); 3594 assign( new_rflags, 3595 mkIRExprCCall( 3596 Ity_I64, 3597 0/*regparm*/, 3598 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3599 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3600 argsRFLAGS 3601 ) 3602 ); 3603 3604 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3605 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3606 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3607 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3608 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3609 } 3610 3611 else 3612 if (isShift) { 3613 3614 IRTemp pre64 = newTemp(Ity_I64); 3615 IRTemp res64 = newTemp(Ity_I64); 3616 IRTemp res64ss = newTemp(Ity_I64); 3617 IRTemp shift_amt = newTemp(Ity_I8); 3618 UChar mask = toUChar(sz==8 ? 63 : 31); 3619 IROp op64; 3620 3621 switch (gregLO3ofRM(modrm)) { 3622 case 4: op64 = Iop_Shl64; break; 3623 case 5: op64 = Iop_Shr64; break; 3624 case 6: op64 = Iop_Shl64; break; 3625 case 7: op64 = Iop_Sar64; break; 3626 /*NOTREACHED*/ 3627 default: vpanic("dis_Grp2:shift"); break; 3628 } 3629 3630 /* Widen the value to be shifted to 64 bits, do the shift, and 3631 narrow back down. This seems surprisingly long-winded, but 3632 unfortunately the AMD semantics requires that 8/16/32-bit 3633 shifts give defined results for shift values all the way up 3634 to 32, and this seems the simplest way to do it. It has the 3635 advantage that the only IR level shifts generated are of 64 3636 bit values, and the shift amount is guaranteed to be in the 3637 range 0 .. 63, thereby observing the IR semantics requiring 3638 all shift values to be in the range 0 .. 2^word_size-1. 3639 3640 Therefore the shift amount is masked with 63 for 64-bit shifts 3641 and 31 for all others. 3642 */ 3643 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3644 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3645 3646 /* suitably widen the value to be shifted to 64 bits. */ 3647 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3648 : widenUto64(mkexpr(dst0)) ); 3649 3650 /* res64 = pre64 `shift` shift_amt */ 3651 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3652 3653 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3654 assign( res64ss, 3655 binop(op64, 3656 mkexpr(pre64), 3657 binop(Iop_And8, 3658 binop(Iop_Sub8, 3659 mkexpr(shift_amt), mkU8(1)), 3660 mkU8(mask))) ); 3661 3662 /* Build the flags thunk. */ 3663 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3664 3665 /* Narrow the result back down. */ 3666 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3667 3668 } /* if (isShift) */ 3669 3670 else 3671 if (isRotate) { 3672 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3673 : (ty==Ity_I32 ? 2 : 3)); 3674 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3675 IRTemp rot_amt = newTemp(Ity_I8); 3676 IRTemp rot_amt64 = newTemp(Ity_I8); 3677 IRTemp oldFlags = newTemp(Ity_I64); 3678 UChar mask = toUChar(sz==8 ? 63 : 31); 3679 3680 /* rot_amt = shift_expr & mask */ 3681 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3682 expressions never shift beyond the word size and thus remain 3683 well defined. 
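   For the 8- and 16-bit cases the amount is further reduced modulo
   the operand width just below, so e.g. 'rolb $9, %al' ends up
   rotating by a single bit position.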
*/ 3684 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3685 3686 if (ty == Ity_I64) 3687 assign(rot_amt, mkexpr(rot_amt64)); 3688 else 3689 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3690 3691 if (left) { 3692 3693 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3694 assign(dst1, 3695 binop( mkSizedOp(ty,Iop_Or8), 3696 binop( mkSizedOp(ty,Iop_Shl8), 3697 mkexpr(dst0), 3698 mkexpr(rot_amt) 3699 ), 3700 binop( mkSizedOp(ty,Iop_Shr8), 3701 mkexpr(dst0), 3702 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3703 ) 3704 ) 3705 ); 3706 ccOp += AMD64G_CC_OP_ROLB; 3707 3708 } else { /* right */ 3709 3710 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3711 assign(dst1, 3712 binop( mkSizedOp(ty,Iop_Or8), 3713 binop( mkSizedOp(ty,Iop_Shr8), 3714 mkexpr(dst0), 3715 mkexpr(rot_amt) 3716 ), 3717 binop( mkSizedOp(ty,Iop_Shl8), 3718 mkexpr(dst0), 3719 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3720 ) 3721 ) 3722 ); 3723 ccOp += AMD64G_CC_OP_RORB; 3724 3725 } 3726 3727 /* dst1 now holds the rotated value. Build flag thunk. We 3728 need the resulting value for this, and the previous flags. 3729 Except don't set it if the rotate count is zero. */ 3730 3731 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3732 3733 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */ 3734 IRTemp rot_amt64b = newTemp(Ity_I1); 3735 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) ); 3736 3737 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 3738 stmt( IRStmt_Put( OFFB_CC_OP, 3739 IRExpr_ITE( mkexpr(rot_amt64b), 3740 mkU64(ccOp), 3741 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 3742 stmt( IRStmt_Put( OFFB_CC_DEP1, 3743 IRExpr_ITE( mkexpr(rot_amt64b), 3744 widenUto64(mkexpr(dst1)), 3745 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 3746 stmt( IRStmt_Put( OFFB_CC_DEP2, 3747 IRExpr_ITE( mkexpr(rot_amt64b), 3748 mkU64(0), 3749 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 3750 stmt( IRStmt_Put( OFFB_CC_NDEP, 3751 IRExpr_ITE( mkexpr(rot_amt64b), 3752 mkexpr(oldFlags), 3753 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) )); 3754 } /* if (isRotate) */ 3755 3756 /* Save result, and finish up. */ 3757 if (epartIsReg(modrm)) { 3758 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3759 if (vex_traceflags & VEX_TRACE_FE) { 3760 vex_printf("%s%c ", 3761 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3762 if (shift_expr_txt) 3763 vex_printf("%s", shift_expr_txt); 3764 else 3765 ppIRExpr(shift_expr); 3766 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3767 } 3768 } else { 3769 storeLE(mkexpr(addr), mkexpr(dst1)); 3770 if (vex_traceflags & VEX_TRACE_FE) { 3771 vex_printf("%s%c ", 3772 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3773 if (shift_expr_txt) 3774 vex_printf("%s", shift_expr_txt); 3775 else 3776 ppIRExpr(shift_expr); 3777 vex_printf(", %s\n", dis_buf); 3778 } 3779 } 3780 return delta; 3781 } 3782 3783 3784 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3785 static 3786 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi, 3787 Prefix pfx, 3788 Long delta, UChar modrm, 3789 Int am_sz, Int sz, ULong src_val, 3790 Bool* decode_OK ) 3791 { 3792 /* src_val denotes a d8. 3793 And delta on entry points at the modrm byte. */ 3794 3795 IRType ty = szToITy(sz); 3796 IRTemp t2 = newTemp(Ity_I64); 3797 IRTemp t2m = newTemp(Ity_I64); 3798 IRTemp t_addr = IRTemp_INVALID; 3799 HChar dis_buf[50]; 3800 ULong mask; 3801 3802 /* we're optimists :-) */ 3803 *decode_OK = True; 3804 3805 /* Check whether F2 or F3 are acceptable. 
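      (These would be the F2/XACQ and F3/XREL hints.)  The register
      forms reject them outright; the memory forms accept at most one
      of the two, and then only when a LOCK prefix is also present --
      which is exactly what the two checks below implement.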
*/ 3806 if (epartIsReg(modrm)) { 3807 /* F2 or F3 are not allowed in the register case. */ 3808 if (haveF2orF3(pfx)) { 3809 *decode_OK = False; 3810 return delta; 3811 } 3812 } else { 3813 /* F2 or F3 (but not both) are allowable provided LOCK is also 3814 present. */ 3815 if (haveF2orF3(pfx)) { 3816 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 3817 *decode_OK = False; 3818 return delta; 3819 } 3820 } 3821 } 3822 3823 /* Limit src_val -- the bit offset -- to something within a word. 3824 The Intel docs say that literal offsets larger than a word are 3825 masked in this way. */ 3826 switch (sz) { 3827 case 2: src_val &= 15; break; 3828 case 4: src_val &= 31; break; 3829 case 8: src_val &= 63; break; 3830 default: *decode_OK = False; return delta; 3831 } 3832 3833 /* Invent a mask suitable for the operation. */ 3834 switch (gregLO3ofRM(modrm)) { 3835 case 4: /* BT */ mask = 0; break; 3836 case 5: /* BTS */ mask = 1ULL << src_val; break; 3837 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3838 case 7: /* BTC */ mask = 1ULL << src_val; break; 3839 /* If this needs to be extended, probably simplest to make a 3840 new function to handle the other cases (0 .. 3). The 3841 Intel docs do however not indicate any use for 0 .. 3, so 3842 we don't expect this to happen. */ 3843 default: *decode_OK = False; return delta; 3844 } 3845 3846 /* Fetch the value to be tested and modified into t2, which is 3847 64-bits wide regardless of sz. */ 3848 if (epartIsReg(modrm)) { 3849 vassert(am_sz == 1); 3850 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3851 delta += (am_sz + 1); 3852 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3853 nameISize(sz), 3854 src_val, nameIRegE(sz,pfx,modrm)); 3855 } else { 3856 Int len; 3857 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3858 delta += (len+1); 3859 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3860 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3861 nameISize(sz), 3862 src_val, dis_buf); 3863 } 3864 3865 /* Compute the new value into t2m, if non-BT. */ 3866 switch (gregLO3ofRM(modrm)) { 3867 case 4: /* BT */ 3868 break; 3869 case 5: /* BTS */ 3870 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3871 break; 3872 case 6: /* BTR */ 3873 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3874 break; 3875 case 7: /* BTC */ 3876 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3877 break; 3878 default: 3879 /*NOTREACHED*/ /*the previous switch guards this*/ 3880 vassert(0); 3881 } 3882 3883 /* Write the result back, if non-BT. */ 3884 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3885 if (epartIsReg(modrm)) { 3886 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3887 } else { 3888 if (haveLOCK(pfx)) { 3889 casLE( mkexpr(t_addr), 3890 narrowTo(ty, mkexpr(t2))/*expd*/, 3891 narrowTo(ty, mkexpr(t2m))/*new*/, 3892 guest_RIP_curr_instr ); 3893 } else { 3894 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3895 } 3896 } 3897 } 3898 3899 /* Copy relevant bit from t2 into the carry flag. */ 3900 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3901 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3902 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3903 stmt( IRStmt_Put( 3904 OFFB_CC_DEP1, 3905 binop(Iop_And64, 3906 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3907 mkU64(1)) 3908 )); 3909 /* Set NDEP even though it isn't used. This makes redundant-PUT 3910 elimination of previous stores to this field work better. 
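      (For reference: with CC_OP set to AMD64G_CC_OP_COPY, CC_DEP1 is
      read back directly as an rflags image, in which C occupies bit
      0.  Placing the selected bit at position 0 therefore yields
      C = the tested bit, with O, S, Z, A and P reading as zero, as
      stated above.  E.g. for BTS with src_val == 5, mask = 1 << 5 and
      CC_DEP1 = (t2 >> 5) & 1.)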
*/ 3911 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3912 3913 return delta; 3914 } 3915 3916 3917 /* Signed/unsigned widening multiply. Generate IR to multiply the 3918 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3919 RDX:RAX/EDX:EAX/DX:AX/AX. 3920 */ 3921 static void codegen_mulL_A_D ( Int sz, Bool syned, 3922 IRTemp tmp, const HChar* tmp_txt ) 3923 { 3924 IRType ty = szToITy(sz); 3925 IRTemp t1 = newTemp(ty); 3926 3927 assign( t1, getIRegRAX(sz) ); 3928 3929 switch (ty) { 3930 case Ity_I64: { 3931 IRTemp res128 = newTemp(Ity_I128); 3932 IRTemp resHi = newTemp(Ity_I64); 3933 IRTemp resLo = newTemp(Ity_I64); 3934 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3935 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3936 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3937 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3938 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3939 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3940 putIReg64(R_RDX, mkexpr(resHi)); 3941 putIReg64(R_RAX, mkexpr(resLo)); 3942 break; 3943 } 3944 case Ity_I32: { 3945 IRTemp res64 = newTemp(Ity_I64); 3946 IRTemp resHi = newTemp(Ity_I32); 3947 IRTemp resLo = newTemp(Ity_I32); 3948 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3949 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3950 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3951 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3952 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3953 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3954 putIRegRDX(4, mkexpr(resHi)); 3955 putIRegRAX(4, mkexpr(resLo)); 3956 break; 3957 } 3958 case Ity_I16: { 3959 IRTemp res32 = newTemp(Ity_I32); 3960 IRTemp resHi = newTemp(Ity_I16); 3961 IRTemp resLo = newTemp(Ity_I16); 3962 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3963 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3964 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3965 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3966 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3967 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3968 putIRegRDX(2, mkexpr(resHi)); 3969 putIRegRAX(2, mkexpr(resLo)); 3970 break; 3971 } 3972 case Ity_I8: { 3973 IRTemp res16 = newTemp(Ity_I16); 3974 IRTemp resHi = newTemp(Ity_I8); 3975 IRTemp resLo = newTemp(Ity_I8); 3976 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3977 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3978 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3979 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3980 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3981 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3982 putIRegRAX(2, mkexpr(res16)); 3983 break; 3984 } 3985 default: 3986 ppIRType(ty); 3987 vpanic("codegen_mulL_A_D(amd64)"); 3988 } 3989 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3990 } 3991 3992 3993 /* Group 3 extended opcodes. We have to decide here whether F2 and F3 3994 might be valid.*/ 3995 static 3996 ULong dis_Grp3 ( const VexAbiInfo* vbi, 3997 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3998 { 3999 Long d64; 4000 UChar modrm; 4001 HChar dis_buf[50]; 4002 Int len; 4003 IRTemp addr; 4004 IRType ty = szToITy(sz); 4005 IRTemp t1 = newTemp(ty); 4006 IRTemp dst1, src, dst0; 4007 *decode_OK = True; 4008 modrm = getUChar(delta); 4009 if (epartIsReg(modrm)) { 4010 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 
*/ 4011 if (haveF2orF3(pfx)) goto unhandled; 4012 switch (gregLO3ofRM(modrm)) { 4013 case 0: { /* TEST */ 4014 delta++; 4015 d64 = getSDisp(imin(4,sz), delta); 4016 delta += imin(4,sz); 4017 dst1 = newTemp(ty); 4018 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4019 getIRegE(sz,pfx,modrm), 4020 mkU(ty, d64 & mkSizeMask(sz)))); 4021 setFlags_DEP1( Iop_And8, dst1, ty ); 4022 DIP("test%c $%lld, %s\n", 4023 nameISize(sz), d64, 4024 nameIRegE(sz, pfx, modrm)); 4025 break; 4026 } 4027 case 1: 4028 *decode_OK = False; 4029 return delta; 4030 case 2: /* NOT */ 4031 delta++; 4032 putIRegE(sz, pfx, modrm, 4033 unop(mkSizedOp(ty,Iop_Not8), 4034 getIRegE(sz, pfx, modrm))); 4035 DIP("not%c %s\n", nameISize(sz), 4036 nameIRegE(sz, pfx, modrm)); 4037 break; 4038 case 3: /* NEG */ 4039 delta++; 4040 dst0 = newTemp(ty); 4041 src = newTemp(ty); 4042 dst1 = newTemp(ty); 4043 assign(dst0, mkU(ty,0)); 4044 assign(src, getIRegE(sz, pfx, modrm)); 4045 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4046 mkexpr(src))); 4047 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4048 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 4049 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 4050 break; 4051 case 4: /* MUL (unsigned widening) */ 4052 delta++; 4053 src = newTemp(ty); 4054 assign(src, getIRegE(sz,pfx,modrm)); 4055 codegen_mulL_A_D ( sz, False, src, 4056 nameIRegE(sz,pfx,modrm) ); 4057 break; 4058 case 5: /* IMUL (signed widening) */ 4059 delta++; 4060 src = newTemp(ty); 4061 assign(src, getIRegE(sz,pfx,modrm)); 4062 codegen_mulL_A_D ( sz, True, src, 4063 nameIRegE(sz,pfx,modrm) ); 4064 break; 4065 case 6: /* DIV */ 4066 delta++; 4067 assign( t1, getIRegE(sz, pfx, modrm) ); 4068 codegen_div ( sz, t1, False ); 4069 DIP("div%c %s\n", nameISize(sz), 4070 nameIRegE(sz, pfx, modrm)); 4071 break; 4072 case 7: /* IDIV */ 4073 delta++; 4074 assign( t1, getIRegE(sz, pfx, modrm) ); 4075 codegen_div ( sz, t1, True ); 4076 DIP("idiv%c %s\n", nameISize(sz), 4077 nameIRegE(sz, pfx, modrm)); 4078 break; 4079 default: 4080 /*NOTREACHED*/ 4081 vpanic("Grp3(amd64,R)"); 4082 } 4083 } else { 4084 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4085 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4086 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/) 4087 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4088 validF2orF3 = True; 4089 } 4090 if (!validF2orF3) goto unhandled; 4091 /* */ 4092 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 4093 /* we have to inform disAMode of any immediate 4094 bytes used */ 4095 gregLO3ofRM(modrm)==0/*TEST*/ 4096 ? 
imin(4,sz) 4097 : 0 4098 ); 4099 t1 = newTemp(ty); 4100 delta += len; 4101 assign(t1, loadLE(ty,mkexpr(addr))); 4102 switch (gregLO3ofRM(modrm)) { 4103 case 0: { /* TEST */ 4104 d64 = getSDisp(imin(4,sz), delta); 4105 delta += imin(4,sz); 4106 dst1 = newTemp(ty); 4107 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4108 mkexpr(t1), 4109 mkU(ty, d64 & mkSizeMask(sz)))); 4110 setFlags_DEP1( Iop_And8, dst1, ty ); 4111 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 4112 break; 4113 } 4114 case 1: 4115 *decode_OK = False; 4116 return delta; 4117 case 2: /* NOT */ 4118 dst1 = newTemp(ty); 4119 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 4120 if (haveLOCK(pfx)) { 4121 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4122 guest_RIP_curr_instr ); 4123 } else { 4124 storeLE( mkexpr(addr), mkexpr(dst1) ); 4125 } 4126 DIP("not%c %s\n", nameISize(sz), dis_buf); 4127 break; 4128 case 3: /* NEG */ 4129 dst0 = newTemp(ty); 4130 src = newTemp(ty); 4131 dst1 = newTemp(ty); 4132 assign(dst0, mkU(ty,0)); 4133 assign(src, mkexpr(t1)); 4134 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4135 mkexpr(src))); 4136 if (haveLOCK(pfx)) { 4137 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4138 guest_RIP_curr_instr ); 4139 } else { 4140 storeLE( mkexpr(addr), mkexpr(dst1) ); 4141 } 4142 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4143 DIP("neg%c %s\n", nameISize(sz), dis_buf); 4144 break; 4145 case 4: /* MUL (unsigned widening) */ 4146 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 4147 break; 4148 case 5: /* IMUL */ 4149 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 4150 break; 4151 case 6: /* DIV */ 4152 codegen_div ( sz, t1, False ); 4153 DIP("div%c %s\n", nameISize(sz), dis_buf); 4154 break; 4155 case 7: /* IDIV */ 4156 codegen_div ( sz, t1, True ); 4157 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 4158 break; 4159 default: 4160 /*NOTREACHED*/ 4161 vpanic("Grp3(amd64,M)"); 4162 } 4163 } 4164 return delta; 4165 unhandled: 4166 *decode_OK = False; 4167 return delta; 4168 } 4169 4170 4171 /* Group 4 extended opcodes. We have to decide here whether F2 and F3 4172 might be valid. */ 4173 static 4174 ULong dis_Grp4 ( const VexAbiInfo* vbi, 4175 Prefix pfx, Long delta, Bool* decode_OK ) 4176 { 4177 Int alen; 4178 UChar modrm; 4179 HChar dis_buf[50]; 4180 IRType ty = Ity_I8; 4181 IRTemp t1 = newTemp(ty); 4182 IRTemp t2 = newTemp(ty); 4183 4184 *decode_OK = True; 4185 4186 modrm = getUChar(delta); 4187 if (epartIsReg(modrm)) { 4188 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */ 4189 if (haveF2orF3(pfx)) goto unhandled; 4190 assign(t1, getIRegE(1, pfx, modrm)); 4191 switch (gregLO3ofRM(modrm)) { 4192 case 0: /* INC */ 4193 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4194 putIRegE(1, pfx, modrm, mkexpr(t2)); 4195 setFlags_INC_DEC( True, t2, ty ); 4196 break; 4197 case 1: /* DEC */ 4198 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4199 putIRegE(1, pfx, modrm, mkexpr(t2)); 4200 setFlags_INC_DEC( False, t2, ty ); 4201 break; 4202 default: 4203 *decode_OK = False; 4204 return delta; 4205 } 4206 delta++; 4207 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 4208 nameIRegE(1, pfx, modrm)); 4209 } else { 4210 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4211 Bool validF2orF3 = haveF2orF3(pfx) ? 
False : True; 4212 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4213 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4214 validF2orF3 = True; 4215 } 4216 if (!validF2orF3) goto unhandled; 4217 /* */ 4218 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 4219 assign( t1, loadLE(ty, mkexpr(addr)) ); 4220 switch (gregLO3ofRM(modrm)) { 4221 case 0: /* INC */ 4222 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4223 if (haveLOCK(pfx)) { 4224 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4225 guest_RIP_curr_instr ); 4226 } else { 4227 storeLE( mkexpr(addr), mkexpr(t2) ); 4228 } 4229 setFlags_INC_DEC( True, t2, ty ); 4230 break; 4231 case 1: /* DEC */ 4232 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4233 if (haveLOCK(pfx)) { 4234 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4235 guest_RIP_curr_instr ); 4236 } else { 4237 storeLE( mkexpr(addr), mkexpr(t2) ); 4238 } 4239 setFlags_INC_DEC( False, t2, ty ); 4240 break; 4241 default: 4242 *decode_OK = False; 4243 return delta; 4244 } 4245 delta += alen; 4246 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 4247 } 4248 return delta; 4249 unhandled: 4250 *decode_OK = False; 4251 return delta; 4252 } 4253 4254 4255 /* Group 5 extended opcodes. We have to decide here whether F2 and F3 4256 might be valid. */ 4257 static 4258 ULong dis_Grp5 ( const VexAbiInfo* vbi, 4259 Prefix pfx, Int sz, Long delta, 4260 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 4261 { 4262 Int len; 4263 UChar modrm; 4264 HChar dis_buf[50]; 4265 IRTemp addr = IRTemp_INVALID; 4266 IRType ty = szToITy(sz); 4267 IRTemp t1 = newTemp(ty); 4268 IRTemp t2 = IRTemp_INVALID; 4269 IRTemp t3 = IRTemp_INVALID; 4270 Bool showSz = True; 4271 4272 *decode_OK = True; 4273 4274 modrm = getUChar(delta); 4275 if (epartIsReg(modrm)) { 4276 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 4277 F2/CALL and F2/JMP may have bnd prefix. */ 4278 if (haveF2orF3(pfx) 4279 && ! (haveF2(pfx) 4280 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4))) 4281 goto unhandledR; 4282 assign(t1, getIRegE(sz,pfx,modrm)); 4283 switch (gregLO3ofRM(modrm)) { 4284 case 0: /* INC */ 4285 t2 = newTemp(ty); 4286 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4287 mkexpr(t1), mkU(ty,1))); 4288 setFlags_INC_DEC( True, t2, ty ); 4289 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4290 break; 4291 case 1: /* DEC */ 4292 t2 = newTemp(ty); 4293 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4294 mkexpr(t1), mkU(ty,1))); 4295 setFlags_INC_DEC( False, t2, ty ); 4296 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4297 break; 4298 case 2: /* call Ev */ 4299 /* Ignore any sz value and operate as if sz==8. */ 4300 if (!(sz == 4 || sz == 8)) goto unhandledR; 4301 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4302 sz = 8; 4303 t3 = newTemp(Ity_I64); 4304 assign(t3, getIRegE(sz,pfx,modrm)); 4305 t2 = newTemp(Ity_I64); 4306 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4307 putIReg64(R_RSP, mkexpr(t2)); 4308 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 4309 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 4310 jmp_treg(dres, Ijk_Call, t3); 4311 vassert(dres->whatNext == Dis_StopHere); 4312 showSz = False; 4313 break; 4314 case 4: /* jmp Ev */ 4315 /* Ignore any sz value and operate as if sz==8. */ 4316 if (!(sz == 4 || sz == 8)) goto unhandledR; 4317 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
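               Accepted here but has no effect on the IR we generate;
               it is merely echoed in the disassembly output.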
*/ 4318 sz = 8; 4319 t3 = newTemp(Ity_I64); 4320 assign(t3, getIRegE(sz,pfx,modrm)); 4321 jmp_treg(dres, Ijk_Boring, t3); 4322 vassert(dres->whatNext == Dis_StopHere); 4323 showSz = False; 4324 break; 4325 case 6: /* PUSH Ev */ 4326 /* There is no encoding for 32-bit operand size; hence ... */ 4327 if (sz == 4) sz = 8; 4328 if (sz == 8 || sz == 2) { 4329 ty = szToITy(sz); /* redo it, since sz might have changed */ 4330 t3 = newTemp(ty); 4331 assign(t3, getIRegE(sz,pfx,modrm)); 4332 t2 = newTemp(Ity_I64); 4333 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4334 putIReg64(R_RSP, mkexpr(t2) ); 4335 storeLE( mkexpr(t2), mkexpr(t3) ); 4336 break; 4337 } else { 4338 goto unhandledR; /* awaiting test case */ 4339 } 4340 default: 4341 unhandledR: 4342 *decode_OK = False; 4343 return delta; 4344 } 4345 delta++; 4346 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4347 showSz ? nameISize(sz) : ' ', 4348 nameIRegE(sz, pfx, modrm)); 4349 } else { 4350 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */ 4351 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4352 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4353 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4354 validF2orF3 = True; 4355 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4) 4356 && (haveF2(pfx) && !haveF3(pfx))) { 4357 validF2orF3 = True; 4358 } 4359 if (!validF2orF3) goto unhandledM; 4360 /* */ 4361 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4362 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4363 && gregLO3ofRM(modrm) != 6) { 4364 assign(t1, loadLE(ty,mkexpr(addr))); 4365 } 4366 switch (gregLO3ofRM(modrm)) { 4367 case 0: /* INC */ 4368 t2 = newTemp(ty); 4369 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4370 mkexpr(t1), mkU(ty,1))); 4371 if (haveLOCK(pfx)) { 4372 casLE( mkexpr(addr), 4373 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4374 } else { 4375 storeLE(mkexpr(addr),mkexpr(t2)); 4376 } 4377 setFlags_INC_DEC( True, t2, ty ); 4378 break; 4379 case 1: /* DEC */ 4380 t2 = newTemp(ty); 4381 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4382 mkexpr(t1), mkU(ty,1))); 4383 if (haveLOCK(pfx)) { 4384 casLE( mkexpr(addr), 4385 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4386 } else { 4387 storeLE(mkexpr(addr),mkexpr(t2)); 4388 } 4389 setFlags_INC_DEC( False, t2, ty ); 4390 break; 4391 case 2: /* call Ev */ 4392 /* Ignore any sz value and operate as if sz==8. */ 4393 if (!(sz == 4 || sz == 8)) goto unhandledM; 4394 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4395 sz = 8; 4396 t3 = newTemp(Ity_I64); 4397 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4398 t2 = newTemp(Ity_I64); 4399 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4400 putIReg64(R_RSP, mkexpr(t2)); 4401 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4402 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4403 jmp_treg(dres, Ijk_Call, t3); 4404 vassert(dres->whatNext == Dis_StopHere); 4405 showSz = False; 4406 break; 4407 case 4: /* JMP Ev */ 4408 /* Ignore any sz value and operate as if sz==8. */ 4409 if (!(sz == 4 || sz == 8)) goto unhandledM; 4410 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4411 sz = 8; 4412 t3 = newTemp(Ity_I64); 4413 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4414 jmp_treg(dres, Ijk_Boring, t3); 4415 vassert(dres->whatNext == Dis_StopHere); 4416 showSz = False; 4417 break; 4418 case 6: /* PUSH Ev */ 4419 /* There is no encoding for 32-bit operand size; hence ... 
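            in 64-bit mode a push is either 64-bit (the default) or
            16-bit (with a 66 prefix), so a requested sz of 4 is
            simply promoted to 8 below, and only sz == 8 or sz == 2
            are then handled.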
*/ 4420 if (sz == 4) sz = 8; 4421 if (sz == 8 || sz == 2) { 4422 ty = szToITy(sz); /* redo it, since sz might have changed */ 4423 t3 = newTemp(ty); 4424 assign(t3, loadLE(ty,mkexpr(addr))); 4425 t2 = newTemp(Ity_I64); 4426 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4427 putIReg64(R_RSP, mkexpr(t2) ); 4428 storeLE( mkexpr(t2), mkexpr(t3) ); 4429 break; 4430 } else { 4431 goto unhandledM; /* awaiting test case */ 4432 } 4433 default: 4434 unhandledM: 4435 *decode_OK = False; 4436 return delta; 4437 } 4438 delta += len; 4439 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4440 showSz ? nameISize(sz) : ' ', 4441 dis_buf); 4442 } 4443 return delta; 4444 } 4445 4446 4447 /*------------------------------------------------------------*/ 4448 /*--- Disassembling string ops (including REP prefixes) ---*/ 4449 /*------------------------------------------------------------*/ 4450 4451 /* Code shared by all the string ops */ 4452 static 4453 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4454 { 4455 UChar logSz; 4456 if (sz == 8 || sz == 4 || sz == 2) { 4457 logSz = 1; 4458 if (sz == 4) logSz = 2; 4459 if (sz == 8) logSz = 3; 4460 assign( t_inc, 4461 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4462 mkU8(logSz) ) ); 4463 } else { 4464 assign( t_inc, 4465 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4466 } 4467 } 4468 4469 static 4470 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ), 4471 Int sz, const HChar* name, Prefix pfx ) 4472 { 4473 IRTemp t_inc = newTemp(Ity_I64); 4474 /* Really we ought to inspect the override prefixes, but we don't. 4475 The following assertion catches any resulting sillyness. */ 4476 vassert(pfx == clearSegBits(pfx)); 4477 dis_string_op_increment(sz, t_inc); 4478 dis_OP( sz, t_inc, pfx ); 4479 DIP("%s%c\n", name, nameISize(sz)); 4480 } 4481 4482 static 4483 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx ) 4484 { 4485 IRType ty = szToITy(sz); 4486 IRTemp td = newTemp(Ity_I64); /* RDI */ 4487 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4488 IRExpr *incd, *incs; 4489 4490 if (haveASO(pfx)) { 4491 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4492 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4493 } else { 4494 assign( td, getIReg64(R_RDI) ); 4495 assign( ts, getIReg64(R_RSI) ); 4496 } 4497 4498 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4499 4500 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4501 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4502 if (haveASO(pfx)) { 4503 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4504 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4505 } 4506 putIReg64( R_RDI, incd ); 4507 putIReg64( R_RSI, incs ); 4508 } 4509 4510 static 4511 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx ) 4512 { 4513 IRType ty = szToITy(sz); 4514 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4515 IRExpr *incs; 4516 4517 if (haveASO(pfx)) 4518 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4519 else 4520 assign( ts, getIReg64(R_RSI) ); 4521 4522 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4523 4524 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4525 if (haveASO(pfx)) 4526 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4527 putIReg64( R_RSI, incs ); 4528 } 4529 4530 static 4531 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx ) 4532 { 4533 IRType ty = szToITy(sz); 4534 IRTemp ta = newTemp(ty); /* rAX */ 4535 IRTemp td = newTemp(Ity_I64); /* RDI */ 4536 IRExpr *incd; 4537 4538 assign( ta, getIRegRAX(sz) ); 4539 4540 if (haveASO(pfx)) 4541 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4542 else 4543 
assign( td, getIReg64(R_RDI) ); 4544 4545 storeLE( mkexpr(td), mkexpr(ta) ); 4546 4547 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4548 if (haveASO(pfx)) 4549 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4550 putIReg64( R_RDI, incd ); 4551 } 4552 4553 static 4554 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx ) 4555 { 4556 IRType ty = szToITy(sz); 4557 IRTemp tdv = newTemp(ty); /* (RDI) */ 4558 IRTemp tsv = newTemp(ty); /* (RSI) */ 4559 IRTemp td = newTemp(Ity_I64); /* RDI */ 4560 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4561 IRExpr *incd, *incs; 4562 4563 if (haveASO(pfx)) { 4564 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4565 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4566 } else { 4567 assign( td, getIReg64(R_RDI) ); 4568 assign( ts, getIReg64(R_RSI) ); 4569 } 4570 4571 assign( tdv, loadLE(ty,mkexpr(td)) ); 4572 4573 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4574 4575 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4576 4577 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4578 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4579 if (haveASO(pfx)) { 4580 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4581 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4582 } 4583 putIReg64( R_RDI, incd ); 4584 putIReg64( R_RSI, incs ); 4585 } 4586 4587 static 4588 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) 4589 { 4590 IRType ty = szToITy(sz); 4591 IRTemp ta = newTemp(ty); /* rAX */ 4592 IRTemp td = newTemp(Ity_I64); /* RDI */ 4593 IRTemp tdv = newTemp(ty); /* (RDI) */ 4594 IRExpr *incd; 4595 4596 assign( ta, getIRegRAX(sz) ); 4597 4598 if (haveASO(pfx)) 4599 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4600 else 4601 assign( td, getIReg64(R_RDI) ); 4602 4603 assign( tdv, loadLE(ty,mkexpr(td)) ); 4604 4605 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4606 4607 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4608 if (haveASO(pfx)) 4609 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4610 putIReg64( R_RDI, incd ); 4611 } 4612 4613 4614 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume 4615 the insn is the last one in the basic block, and so emit a jump to 4616 the next insn, rather than just falling through. */ 4617 static 4618 void dis_REP_op ( /*MOD*/DisResult* dres, 4619 AMD64Condcode cond, 4620 void (*dis_OP)(Int, IRTemp, Prefix), 4621 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name, 4622 Prefix pfx ) 4623 { 4624 IRTemp t_inc = newTemp(Ity_I64); 4625 IRTemp tc; 4626 IRExpr* cmp; 4627 4628 /* Really we ought to inspect the override prefixes, but we don't. 4629 The following assertion catches any resulting sillyness. 
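      In outline (a sketch of the IR emitted below, not extra
      semantics):

         if (xCX == 0) goto next_insn;
         xCX := xCX - 1;
         <do one iteration of the string op>;
         if (cond == Always) goto this_insn;
         else { if (cond holds) goto this_insn; else goto next_insn; }

      where xCX is ECX or RCX depending on whether an address-size
      override (0x67) is present.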
*/ 4630 vassert(pfx == clearSegBits(pfx)); 4631 4632 if (haveASO(pfx)) { 4633 tc = newTemp(Ity_I32); /* ECX */ 4634 assign( tc, getIReg32(R_RCX) ); 4635 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0)); 4636 } else { 4637 tc = newTemp(Ity_I64); /* RCX */ 4638 assign( tc, getIReg64(R_RCX) ); 4639 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); 4640 } 4641 4642 stmt( IRStmt_Exit( cmp, Ijk_Boring, 4643 IRConst_U64(rip_next), OFFB_RIP ) ); 4644 4645 if (haveASO(pfx)) 4646 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 4647 else 4648 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4649 4650 dis_string_op_increment(sz, t_inc); 4651 dis_OP (sz, t_inc, pfx); 4652 4653 if (cond == AMD64CondAlways) { 4654 jmp_lit(dres, Ijk_Boring, rip); 4655 vassert(dres->whatNext == Dis_StopHere); 4656 } else { 4657 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4658 Ijk_Boring, 4659 IRConst_U64(rip), 4660 OFFB_RIP ) ); 4661 jmp_lit(dres, Ijk_Boring, rip_next); 4662 vassert(dres->whatNext == Dis_StopHere); 4663 } 4664 DIP("%s%c\n", name, nameISize(sz)); 4665 } 4666 4667 4668 /*------------------------------------------------------------*/ 4669 /*--- Arithmetic, etc. ---*/ 4670 /*------------------------------------------------------------*/ 4671 4672 /* IMUL E, G. Supplied eip points to the modR/M byte. */ 4673 static 4674 ULong dis_mul_E_G ( const VexAbiInfo* vbi, 4675 Prefix pfx, 4676 Int size, 4677 Long delta0 ) 4678 { 4679 Int alen; 4680 HChar dis_buf[50]; 4681 UChar rm = getUChar(delta0); 4682 IRType ty = szToITy(size); 4683 IRTemp te = newTemp(ty); 4684 IRTemp tg = newTemp(ty); 4685 IRTemp resLo = newTemp(ty); 4686 4687 assign( tg, getIRegG(size, pfx, rm) ); 4688 if (epartIsReg(rm)) { 4689 assign( te, getIRegE(size, pfx, rm) ); 4690 } else { 4691 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4692 assign( te, loadLE(ty,mkexpr(addr)) ); 4693 } 4694 4695 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4696 4697 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4698 4699 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4700 4701 if (epartIsReg(rm)) { 4702 DIP("imul%c %s, %s\n", nameISize(size), 4703 nameIRegE(size,pfx,rm), 4704 nameIRegG(size,pfx,rm)); 4705 return 1+delta0; 4706 } else { 4707 DIP("imul%c %s, %s\n", nameISize(size), 4708 dis_buf, 4709 nameIRegG(size,pfx,rm)); 4710 return alen+delta0; 4711 } 4712 } 4713 4714 4715 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. 
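   The immediate is at most 4 bytes (litsize), is fetched
   sign-extended and then truncated to the operand size, and the
   flags thunk used is the same signed-multiply one as for the other
   IMUL forms.  E.g. (illustrative) "imul $100, (%rdi), %eax" is the
   4-byte-immediate form, while the imm8 form sign-extends a single
   byte to the operand size.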
*/ 4716 static 4717 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi, 4718 Prefix pfx, 4719 Int size, 4720 Long delta, 4721 Int litsize ) 4722 { 4723 Long d64; 4724 Int alen; 4725 HChar dis_buf[50]; 4726 UChar rm = getUChar(delta); 4727 IRType ty = szToITy(size); 4728 IRTemp te = newTemp(ty); 4729 IRTemp tl = newTemp(ty); 4730 IRTemp resLo = newTemp(ty); 4731 4732 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4733 4734 if (epartIsReg(rm)) { 4735 assign(te, getIRegE(size, pfx, rm)); 4736 delta++; 4737 } else { 4738 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4739 imin(4,litsize) ); 4740 assign(te, loadLE(ty, mkexpr(addr))); 4741 delta += alen; 4742 } 4743 d64 = getSDisp(imin(4,litsize),delta); 4744 delta += imin(4,litsize); 4745 4746 d64 &= mkSizeMask(size); 4747 assign(tl, mkU(ty,d64)); 4748 4749 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4750 4751 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4752 4753 putIRegG(size, pfx, rm, mkexpr(resLo)); 4754 4755 DIP("imul%c $%lld, %s, %s\n", 4756 nameISize(size), d64, 4757 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4758 nameIRegG(size,pfx,rm) ); 4759 return delta; 4760 } 4761 4762 4763 /* Generate an IR sequence to do a popcount operation on the supplied 4764 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4765 Ity_I16, Ity_I32 or Ity_I64 only. */ 4766 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4767 { 4768 Int i; 4769 if (ty == Ity_I16) { 4770 IRTemp old = IRTemp_INVALID; 4771 IRTemp nyu = IRTemp_INVALID; 4772 IRTemp mask[4], shift[4]; 4773 for (i = 0; i < 4; i++) { 4774 mask[i] = newTemp(ty); 4775 shift[i] = 1 << i; 4776 } 4777 assign(mask[0], mkU16(0x5555)); 4778 assign(mask[1], mkU16(0x3333)); 4779 assign(mask[2], mkU16(0x0F0F)); 4780 assign(mask[3], mkU16(0x00FF)); 4781 old = src; 4782 for (i = 0; i < 4; i++) { 4783 nyu = newTemp(ty); 4784 assign(nyu, 4785 binop(Iop_Add16, 4786 binop(Iop_And16, 4787 mkexpr(old), 4788 mkexpr(mask[i])), 4789 binop(Iop_And16, 4790 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4791 mkexpr(mask[i])))); 4792 old = nyu; 4793 } 4794 return nyu; 4795 } 4796 if (ty == Ity_I32) { 4797 IRTemp old = IRTemp_INVALID; 4798 IRTemp nyu = IRTemp_INVALID; 4799 IRTemp mask[5], shift[5]; 4800 for (i = 0; i < 5; i++) { 4801 mask[i] = newTemp(ty); 4802 shift[i] = 1 << i; 4803 } 4804 assign(mask[0], mkU32(0x55555555)); 4805 assign(mask[1], mkU32(0x33333333)); 4806 assign(mask[2], mkU32(0x0F0F0F0F)); 4807 assign(mask[3], mkU32(0x00FF00FF)); 4808 assign(mask[4], mkU32(0x0000FFFF)); 4809 old = src; 4810 for (i = 0; i < 5; i++) { 4811 nyu = newTemp(ty); 4812 assign(nyu, 4813 binop(Iop_Add32, 4814 binop(Iop_And32, 4815 mkexpr(old), 4816 mkexpr(mask[i])), 4817 binop(Iop_And32, 4818 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4819 mkexpr(mask[i])))); 4820 old = nyu; 4821 } 4822 return nyu; 4823 } 4824 if (ty == Ity_I64) { 4825 IRTemp old = IRTemp_INVALID; 4826 IRTemp nyu = IRTemp_INVALID; 4827 IRTemp mask[6], shift[6]; 4828 for (i = 0; i < 6; i++) { 4829 mask[i] = newTemp(ty); 4830 shift[i] = 1 << i; 4831 } 4832 assign(mask[0], mkU64(0x5555555555555555ULL)); 4833 assign(mask[1], mkU64(0x3333333333333333ULL)); 4834 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4835 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4836 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4837 assign(mask[5], mkU64(0x00000000FFFFFFFFULL)); 4838 old = src; 4839 for (i = 0; i < 6; i++) { 4840 nyu = newTemp(ty); 4841 assign(nyu, 4842 binop(Iop_Add64, 4843 binop(Iop_And64, 
4844 mkexpr(old), 4845 mkexpr(mask[i])), 4846 binop(Iop_And64, 4847 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4848 mkexpr(mask[i])))); 4849 old = nyu; 4850 } 4851 return nyu; 4852 } 4853 /*NOTREACHED*/ 4854 vassert(0); 4855 } 4856 4857 4858 /* Generate an IR sequence to do a count-leading-zeroes operation on 4859 the supplied IRTemp, and return a new IRTemp holding the result. 4860 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4861 the argument is zero, return the number of bits in the word (the 4862 natural semantics). */ 4863 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4864 { 4865 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4866 4867 IRTemp src64 = newTemp(Ity_I64); 4868 assign(src64, widenUto64( mkexpr(src) )); 4869 4870 IRTemp src64x = newTemp(Ity_I64); 4871 assign(src64x, 4872 binop(Iop_Shl64, mkexpr(src64), 4873 mkU8(64 - 8 * sizeofIRType(ty)))); 4874 4875 // Clz64 has undefined semantics when its input is zero, so 4876 // special-case around that. 4877 IRTemp res64 = newTemp(Ity_I64); 4878 assign(res64, 4879 IRExpr_ITE( 4880 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)), 4881 mkU64(8 * sizeofIRType(ty)), 4882 unop(Iop_Clz64, mkexpr(src64x)) 4883 )); 4884 4885 IRTemp res = newTemp(ty); 4886 assign(res, narrowTo(ty, mkexpr(res64))); 4887 return res; 4888 } 4889 4890 4891 /* Generate an IR sequence to do a count-trailing-zeroes operation on 4892 the supplied IRTemp, and return a new IRTemp holding the result. 4893 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4894 the argument is zero, return the number of bits in the word (the 4895 natural semantics). */ 4896 static IRTemp gen_TZCNT ( IRType ty, IRTemp src ) 4897 { 4898 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4899 4900 IRTemp src64 = newTemp(Ity_I64); 4901 assign(src64, widenUto64( mkexpr(src) )); 4902 4903 // Ctz64 has undefined semantics when its input is zero, so 4904 // special-case around that. 4905 IRTemp res64 = newTemp(Ity_I64); 4906 assign(res64, 4907 IRExpr_ITE( 4908 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)), 4909 mkU64(8 * sizeofIRType(ty)), 4910 unop(Iop_Ctz64, mkexpr(src64)) 4911 )); 4912 4913 IRTemp res = newTemp(ty); 4914 assign(res, narrowTo(ty, mkexpr(res64))); 4915 return res; 4916 } 4917 4918 4919 /*------------------------------------------------------------*/ 4920 /*--- ---*/ 4921 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4922 /*--- ---*/ 4923 /*------------------------------------------------------------*/ 4924 4925 /* --- Helper functions for dealing with the register stack. --- */ 4926 4927 /* --- Set the emulation-warning pseudo-register. --- */ 4928 4929 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4930 { 4931 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4932 stmt( IRStmt_Put( OFFB_EMNOTE, e ) ); 4933 } 4934 4935 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4936 4937 static IRExpr* mkQNaN64 ( void ) 4938 { 4939 /* QNaN is 0 2047 1 0(51times) 4940 == 0b 11111111111b 1 0(51times) 4941 == 0x7FF8 0000 0000 0000 4942 */ 4943 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4944 } 4945 4946 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4947 4948 static IRExpr* get_ftop ( void ) 4949 { 4950 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4951 } 4952 4953 static void put_ftop ( IRExpr* e ) 4954 { 4955 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4956 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4957 } 4958 4959 /* --------- Get/put the C3210 bits. 
--------- */ 4960 4961 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4962 { 4963 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4964 } 4965 4966 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4967 { 4968 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4969 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4970 } 4971 4972 /* --------- Get/put the FPU rounding mode. --------- */ 4973 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4974 { 4975 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4976 } 4977 4978 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4979 { 4980 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4981 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4982 } 4983 4984 4985 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4986 /* Produces a value in 0 .. 3, which is encoded as per the type 4987 IRRoundingMode. Since the guest_FPROUND value is also encoded as 4988 per IRRoundingMode, we merely need to get it and mask it for 4989 safety. 4990 */ 4991 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4992 { 4993 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4994 } 4995 4996 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4997 { 4998 return mkU32(Irrm_NEAREST); 4999 } 5000 5001 5002 /* --------- Get/set FP register tag bytes. --------- */ 5003 5004 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 5005 5006 static void put_ST_TAG ( Int i, IRExpr* value ) 5007 { 5008 IRRegArray* descr; 5009 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 5010 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5011 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5012 } 5013 5014 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 5015 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 5016 5017 static IRExpr* get_ST_TAG ( Int i ) 5018 { 5019 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5020 return IRExpr_GetI( descr, get_ftop(), i ); 5021 } 5022 5023 5024 /* --------- Get/set FP registers. --------- */ 5025 5026 /* Given i, and some expression e, emit 'ST(i) = e' and set the 5027 register's tag to indicate the register is full. The previous 5028 state of the register is not checked. */ 5029 5030 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 5031 { 5032 IRRegArray* descr; 5033 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 5034 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5035 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5036 /* Mark the register as in-use. */ 5037 put_ST_TAG(i, mkU8(1)); 5038 } 5039 5040 /* Given i, and some expression e, emit 5041 ST(i) = is_full(i) ? NaN : e 5042 and set the tag accordingly. 5043 */ 5044 5045 static void put_ST ( Int i, IRExpr* value ) 5046 { 5047 put_ST_UNCHECKED( 5048 i, 5049 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5050 /* non-0 means full */ 5051 mkQNaN64(), 5052 /* 0 means empty */ 5053 value 5054 ) 5055 ); 5056 } 5057 5058 5059 /* Given i, generate an expression yielding 'ST(i)'. */ 5060 5061 static IRExpr* get_ST_UNCHECKED ( Int i ) 5062 { 5063 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5064 return IRExpr_GetI( descr, get_ftop(), i ); 5065 } 5066 5067 5068 /* Given i, generate an expression yielding 5069 is_full(i) ? 
ST(i) : NaN 5070 */ 5071 5072 static IRExpr* get_ST ( Int i ) 5073 { 5074 return 5075 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5076 /* non-0 means full */ 5077 get_ST_UNCHECKED(i), 5078 /* 0 means empty */ 5079 mkQNaN64()); 5080 } 5081 5082 5083 /* Given i, and some expression e, and a condition cond, generate IR 5084 which has the same effect as put_ST(i,e) when cond is true and has 5085 no effect when cond is false. Given the lack of proper 5086 if-then-else in the IR, this is pretty tricky. 5087 */ 5088 5089 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) 5090 { 5091 // new_tag = if cond then FULL else old_tag 5092 // new_val = if cond then (if old_tag==FULL then NaN else val) 5093 // else old_val 5094 5095 IRTemp old_tag = newTemp(Ity_I8); 5096 assign(old_tag, get_ST_TAG(i)); 5097 IRTemp new_tag = newTemp(Ity_I8); 5098 assign(new_tag, 5099 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); 5100 5101 IRTemp old_val = newTemp(Ity_F64); 5102 assign(old_val, get_ST_UNCHECKED(i)); 5103 IRTemp new_val = newTemp(Ity_F64); 5104 assign(new_val, 5105 IRExpr_ITE(mkexpr(cond), 5106 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), 5107 /* non-0 means full */ 5108 mkQNaN64(), 5109 /* 0 means empty */ 5110 value), 5111 mkexpr(old_val))); 5112 5113 put_ST_UNCHECKED(i, mkexpr(new_val)); 5114 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So 5115 // now set it to new_tag instead. 5116 put_ST_TAG(i, mkexpr(new_tag)); 5117 } 5118 5119 /* Adjust FTOP downwards by one register. */ 5120 5121 static void fp_push ( void ) 5122 { 5123 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 5124 } 5125 5126 /* Adjust FTOP downwards by one register when COND is 1:I1. Else 5127 don't change it. */ 5128 5129 static void maybe_fp_push ( IRTemp cond ) 5130 { 5131 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); 5132 } 5133 5134 /* Adjust FTOP upwards by one register, and mark the vacated register 5135 as empty. */ 5136 5137 static void fp_pop ( void ) 5138 { 5139 put_ST_TAG(0, mkU8(0)); 5140 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5141 } 5142 5143 /* Set the C2 bit of the FPU status register to e[0]. Assumes that 5144 e[31:1] == 0. 5145 */ 5146 static void set_C2 ( IRExpr* e ) 5147 { 5148 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); 5149 put_C3210( binop(Iop_Or64, 5150 cleared, 5151 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); 5152 } 5153 5154 /* Generate code to check that abs(d64) < 2^63 and is finite. This is 5155 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 5156 test is simple, but the derivation of it is not so simple. 5157 5158 The exponent field for an IEEE754 double is 11 bits. That means it 5159 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 5160 the number is either a NaN or an Infinity and so is not finite. 5161 Furthermore, a finite value of exactly 2^63 is the smallest value 5162 that has exponent value 0x43E. Hence, what we need to do is 5163 extract the exponent, ignoring the sign bit and mantissa, and check 5164 it is < 0x43E, or <= 0x43D. 5165 5166 To make this easily applicable to 32- and 64-bit targets, a 5167 roundabout approach is used. First the number is converted to I64, 5168 then the top 32 bits are taken. Shifting them right by 20 bits 5169 places the sign bit and exponent in the bottom 12 bits. Anding 5170 with 0x7FF gets rid of the sign bit, leaving just the exponent 5171 available for comparison. 
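   As a worked example: 2^63 is encoded as 0x43E0000000000000, so its
   top 32 bits shifted right by 20 and masked give exponent 0x43E,
   which fails the test; every finite value with magnitude strictly
   below 2^63 lies in a binade with exponent at most 0x43D (the
   binade [2^62, 2^63) has exponent 1023+62 = 0x43D) and so passes.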
5172 */ 5173 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 5174 { 5175 IRTemp i64 = newTemp(Ity_I64); 5176 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 5177 IRTemp exponent = newTemp(Ity_I32); 5178 assign(exponent, 5179 binop(Iop_And32, 5180 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 5181 mkU32(0x7FF))); 5182 IRTemp in_range_and_finite = newTemp(Ity_I1); 5183 assign(in_range_and_finite, 5184 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 5185 return in_range_and_finite; 5186 } 5187 5188 /* Invent a plausible-looking FPU status word value: 5189 ((ftop & 7) << 11) | (c3210 & 0x4700) 5190 */ 5191 static IRExpr* get_FPU_sw ( void ) 5192 { 5193 return 5194 unop(Iop_32to16, 5195 binop(Iop_Or32, 5196 binop(Iop_Shl32, 5197 binop(Iop_And32, get_ftop(), mkU32(7)), 5198 mkU8(11)), 5199 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 5200 mkU32(0x4700)) 5201 )); 5202 } 5203 5204 5205 /* Generate a dirty helper call that initialises the x87 state a la 5206 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise 5207 |guard| is used as a guarding condition. 5208 */ 5209 static void gen_FINIT_SEQUENCE ( IRExpr* guard ) 5210 { 5211 /* Uses dirty helper: 5212 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 5213 IRDirty* d = unsafeIRDirty_0_N ( 5214 0/*regparms*/, 5215 "amd64g_dirtyhelper_FINIT", 5216 &amd64g_dirtyhelper_FINIT, 5217 mkIRExprVec_1( IRExpr_BBPTR() ) 5218 ); 5219 5220 /* declare we're writing guest state */ 5221 d->nFxState = 5; 5222 vex_bzero(&d->fxState, sizeof(d->fxState)); 5223 5224 d->fxState[0].fx = Ifx_Write; 5225 d->fxState[0].offset = OFFB_FTOP; 5226 d->fxState[0].size = sizeof(UInt); 5227 5228 d->fxState[1].fx = Ifx_Write; 5229 d->fxState[1].offset = OFFB_FPREGS; 5230 d->fxState[1].size = 8 * sizeof(ULong); 5231 5232 d->fxState[2].fx = Ifx_Write; 5233 d->fxState[2].offset = OFFB_FPTAGS; 5234 d->fxState[2].size = 8 * sizeof(UChar); 5235 5236 d->fxState[3].fx = Ifx_Write; 5237 d->fxState[3].offset = OFFB_FPROUND; 5238 d->fxState[3].size = sizeof(ULong); 5239 5240 d->fxState[4].fx = Ifx_Write; 5241 d->fxState[4].offset = OFFB_FC3210; 5242 d->fxState[4].size = sizeof(ULong); 5243 5244 if (guard) 5245 d->guard = guard; 5246 5247 stmt( IRStmt_Dirty(d) ); 5248 } 5249 5250 5251 /* ------------------------------------------------------- */ 5252 /* Given all that stack-mangling junk, we can now go ahead 5253 and describe FP instructions. 5254 */ 5255 5256 /* ST(0) = ST(0) `op` mem64/32(addr) 5257 Need to check ST(0)'s tag on read, but not on write. 5258 */ 5259 static 5260 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5261 IROp op, Bool dbl ) 5262 { 5263 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5264 if (dbl) { 5265 put_ST_UNCHECKED(0, 5266 triop( op, 5267 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5268 get_ST(0), 5269 loadLE(Ity_F64,mkexpr(addr)) 5270 )); 5271 } else { 5272 put_ST_UNCHECKED(0, 5273 triop( op, 5274 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5275 get_ST(0), 5276 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 5277 )); 5278 } 5279 } 5280 5281 5282 /* ST(0) = mem64/32(addr) `op` ST(0) 5283 Need to check ST(0)'s tag on read, but not on write. 
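   (This is the reversed-operand form, used for FSUBR and FDIVR: the
   memory operand supplies the left argument of the non-commutative
   operation.)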
5284 */ 5285 static 5286 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5287 IROp op, Bool dbl ) 5288 { 5289 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5290 if (dbl) { 5291 put_ST_UNCHECKED(0, 5292 triop( op, 5293 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5294 loadLE(Ity_F64,mkexpr(addr)), 5295 get_ST(0) 5296 )); 5297 } else { 5298 put_ST_UNCHECKED(0, 5299 triop( op, 5300 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5301 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 5302 get_ST(0) 5303 )); 5304 } 5305 } 5306 5307 5308 /* ST(dst) = ST(dst) `op` ST(src). 5309 Check dst and src tags when reading but not on write. 5310 */ 5311 static 5312 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5313 Bool pop_after ) 5314 { 5315 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5316 put_ST_UNCHECKED( 5317 st_dst, 5318 triop( op, 5319 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5320 get_ST(st_dst), 5321 get_ST(st_src) ) 5322 ); 5323 if (pop_after) 5324 fp_pop(); 5325 } 5326 5327 /* ST(dst) = ST(src) `op` ST(dst). 5328 Check dst and src tags when reading but not on write. 5329 */ 5330 static 5331 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5332 Bool pop_after ) 5333 { 5334 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5335 put_ST_UNCHECKED( 5336 st_dst, 5337 triop( op, 5338 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5339 get_ST(st_src), 5340 get_ST(st_dst) ) 5341 ); 5342 if (pop_after) 5343 fp_pop(); 5344 } 5345 5346 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 5347 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 5348 { 5349 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 5350 /* This is a bit of a hack (and isn't really right). It sets 5351 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 5352 documentation implies A and S are unchanged. 5353 */ 5354 /* It's also fishy in that it is used both for COMIP and 5355 UCOMIP, and they aren't the same (although similar). */ 5356 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 5357 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 5358 stmt( IRStmt_Put( 5359 OFFB_CC_DEP1, 5360 binop( Iop_And64, 5361 unop( Iop_32Uto64, 5362 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 5363 mkU64(0x45) 5364 ))); 5365 if (pop_after) 5366 fp_pop(); 5367 } 5368 5369 5370 /* returns 5371 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 5372 */ 5373 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 5374 { 5375 IRTemp t32 = newTemp(Ity_I32); 5376 assign( t32, e32 ); 5377 return 5378 IRExpr_ITE( 5379 binop(Iop_CmpLT64U, 5380 unop(Iop_32Uto64, 5381 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 5382 mkU64(65536)), 5383 unop(Iop_32to16, mkexpr(t32)), 5384 mkU16( 0x8000 ) ); 5385 } 5386 5387 5388 static 5389 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 5390 const VexAbiInfo* vbi, Prefix pfx, Long delta ) 5391 { 5392 Int len; 5393 UInt r_src, r_dst; 5394 HChar dis_buf[50]; 5395 IRTemp t1, t2; 5396 5397 /* On entry, delta points at the second byte of the insn (the modrm 5398 byte).*/ 5399 UChar first_opcode = getUChar(delta-1); 5400 UChar modrm = getUChar(delta+0); 5401 5402 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 5403 5404 if (first_opcode == 0xD8) { 5405 if (modrm < 0xC0) { 5406 5407 /* bits 5,4,3 are an opcode extension, and the modRM also 5408 specifies an address. 
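         For 0xD8 the /r digit selects among the eight m32real forms
         handled below: /0 FADD, /1 FMUL, /2 FCOM, /3 FCOMP, /4 FSUB,
         /5 FSUBR, /6 FDIV, /7 FDIVR.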
*/ 5409 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5410 delta += len; 5411 5412 switch (gregLO3ofRM(modrm)) { 5413 5414 case 0: /* FADD single-real */ 5415 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5416 break; 5417 5418 case 1: /* FMUL single-real */ 5419 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5420 break; 5421 5422 case 2: /* FCOM single-real */ 5423 DIP("fcoms %s\n", dis_buf); 5424 /* This forces C1 to zero, which isn't right. */ 5425 /* The AMD documentation suggests that forcing C1 to 5426 zero is correct (Eliot Moss) */ 5427 put_C3210( 5428 unop( Iop_32Uto64, 5429 binop( Iop_And32, 5430 binop(Iop_Shl32, 5431 binop(Iop_CmpF64, 5432 get_ST(0), 5433 unop(Iop_F32toF64, 5434 loadLE(Ity_F32,mkexpr(addr)))), 5435 mkU8(8)), 5436 mkU32(0x4500) 5437 ))); 5438 break; 5439 5440 case 3: /* FCOMP single-real */ 5441 /* The AMD documentation suggests that forcing C1 to 5442 zero is correct (Eliot Moss) */ 5443 DIP("fcomps %s\n", dis_buf); 5444 /* This forces C1 to zero, which isn't right. */ 5445 put_C3210( 5446 unop( Iop_32Uto64, 5447 binop( Iop_And32, 5448 binop(Iop_Shl32, 5449 binop(Iop_CmpF64, 5450 get_ST(0), 5451 unop(Iop_F32toF64, 5452 loadLE(Ity_F32,mkexpr(addr)))), 5453 mkU8(8)), 5454 mkU32(0x4500) 5455 ))); 5456 fp_pop(); 5457 break; 5458 5459 case 4: /* FSUB single-real */ 5460 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5461 break; 5462 5463 case 5: /* FSUBR single-real */ 5464 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5465 break; 5466 5467 case 6: /* FDIV single-real */ 5468 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5469 break; 5470 5471 case 7: /* FDIVR single-real */ 5472 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5473 break; 5474 5475 default: 5476 vex_printf("unhandled opc_aux = 0x%2x\n", 5477 (UInt)gregLO3ofRM(modrm)); 5478 vex_printf("first_opcode == 0xD8\n"); 5479 goto decode_fail; 5480 } 5481 } else { 5482 delta++; 5483 switch (modrm) { 5484 5485 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 5486 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5487 break; 5488 5489 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5490 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5491 break; 5492 5493 /* Dunno if this is right */ 5494 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5495 r_dst = (UInt)modrm - 0xD0; 5496 DIP("fcom %%st(0),%%st(%u)\n", r_dst); 5497 /* This forces C1 to zero, which isn't right. */ 5498 put_C3210( 5499 unop(Iop_32Uto64, 5500 binop( Iop_And32, 5501 binop(Iop_Shl32, 5502 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5503 mkU8(8)), 5504 mkU32(0x4500) 5505 ))); 5506 break; 5507 5508 /* Dunno if this is right */ 5509 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5510 r_dst = (UInt)modrm - 0xD8; 5511 DIP("fcomp %%st(0),%%st(%u)\n", r_dst); 5512 /* This forces C1 to zero, which isn't right. */ 5513 put_C3210( 5514 unop(Iop_32Uto64, 5515 binop( Iop_And32, 5516 binop(Iop_Shl32, 5517 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5518 mkU8(8)), 5519 mkU32(0x4500) 5520 ))); 5521 fp_pop(); 5522 break; 5523 5524 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5525 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5526 break; 5527 5528 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5529 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5530 break; 5531 5532 case 0xF0 ... 
0xF7: /* FDIV %st(?),%st(0) */ 5533 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5534 break; 5535 5536 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5537 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5538 break; 5539 5540 default: 5541 goto decode_fail; 5542 } 5543 } 5544 } 5545 5546 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5547 else 5548 if (first_opcode == 0xD9) { 5549 if (modrm < 0xC0) { 5550 5551 /* bits 5,4,3 are an opcode extension, and the modRM also 5552 specifies an address. */ 5553 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5554 delta += len; 5555 5556 switch (gregLO3ofRM(modrm)) { 5557 5558 case 0: /* FLD single-real */ 5559 DIP("flds %s\n", dis_buf); 5560 fp_push(); 5561 put_ST(0, unop(Iop_F32toF64, 5562 loadLE(Ity_F32, mkexpr(addr)))); 5563 break; 5564 5565 case 2: /* FST single-real */ 5566 DIP("fsts %s\n", dis_buf); 5567 storeLE(mkexpr(addr), 5568 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5569 break; 5570 5571 case 3: /* FSTP single-real */ 5572 DIP("fstps %s\n", dis_buf); 5573 storeLE(mkexpr(addr), 5574 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5575 fp_pop(); 5576 break; 5577 5578 case 4: { /* FLDENV m28 */ 5579 /* Uses dirty helper: 5580 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5581 IRTemp ew = newTemp(Ity_I32); 5582 IRTemp w64 = newTemp(Ity_I64); 5583 IRDirty* d = unsafeIRDirty_0_N ( 5584 0/*regparms*/, 5585 "amd64g_dirtyhelper_FLDENV", 5586 &amd64g_dirtyhelper_FLDENV, 5587 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5588 ); 5589 d->tmp = w64; 5590 /* declare we're reading memory */ 5591 d->mFx = Ifx_Read; 5592 d->mAddr = mkexpr(addr); 5593 d->mSize = 28; 5594 5595 /* declare we're writing guest state */ 5596 d->nFxState = 4; 5597 vex_bzero(&d->fxState, sizeof(d->fxState)); 5598 5599 d->fxState[0].fx = Ifx_Write; 5600 d->fxState[0].offset = OFFB_FTOP; 5601 d->fxState[0].size = sizeof(UInt); 5602 5603 d->fxState[1].fx = Ifx_Write; 5604 d->fxState[1].offset = OFFB_FPTAGS; 5605 d->fxState[1].size = 8 * sizeof(UChar); 5606 5607 d->fxState[2].fx = Ifx_Write; 5608 d->fxState[2].offset = OFFB_FPROUND; 5609 d->fxState[2].size = sizeof(ULong); 5610 5611 d->fxState[3].fx = Ifx_Write; 5612 d->fxState[3].offset = OFFB_FC3210; 5613 d->fxState[3].size = sizeof(ULong); 5614 5615 stmt( IRStmt_Dirty(d) ); 5616 5617 /* ew contains any emulation warning we may need to 5618 issue. If needed, side-exit to the next insn, 5619 reporting the warning, so that Valgrind's dispatcher 5620 sees the warning. */ 5621 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5622 put_emwarn( mkexpr(ew) ); 5623 stmt( 5624 IRStmt_Exit( 5625 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5626 Ijk_EmWarn, 5627 IRConst_U64( guest_RIP_bbstart+delta ), 5628 OFFB_RIP 5629 ) 5630 ); 5631 5632 DIP("fldenv %s\n", dis_buf); 5633 break; 5634 } 5635 5636 case 5: {/* FLDCW */ 5637 /* The only thing we observe in the control word is the 5638 rounding mode. Therefore, pass the 16-bit value 5639 (x87 native-format control word) to a clean helper, 5640 getting back a 64-bit value, the lower half of which 5641 is the FPROUND value to store, and the upper half of 5642 which is the emulation-warning token which may be 5643 generated. 
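               The warning token is nonzero when the control word asks
               for something this translation does not faithfully
               emulate (for instance unmasked FP exceptions or a
               reduced precision setting), and in that case the EmWarn
               side-exit emitted below reports it to the dispatcher.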
5644 */ 5645 /* ULong amd64h_check_fldcw ( ULong ); */ 5646 IRTemp t64 = newTemp(Ity_I64); 5647 IRTemp ew = newTemp(Ity_I32); 5648 DIP("fldcw %s\n", dis_buf); 5649 assign( t64, mkIRExprCCall( 5650 Ity_I64, 0/*regparms*/, 5651 "amd64g_check_fldcw", 5652 &amd64g_check_fldcw, 5653 mkIRExprVec_1( 5654 unop( Iop_16Uto64, 5655 loadLE(Ity_I16, mkexpr(addr))) 5656 ) 5657 ) 5658 ); 5659 5660 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5661 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5662 put_emwarn( mkexpr(ew) ); 5663 /* Finally, if an emulation warning was reported, 5664 side-exit to the next insn, reporting the warning, 5665 so that Valgrind's dispatcher sees the warning. */ 5666 stmt( 5667 IRStmt_Exit( 5668 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5669 Ijk_EmWarn, 5670 IRConst_U64( guest_RIP_bbstart+delta ), 5671 OFFB_RIP 5672 ) 5673 ); 5674 break; 5675 } 5676 5677 case 6: { /* FNSTENV m28 */ 5678 /* Uses dirty helper: 5679 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5680 IRDirty* d = unsafeIRDirty_0_N ( 5681 0/*regparms*/, 5682 "amd64g_dirtyhelper_FSTENV", 5683 &amd64g_dirtyhelper_FSTENV, 5684 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5685 ); 5686 /* declare we're writing memory */ 5687 d->mFx = Ifx_Write; 5688 d->mAddr = mkexpr(addr); 5689 d->mSize = 28; 5690 5691 /* declare we're reading guest state */ 5692 d->nFxState = 4; 5693 vex_bzero(&d->fxState, sizeof(d->fxState)); 5694 5695 d->fxState[0].fx = Ifx_Read; 5696 d->fxState[0].offset = OFFB_FTOP; 5697 d->fxState[0].size = sizeof(UInt); 5698 5699 d->fxState[1].fx = Ifx_Read; 5700 d->fxState[1].offset = OFFB_FPTAGS; 5701 d->fxState[1].size = 8 * sizeof(UChar); 5702 5703 d->fxState[2].fx = Ifx_Read; 5704 d->fxState[2].offset = OFFB_FPROUND; 5705 d->fxState[2].size = sizeof(ULong); 5706 5707 d->fxState[3].fx = Ifx_Read; 5708 d->fxState[3].offset = OFFB_FC3210; 5709 d->fxState[3].size = sizeof(ULong); 5710 5711 stmt( IRStmt_Dirty(d) ); 5712 5713 DIP("fnstenv %s\n", dis_buf); 5714 break; 5715 } 5716 5717 case 7: /* FNSTCW */ 5718 /* Fake up a native x87 FPU control word. The only 5719 thing it depends on is FPROUND[1:0], so call a clean 5720 helper to cook it up. */ 5721 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5722 DIP("fnstcw %s\n", dis_buf); 5723 storeLE( 5724 mkexpr(addr), 5725 unop( Iop_64to16, 5726 mkIRExprCCall( 5727 Ity_I64, 0/*regp*/, 5728 "amd64g_create_fpucw", &amd64g_create_fpucw, 5729 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5730 ) 5731 ) 5732 ); 5733 break; 5734 5735 default: 5736 vex_printf("unhandled opc_aux = 0x%2x\n", 5737 (UInt)gregLO3ofRM(modrm)); 5738 vex_printf("first_opcode == 0xD9\n"); 5739 goto decode_fail; 5740 } 5741 5742 } else { 5743 delta++; 5744 switch (modrm) { 5745 5746 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5747 r_src = (UInt)modrm - 0xC0; 5748 DIP("fld %%st(%u)\n", r_src); 5749 t1 = newTemp(Ity_F64); 5750 assign(t1, get_ST(r_src)); 5751 fp_push(); 5752 put_ST(0, mkexpr(t1)); 5753 break; 5754 5755 case 0xC8 ... 0xCF: /* FXCH %st(?) 
*/ 5756 r_src = (UInt)modrm - 0xC8; 5757 DIP("fxch %%st(%u)\n", r_src); 5758 t1 = newTemp(Ity_F64); 5759 t2 = newTemp(Ity_F64); 5760 assign(t1, get_ST(0)); 5761 assign(t2, get_ST(r_src)); 5762 put_ST_UNCHECKED(0, mkexpr(t2)); 5763 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5764 break; 5765 5766 case 0xE0: /* FCHS */ 5767 DIP("fchs\n"); 5768 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5769 break; 5770 5771 case 0xE1: /* FABS */ 5772 DIP("fabs\n"); 5773 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5774 break; 5775 5776 case 0xE5: { /* FXAM */ 5777 /* This is an interesting one. It examines %st(0), 5778 regardless of whether the tag says it's empty or not. 5779 Here, just pass both the tag (in our format) and the 5780 value (as a double, actually a ULong) to a helper 5781 function. */ 5782 IRExpr** args 5783 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5784 unop(Iop_ReinterpF64asI64, 5785 get_ST_UNCHECKED(0)) ); 5786 put_C3210(mkIRExprCCall( 5787 Ity_I64, 5788 0/*regparm*/, 5789 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5790 args 5791 )); 5792 DIP("fxam\n"); 5793 break; 5794 } 5795 5796 case 0xE8: /* FLD1 */ 5797 DIP("fld1\n"); 5798 fp_push(); 5799 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5800 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5801 break; 5802 5803 case 0xE9: /* FLDL2T */ 5804 DIP("fldl2t\n"); 5805 fp_push(); 5806 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5807 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5808 break; 5809 5810 case 0xEA: /* FLDL2E */ 5811 DIP("fldl2e\n"); 5812 fp_push(); 5813 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5814 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5815 break; 5816 5817 case 0xEB: /* FLDPI */ 5818 DIP("fldpi\n"); 5819 fp_push(); 5820 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5821 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5822 break; 5823 5824 case 0xEC: /* FLDLG2 */ 5825 DIP("fldlg2\n"); 5826 fp_push(); 5827 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5828 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5829 break; 5830 5831 case 0xED: /* FLDLN2 */ 5832 DIP("fldln2\n"); 5833 fp_push(); 5834 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5835 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5836 break; 5837 5838 case 0xEE: /* FLDZ */ 5839 DIP("fldz\n"); 5840 fp_push(); 5841 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5842 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5843 break; 5844 5845 case 0xF0: /* F2XM1 */ 5846 DIP("f2xm1\n"); 5847 put_ST_UNCHECKED(0, 5848 binop(Iop_2xm1F64, 5849 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5850 get_ST(0))); 5851 break; 5852 5853 case 0xF1: /* FYL2X */ 5854 DIP("fyl2x\n"); 5855 put_ST_UNCHECKED(1, 5856 triop(Iop_Yl2xF64, 5857 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5858 get_ST(1), 5859 get_ST(0))); 5860 fp_pop(); 5861 break; 5862 5863 case 0xF2: { /* FPTAN */ 5864 DIP("fptan\n"); 5865 IRTemp argD = newTemp(Ity_F64); 5866 assign(argD, get_ST(0)); 5867 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5868 IRTemp resD = newTemp(Ity_F64); 5869 assign(resD, 5870 IRExpr_ITE( 5871 mkexpr(argOK), 5872 binop(Iop_TanF64, 5873 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5874 mkexpr(argD)), 5875 mkexpr(argD)) 5876 ); 5877 put_ST_UNCHECKED(0, mkexpr(resD)); 5878 /* Conditionally push 1.0 on the stack, if the arg is 5879 in range */ 5880 
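/* If the argument was out of range, nothing is pushed, ST(0) is left holding the original argument, and C2 is set below to tell the program to do the range reduction itself. */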
maybe_fp_push(argOK); 5881 maybe_put_ST(argOK, 0, 5882 IRExpr_Const(IRConst_F64(1.0))); 5883 set_C2( binop(Iop_Xor64, 5884 unop(Iop_1Uto64, mkexpr(argOK)), 5885 mkU64(1)) ); 5886 break; 5887 } 5888 5889 case 0xF3: /* FPATAN */ 5890 DIP("fpatan\n"); 5891 put_ST_UNCHECKED(1, 5892 triop(Iop_AtanF64, 5893 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5894 get_ST(1), 5895 get_ST(0))); 5896 fp_pop(); 5897 break; 5898 5899 case 0xF4: { /* FXTRACT */ 5900 IRTemp argF = newTemp(Ity_F64); 5901 IRTemp sigF = newTemp(Ity_F64); 5902 IRTemp expF = newTemp(Ity_F64); 5903 IRTemp argI = newTemp(Ity_I64); 5904 IRTemp sigI = newTemp(Ity_I64); 5905 IRTemp expI = newTemp(Ity_I64); 5906 DIP("fxtract\n"); 5907 assign( argF, get_ST(0) ); 5908 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5909 assign( sigI, 5910 mkIRExprCCall( 5911 Ity_I64, 0/*regparms*/, 5912 "x86amd64g_calculate_FXTRACT", 5913 &x86amd64g_calculate_FXTRACT, 5914 mkIRExprVec_2( mkexpr(argI), 5915 mkIRExpr_HWord(0)/*sig*/ )) 5916 ); 5917 assign( expI, 5918 mkIRExprCCall( 5919 Ity_I64, 0/*regparms*/, 5920 "x86amd64g_calculate_FXTRACT", 5921 &x86amd64g_calculate_FXTRACT, 5922 mkIRExprVec_2( mkexpr(argI), 5923 mkIRExpr_HWord(1)/*exp*/ )) 5924 ); 5925 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5926 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5927 /* exponent */ 5928 put_ST_UNCHECKED(0, mkexpr(expF) ); 5929 fp_push(); 5930 /* significand */ 5931 put_ST(0, mkexpr(sigF) ); 5932 break; 5933 } 5934 5935 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5936 IRTemp a1 = newTemp(Ity_F64); 5937 IRTemp a2 = newTemp(Ity_F64); 5938 DIP("fprem1\n"); 5939 /* Do FPREM1 twice, once to get the remainder, and once 5940 to get the C3210 flag values. */ 5941 assign( a1, get_ST(0) ); 5942 assign( a2, get_ST(1) ); 5943 put_ST_UNCHECKED(0, 5944 triop(Iop_PRem1F64, 5945 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5946 mkexpr(a1), 5947 mkexpr(a2))); 5948 put_C3210( 5949 unop(Iop_32Uto64, 5950 triop(Iop_PRem1C3210F64, 5951 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5952 mkexpr(a1), 5953 mkexpr(a2)) )); 5954 break; 5955 } 5956 5957 case 0xF7: /* FINCSTP */ 5958 DIP("fincstp\n"); 5959 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5960 break; 5961 5962 case 0xF8: { /* FPREM -- not IEEE compliant */ 5963 IRTemp a1 = newTemp(Ity_F64); 5964 IRTemp a2 = newTemp(Ity_F64); 5965 DIP("fprem\n"); 5966 /* Do FPREM twice, once to get the remainder, and once 5967 to get the C3210 flag values. 
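(For FPREM, C2 set means the reduction is incomplete and must be repeated; when it is complete, C0, C3 and C1 receive the three low bits of the quotient.)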
*/ 5968 assign( a1, get_ST(0) ); 5969 assign( a2, get_ST(1) ); 5970 put_ST_UNCHECKED(0, 5971 triop(Iop_PRemF64, 5972 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5973 mkexpr(a1), 5974 mkexpr(a2))); 5975 put_C3210( 5976 unop(Iop_32Uto64, 5977 triop(Iop_PRemC3210F64, 5978 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5979 mkexpr(a1), 5980 mkexpr(a2)) )); 5981 break; 5982 } 5983 5984 case 0xF9: /* FYL2XP1 */ 5985 DIP("fyl2xp1\n"); 5986 put_ST_UNCHECKED(1, 5987 triop(Iop_Yl2xp1F64, 5988 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5989 get_ST(1), 5990 get_ST(0))); 5991 fp_pop(); 5992 break; 5993 5994 case 0xFA: /* FSQRT */ 5995 DIP("fsqrt\n"); 5996 put_ST_UNCHECKED(0, 5997 binop(Iop_SqrtF64, 5998 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5999 get_ST(0))); 6000 break; 6001 6002 case 0xFB: { /* FSINCOS */ 6003 DIP("fsincos\n"); 6004 IRTemp argD = newTemp(Ity_F64); 6005 assign(argD, get_ST(0)); 6006 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6007 IRTemp resD = newTemp(Ity_F64); 6008 assign(resD, 6009 IRExpr_ITE( 6010 mkexpr(argOK), 6011 binop(Iop_SinF64, 6012 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6013 mkexpr(argD)), 6014 mkexpr(argD)) 6015 ); 6016 put_ST_UNCHECKED(0, mkexpr(resD)); 6017 /* Conditionally push the cos value on the stack, if 6018 the arg is in range */ 6019 maybe_fp_push(argOK); 6020 maybe_put_ST(argOK, 0, 6021 binop(Iop_CosF64, 6022 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6023 mkexpr(argD))); 6024 set_C2( binop(Iop_Xor64, 6025 unop(Iop_1Uto64, mkexpr(argOK)), 6026 mkU64(1)) ); 6027 break; 6028 } 6029 6030 case 0xFC: /* FRNDINT */ 6031 DIP("frndint\n"); 6032 put_ST_UNCHECKED(0, 6033 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 6034 break; 6035 6036 case 0xFD: /* FSCALE */ 6037 DIP("fscale\n"); 6038 put_ST_UNCHECKED(0, 6039 triop(Iop_ScaleF64, 6040 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6041 get_ST(0), 6042 get_ST(1))); 6043 break; 6044 6045 case 0xFE: /* FSIN */ 6046 case 0xFF: { /* FCOS */ 6047 Bool isSIN = modrm == 0xFE; 6048 DIP("%s\n", isSIN ? "fsin" : "fcos"); 6049 IRTemp argD = newTemp(Ity_F64); 6050 assign(argD, get_ST(0)); 6051 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6052 IRTemp resD = newTemp(Ity_F64); 6053 assign(resD, 6054 IRExpr_ITE( 6055 mkexpr(argOK), 6056 binop(isSIN ? Iop_SinF64 : Iop_CosF64, 6057 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6058 mkexpr(argD)), 6059 mkexpr(argD)) 6060 ); 6061 put_ST_UNCHECKED(0, mkexpr(resD)); 6062 set_C2( binop(Iop_Xor64, 6063 unop(Iop_1Uto64, mkexpr(argOK)), 6064 mkU64(1)) ); 6065 break; 6066 } 6067 6068 default: 6069 goto decode_fail; 6070 } 6071 } 6072 } 6073 6074 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 6075 else 6076 if (first_opcode == 0xDA) { 6077 6078 if (modrm < 0xC0) { 6079 6080 /* bits 5,4,3 are an opcode extension, and the modRM also 6081 specifies an address. 
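For this group the memory operand is a 32-bit signed integer; it is converted to F64 before the arithmetic, via the shared do_fop_m32 / do_foprev_m32 cases below.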
*/ 6082 IROp fop; 6083 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6084 delta += len; 6085 switch (gregLO3ofRM(modrm)) { 6086 6087 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 6088 DIP("fiaddl %s\n", dis_buf); 6089 fop = Iop_AddF64; 6090 goto do_fop_m32; 6091 6092 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 6093 DIP("fimull %s\n", dis_buf); 6094 fop = Iop_MulF64; 6095 goto do_fop_m32; 6096 6097 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 6098 DIP("fisubl %s\n", dis_buf); 6099 fop = Iop_SubF64; 6100 goto do_fop_m32; 6101 6102 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 6103 DIP("fisubrl %s\n", dis_buf); 6104 fop = Iop_SubF64; 6105 goto do_foprev_m32; 6106 6107 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 6108 DIP("fidivl %s\n", dis_buf); 6109 fop = Iop_DivF64; 6110 goto do_fop_m32; 6111 6112 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 6113 DIP("fidivrl %s\n", dis_buf); 6114 fop = Iop_DivF64; 6115 goto do_foprev_m32; 6116 6117 do_fop_m32: 6118 put_ST_UNCHECKED(0, 6119 triop(fop, 6120 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6121 get_ST(0), 6122 unop(Iop_I32StoF64, 6123 loadLE(Ity_I32, mkexpr(addr))))); 6124 break; 6125 6126 do_foprev_m32: 6127 put_ST_UNCHECKED(0, 6128 triop(fop, 6129 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6130 unop(Iop_I32StoF64, 6131 loadLE(Ity_I32, mkexpr(addr))), 6132 get_ST(0))); 6133 break; 6134 6135 default: 6136 vex_printf("unhandled opc_aux = 0x%2x\n", 6137 (UInt)gregLO3ofRM(modrm)); 6138 vex_printf("first_opcode == 0xDA\n"); 6139 goto decode_fail; 6140 } 6141 6142 } else { 6143 6144 delta++; 6145 switch (modrm) { 6146 6147 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 6148 r_src = (UInt)modrm - 0xC0; 6149 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 6150 put_ST_UNCHECKED(0, 6151 IRExpr_ITE( 6152 mk_amd64g_calculate_condition(AMD64CondB), 6153 get_ST(r_src), get_ST(0)) ); 6154 break; 6155 6156 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 6157 r_src = (UInt)modrm - 0xC8; 6158 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 6159 put_ST_UNCHECKED(0, 6160 IRExpr_ITE( 6161 mk_amd64g_calculate_condition(AMD64CondZ), 6162 get_ST(r_src), get_ST(0)) ); 6163 break; 6164 6165 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 6166 r_src = (UInt)modrm - 0xD0; 6167 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 6168 put_ST_UNCHECKED(0, 6169 IRExpr_ITE( 6170 mk_amd64g_calculate_condition(AMD64CondBE), 6171 get_ST(r_src), get_ST(0)) ); 6172 break; 6173 6174 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 6175 r_src = (UInt)modrm - 0xD8; 6176 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 6177 put_ST_UNCHECKED(0, 6178 IRExpr_ITE( 6179 mk_amd64g_calculate_condition(AMD64CondP), 6180 get_ST(r_src), get_ST(0)) ); 6181 break; 6182 6183 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 6184 DIP("fucompp %%st(0),%%st(1)\n"); 6185 /* This forces C1 to zero, which isn't right. */ 6186 put_C3210( 6187 unop(Iop_32Uto64, 6188 binop( Iop_And32, 6189 binop(Iop_Shl32, 6190 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6191 mkU8(8)), 6192 mkU32(0x4500) 6193 ))); 6194 fp_pop(); 6195 fp_pop(); 6196 break; 6197 6198 default: 6199 goto decode_fail; 6200 } 6201 6202 } 6203 } 6204 6205 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 6206 else 6207 if (first_opcode == 0xDB) { 6208 if (modrm < 0xC0) { 6209 6210 /* bits 5,4,3 are an opcode extension, and the modRM also 6211 specifies an address. 
*/ 6212 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6213 delta += len; 6214 6215 switch (gregLO3ofRM(modrm)) { 6216 6217 case 0: /* FILD m32int */ 6218 DIP("fildl %s\n", dis_buf); 6219 fp_push(); 6220 put_ST(0, unop(Iop_I32StoF64, 6221 loadLE(Ity_I32, mkexpr(addr)))); 6222 break; 6223 6224 case 1: /* FISTTPL m32 (SSE3) */ 6225 DIP("fisttpl %s\n", dis_buf); 6226 storeLE( mkexpr(addr), 6227 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 6228 fp_pop(); 6229 break; 6230 6231 case 2: /* FIST m32 */ 6232 DIP("fistl %s\n", dis_buf); 6233 storeLE( mkexpr(addr), 6234 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6235 break; 6236 6237 case 3: /* FISTP m32 */ 6238 DIP("fistpl %s\n", dis_buf); 6239 storeLE( mkexpr(addr), 6240 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6241 fp_pop(); 6242 break; 6243 6244 case 5: { /* FLD extended-real */ 6245 /* Uses dirty helper: 6246 ULong amd64g_loadF80le ( ULong ) 6247 addr holds the address. First, do a dirty call to 6248 get hold of the data. */ 6249 IRTemp val = newTemp(Ity_I64); 6250 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 6251 6252 IRDirty* d = unsafeIRDirty_1_N ( 6253 val, 6254 0/*regparms*/, 6255 "amd64g_dirtyhelper_loadF80le", 6256 &amd64g_dirtyhelper_loadF80le, 6257 args 6258 ); 6259 /* declare that we're reading memory */ 6260 d->mFx = Ifx_Read; 6261 d->mAddr = mkexpr(addr); 6262 d->mSize = 10; 6263 6264 /* execute the dirty call, dumping the result in val. */ 6265 stmt( IRStmt_Dirty(d) ); 6266 fp_push(); 6267 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 6268 6269 DIP("fldt %s\n", dis_buf); 6270 break; 6271 } 6272 6273 case 7: { /* FSTP extended-real */ 6274 /* Uses dirty helper: 6275 void amd64g_storeF80le ( ULong addr, ULong data ) 6276 */ 6277 IRExpr** args 6278 = mkIRExprVec_2( mkexpr(addr), 6279 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 6280 6281 IRDirty* d = unsafeIRDirty_0_N ( 6282 0/*regparms*/, 6283 "amd64g_dirtyhelper_storeF80le", 6284 &amd64g_dirtyhelper_storeF80le, 6285 args 6286 ); 6287 /* declare we're writing memory */ 6288 d->mFx = Ifx_Write; 6289 d->mAddr = mkexpr(addr); 6290 d->mSize = 10; 6291 6292 /* execute the dirty call. */ 6293 stmt( IRStmt_Dirty(d) ); 6294 fp_pop(); 6295 6296 DIP("fstpt %s\n", dis_buf); 6297 break; 6298 } 6299 6300 default: 6301 vex_printf("unhandled opc_aux = 0x%2x\n", 6302 (UInt)gregLO3ofRM(modrm)); 6303 vex_printf("first_opcode == 0xDB\n"); 6304 goto decode_fail; 6305 } 6306 6307 } else { 6308 6309 delta++; 6310 switch (modrm) { 6311 6312 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 6313 r_src = (UInt)modrm - 0xC0; 6314 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 6315 put_ST_UNCHECKED(0, 6316 IRExpr_ITE( 6317 mk_amd64g_calculate_condition(AMD64CondNB), 6318 get_ST(r_src), get_ST(0)) ); 6319 break; 6320 6321 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 6322 r_src = (UInt)modrm - 0xC8; 6323 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 6324 put_ST_UNCHECKED( 6325 0, 6326 IRExpr_ITE( 6327 mk_amd64g_calculate_condition(AMD64CondNZ), 6328 get_ST(r_src), 6329 get_ST(0) 6330 ) 6331 ); 6332 break; 6333 6334 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 6335 r_src = (UInt)modrm - 0xD0; 6336 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 6337 put_ST_UNCHECKED( 6338 0, 6339 IRExpr_ITE( 6340 mk_amd64g_calculate_condition(AMD64CondNBE), 6341 get_ST(r_src), 6342 get_ST(0) 6343 ) 6344 ); 6345 break; 6346 6347 case 0xD8 ... 
0xDF: /* FCMOVNU ST(i), ST(0) */ 6348 r_src = (UInt)modrm - 0xD8; 6349 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 6350 put_ST_UNCHECKED( 6351 0, 6352 IRExpr_ITE( 6353 mk_amd64g_calculate_condition(AMD64CondNP), 6354 get_ST(r_src), 6355 get_ST(0) 6356 ) 6357 ); 6358 break; 6359 6360 case 0xE2: 6361 DIP("fnclex\n"); 6362 break; 6363 6364 case 0xE3: { 6365 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/); 6366 DIP("fninit\n"); 6367 break; 6368 } 6369 6370 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 6371 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 6372 break; 6373 6374 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 6375 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 6376 break; 6377 6378 default: 6379 goto decode_fail; 6380 } 6381 } 6382 } 6383 6384 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 6385 else 6386 if (first_opcode == 0xDC) { 6387 if (modrm < 0xC0) { 6388 6389 /* bits 5,4,3 are an opcode extension, and the modRM also 6390 specifies an address. */ 6391 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6392 delta += len; 6393 6394 switch (gregLO3ofRM(modrm)) { 6395 6396 case 0: /* FADD double-real */ 6397 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 6398 break; 6399 6400 case 1: /* FMUL double-real */ 6401 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 6402 break; 6403 6404 //.. case 2: /* FCOM double-real */ 6405 //.. DIP("fcoml %s\n", dis_buf); 6406 //.. /* This forces C1 to zero, which isn't right. */ 6407 //.. put_C3210( 6408 //.. binop( Iop_And32, 6409 //.. binop(Iop_Shl32, 6410 //.. binop(Iop_CmpF64, 6411 //.. get_ST(0), 6412 //.. loadLE(Ity_F64,mkexpr(addr))), 6413 //.. mkU8(8)), 6414 //.. mkU32(0x4500) 6415 //.. )); 6416 //.. break; 6417 6418 case 3: /* FCOMP double-real */ 6419 DIP("fcompl %s\n", dis_buf); 6420 /* This forces C1 to zero, which isn't right. */ 6421 put_C3210( 6422 unop(Iop_32Uto64, 6423 binop( Iop_And32, 6424 binop(Iop_Shl32, 6425 binop(Iop_CmpF64, 6426 get_ST(0), 6427 loadLE(Ity_F64,mkexpr(addr))), 6428 mkU8(8)), 6429 mkU32(0x4500) 6430 ))); 6431 fp_pop(); 6432 break; 6433 6434 case 4: /* FSUB double-real */ 6435 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 6436 break; 6437 6438 case 5: /* FSUBR double-real */ 6439 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 6440 break; 6441 6442 case 6: /* FDIV double-real */ 6443 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 6444 break; 6445 6446 case 7: /* FDIVR double-real */ 6447 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 6448 break; 6449 6450 default: 6451 vex_printf("unhandled opc_aux = 0x%2x\n", 6452 (UInt)gregLO3ofRM(modrm)); 6453 vex_printf("first_opcode == 0xDC\n"); 6454 goto decode_fail; 6455 } 6456 6457 } else { 6458 6459 delta++; 6460 switch (modrm) { 6461 6462 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 6463 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 6464 break; 6465 6466 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 6467 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 6468 break; 6469 6470 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 6471 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 6472 break; 6473 6474 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 6475 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 6476 break; 6477 6478 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 6479 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 6480 break; 6481 6482 case 0xF8 ... 
0xFF: /* FDIV %st(0),%st(?) */ 6483 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 6484 break; 6485 6486 default: 6487 goto decode_fail; 6488 } 6489 6490 } 6491 } 6492 6493 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 6494 else 6495 if (first_opcode == 0xDD) { 6496 6497 if (modrm < 0xC0) { 6498 6499 /* bits 5,4,3 are an opcode extension, and the modRM also 6500 specifies an address. */ 6501 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6502 delta += len; 6503 6504 switch (gregLO3ofRM(modrm)) { 6505 6506 case 0: /* FLD double-real */ 6507 DIP("fldl %s\n", dis_buf); 6508 fp_push(); 6509 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6510 break; 6511 6512 case 1: /* FISTTPQ m64 (SSE3) */ 6513 DIP("fisttpll %s\n", dis_buf); 6514 storeLE( mkexpr(addr), 6515 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6516 fp_pop(); 6517 break; 6518 6519 case 2: /* FST double-real */ 6520 DIP("fstl %s\n", dis_buf); 6521 storeLE(mkexpr(addr), get_ST(0)); 6522 break; 6523 6524 case 3: /* FSTP double-real */ 6525 DIP("fstpl %s\n", dis_buf); 6526 storeLE(mkexpr(addr), get_ST(0)); 6527 fp_pop(); 6528 break; 6529 6530 case 4: { /* FRSTOR m94/m108 */ 6531 IRTemp ew = newTemp(Ity_I32); 6532 IRTemp w64 = newTemp(Ity_I64); 6533 IRDirty* d; 6534 if ( have66(pfx) ) { 6535 /* Uses dirty helper: 6536 VexEmNote amd64g_dirtyhelper_FRSTORS 6537 ( VexGuestAMD64State*, HWord ) */ 6538 d = unsafeIRDirty_0_N ( 6539 0/*regparms*/, 6540 "amd64g_dirtyhelper_FRSTORS", 6541 &amd64g_dirtyhelper_FRSTORS, 6542 mkIRExprVec_1( mkexpr(addr) ) 6543 ); 6544 d->mSize = 94; 6545 } else { 6546 /* Uses dirty helper: 6547 VexEmNote amd64g_dirtyhelper_FRSTOR 6548 ( VexGuestAMD64State*, HWord ) */ 6549 d = unsafeIRDirty_0_N ( 6550 0/*regparms*/, 6551 "amd64g_dirtyhelper_FRSTOR", 6552 &amd64g_dirtyhelper_FRSTOR, 6553 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6554 ); 6555 d->mSize = 108; 6556 } 6557 6558 d->tmp = w64; 6559 /* declare we're reading memory */ 6560 d->mFx = Ifx_Read; 6561 d->mAddr = mkexpr(addr); 6562 /* d->mSize set above */ 6563 6564 /* declare we're writing guest state */ 6565 d->nFxState = 5; 6566 vex_bzero(&d->fxState, sizeof(d->fxState)); 6567 6568 d->fxState[0].fx = Ifx_Write; 6569 d->fxState[0].offset = OFFB_FTOP; 6570 d->fxState[0].size = sizeof(UInt); 6571 6572 d->fxState[1].fx = Ifx_Write; 6573 d->fxState[1].offset = OFFB_FPREGS; 6574 d->fxState[1].size = 8 * sizeof(ULong); 6575 6576 d->fxState[2].fx = Ifx_Write; 6577 d->fxState[2].offset = OFFB_FPTAGS; 6578 d->fxState[2].size = 8 * sizeof(UChar); 6579 6580 d->fxState[3].fx = Ifx_Write; 6581 d->fxState[3].offset = OFFB_FPROUND; 6582 d->fxState[3].size = sizeof(ULong); 6583 6584 d->fxState[4].fx = Ifx_Write; 6585 d->fxState[4].offset = OFFB_FC3210; 6586 d->fxState[4].size = sizeof(ULong); 6587 6588 stmt( IRStmt_Dirty(d) ); 6589 6590 /* ew contains any emulation warning we may need to 6591 issue. If needed, side-exit to the next insn, 6592 reporting the warning, so that Valgrind's dispatcher 6593 sees the warning. 
*/ 6594 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 6595 put_emwarn( mkexpr(ew) ); 6596 stmt( 6597 IRStmt_Exit( 6598 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6599 Ijk_EmWarn, 6600 IRConst_U64( guest_RIP_bbstart+delta ), 6601 OFFB_RIP 6602 ) 6603 ); 6604 6605 if ( have66(pfx) ) { 6606 DIP("frstors %s\n", dis_buf); 6607 } else { 6608 DIP("frstor %s\n", dis_buf); 6609 } 6610 break; 6611 } 6612 6613 case 6: { /* FNSAVE m94/m108 */ 6614 IRDirty *d; 6615 if ( have66(pfx) ) { 6616 /* Uses dirty helper: 6617 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, 6618 HWord ) */ 6619 d = unsafeIRDirty_0_N ( 6620 0/*regparms*/, 6621 "amd64g_dirtyhelper_FNSAVES", 6622 &amd64g_dirtyhelper_FNSAVES, 6623 mkIRExprVec_1( mkexpr(addr) ) 6624 ); 6625 d->mSize = 94; 6626 } else { 6627 /* Uses dirty helper: 6628 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, 6629 HWord ) */ 6630 d = unsafeIRDirty_0_N ( 6631 0/*regparms*/, 6632 "amd64g_dirtyhelper_FNSAVE", 6633 &amd64g_dirtyhelper_FNSAVE, 6634 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6635 ); 6636 d->mSize = 108; 6637 } 6638 6639 /* declare we're writing memory */ 6640 d->mFx = Ifx_Write; 6641 d->mAddr = mkexpr(addr); 6642 /* d->mSize set above */ 6643 6644 /* declare we're reading guest state */ 6645 d->nFxState = 5; 6646 vex_bzero(&d->fxState, sizeof(d->fxState)); 6647 6648 d->fxState[0].fx = Ifx_Read; 6649 d->fxState[0].offset = OFFB_FTOP; 6650 d->fxState[0].size = sizeof(UInt); 6651 6652 d->fxState[1].fx = Ifx_Read; 6653 d->fxState[1].offset = OFFB_FPREGS; 6654 d->fxState[1].size = 8 * sizeof(ULong); 6655 6656 d->fxState[2].fx = Ifx_Read; 6657 d->fxState[2].offset = OFFB_FPTAGS; 6658 d->fxState[2].size = 8 * sizeof(UChar); 6659 6660 d->fxState[3].fx = Ifx_Read; 6661 d->fxState[3].offset = OFFB_FPROUND; 6662 d->fxState[3].size = sizeof(ULong); 6663 6664 d->fxState[4].fx = Ifx_Read; 6665 d->fxState[4].offset = OFFB_FC3210; 6666 d->fxState[4].size = sizeof(ULong); 6667 6668 stmt( IRStmt_Dirty(d) ); 6669 6670 if ( have66(pfx) ) { 6671 DIP("fnsaves %s\n", dis_buf); 6672 } else { 6673 DIP("fnsave %s\n", dis_buf); 6674 } 6675 break; 6676 } 6677 6678 case 7: { /* FNSTSW m16 */ 6679 IRExpr* sw = get_FPU_sw(); 6680 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6681 storeLE( mkexpr(addr), sw ); 6682 DIP("fnstsw %s\n", dis_buf); 6683 break; 6684 } 6685 6686 default: 6687 vex_printf("unhandled opc_aux = 0x%2x\n", 6688 (UInt)gregLO3ofRM(modrm)); 6689 vex_printf("first_opcode == 0xDD\n"); 6690 goto decode_fail; 6691 } 6692 } else { 6693 delta++; 6694 switch (modrm) { 6695 6696 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6697 r_dst = (UInt)modrm - 0xC0; 6698 DIP("ffree %%st(%u)\n", r_dst); 6699 put_ST_TAG ( r_dst, mkU8(0) ); 6700 break; 6701 6702 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6703 r_dst = (UInt)modrm - 0xD0; 6704 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6705 /* P4 manual says: "If the destination operand is a 6706 non-empty register, the invalid-operation exception 6707 is not generated. Hence put_ST_UNCHECKED. */ 6708 put_ST_UNCHECKED(r_dst, get_ST(0)); 6709 break; 6710 6711 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6712 r_dst = (UInt)modrm - 0xD8; 6713 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6714 /* P4 manual says: "If the destination operand is a 6715 non-empty register, the invalid-operation exception 6716 is not generated. Hence put_ST_UNCHECKED. */ 6717 put_ST_UNCHECKED(r_dst, get_ST(0)); 6718 fp_pop(); 6719 break; 6720 6721 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) 
*/ 6722 r_dst = (UInt)modrm - 0xE0; 6723 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6724 /* This forces C1 to zero, which isn't right. */ 6725 put_C3210( 6726 unop(Iop_32Uto64, 6727 binop( Iop_And32, 6728 binop(Iop_Shl32, 6729 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6730 mkU8(8)), 6731 mkU32(0x4500) 6732 ))); 6733 break; 6734 6735 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */ 6736 r_dst = (UInt)modrm - 0xE8; 6737 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6738 /* This forces C1 to zero, which isn't right. */ 6739 put_C3210( 6740 unop(Iop_32Uto64, 6741 binop( Iop_And32, 6742 binop(Iop_Shl32, 6743 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6744 mkU8(8)), 6745 mkU32(0x4500) 6746 ))); 6747 fp_pop(); 6748 break; 6749 6750 default: 6751 goto decode_fail; 6752 } 6753 } 6754 } 6755 6756 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6757 else 6758 if (first_opcode == 0xDE) { 6759 6760 if (modrm < 0xC0) { 6761 6762 /* bits 5,4,3 are an opcode extension, and the modRM also 6763 specifies an address. */ 6764 IROp fop; 6765 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6766 delta += len; 6767 6768 switch (gregLO3ofRM(modrm)) { 6769 6770 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6771 DIP("fiaddw %s\n", dis_buf); 6772 fop = Iop_AddF64; 6773 goto do_fop_m16; 6774 6775 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6776 DIP("fimulw %s\n", dis_buf); 6777 fop = Iop_MulF64; 6778 goto do_fop_m16; 6779 6780 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6781 DIP("fisubw %s\n", dis_buf); 6782 fop = Iop_SubF64; 6783 goto do_fop_m16; 6784 6785 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6786 DIP("fisubrw %s\n", dis_buf); 6787 fop = Iop_SubF64; 6788 goto do_foprev_m16; 6789 6790 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6791 DIP("fidivw %s\n", dis_buf); 6792 fop = Iop_DivF64; 6793 goto do_fop_m16; 6794 6795 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6796 DIP("fidivrw %s\n", dis_buf); 6797 fop = Iop_DivF64; 6798 goto do_foprev_m16; 6799 6800 do_fop_m16: 6801 put_ST_UNCHECKED(0, 6802 triop(fop, 6803 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6804 get_ST(0), 6805 unop(Iop_I32StoF64, 6806 unop(Iop_16Sto32, 6807 loadLE(Ity_I16, mkexpr(addr)))))); 6808 break; 6809 6810 do_foprev_m16: 6811 put_ST_UNCHECKED(0, 6812 triop(fop, 6813 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6814 unop(Iop_I32StoF64, 6815 unop(Iop_16Sto32, 6816 loadLE(Ity_I16, mkexpr(addr)))), 6817 get_ST(0))); 6818 break; 6819 6820 default: 6821 vex_printf("unhandled opc_aux = 0x%2x\n", 6822 (UInt)gregLO3ofRM(modrm)); 6823 vex_printf("first_opcode == 0xDE\n"); 6824 goto decode_fail; 6825 } 6826 6827 } else { 6828 6829 delta++; 6830 switch (modrm) { 6831 6832 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6833 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6834 break; 6835 6836 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 6837 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6838 break; 6839 6840 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6841 DIP("fcompp %%st(0),%%st(1)\n"); 6842 /* This forces C1 to zero, which isn't right. */ 6843 put_C3210( 6844 unop(Iop_32Uto64, 6845 binop( Iop_And32, 6846 binop(Iop_Shl32, 6847 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6848 mkU8(8)), 6849 mkU32(0x4500) 6850 ))); 6851 fp_pop(); 6852 fp_pop(); 6853 break; 6854 6855 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6856 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6857 break; 6858 6859 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) 
*/ 6860 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6861 break; 6862 6863 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6864 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6865 break; 6866 6867 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6868 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6869 break; 6870 6871 default: 6872 goto decode_fail; 6873 } 6874 6875 } 6876 } 6877 6878 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6879 else 6880 if (first_opcode == 0xDF) { 6881 6882 if (modrm < 0xC0) { 6883 6884 /* bits 5,4,3 are an opcode extension, and the modRM also 6885 specifies an address. */ 6886 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6887 delta += len; 6888 6889 switch (gregLO3ofRM(modrm)) { 6890 6891 case 0: /* FILD m16int */ 6892 DIP("fildw %s\n", dis_buf); 6893 fp_push(); 6894 put_ST(0, unop(Iop_I32StoF64, 6895 unop(Iop_16Sto32, 6896 loadLE(Ity_I16, mkexpr(addr))))); 6897 break; 6898 6899 case 1: /* FISTTPS m16 (SSE3) */ 6900 DIP("fisttps %s\n", dis_buf); 6901 storeLE( mkexpr(addr), 6902 x87ishly_qnarrow_32_to_16( 6903 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6904 fp_pop(); 6905 break; 6906 6907 case 2: /* FIST m16 */ 6908 DIP("fists %s\n", dis_buf); 6909 storeLE( mkexpr(addr), 6910 x87ishly_qnarrow_32_to_16( 6911 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6912 break; 6913 6914 case 3: /* FISTP m16 */ 6915 DIP("fistps %s\n", dis_buf); 6916 storeLE( mkexpr(addr), 6917 x87ishly_qnarrow_32_to_16( 6918 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6919 fp_pop(); 6920 break; 6921 6922 case 5: /* FILD m64 */ 6923 DIP("fildll %s\n", dis_buf); 6924 fp_push(); 6925 put_ST(0, binop(Iop_I64StoF64, 6926 get_roundingmode(), 6927 loadLE(Ity_I64, mkexpr(addr)))); 6928 break; 6929 6930 case 7: /* FISTP m64 */ 6931 DIP("fistpll %s\n", dis_buf); 6932 storeLE( mkexpr(addr), 6933 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6934 fp_pop(); 6935 break; 6936 6937 default: 6938 vex_printf("unhandled opc_aux = 0x%2x\n", 6939 (UInt)gregLO3ofRM(modrm)); 6940 vex_printf("first_opcode == 0xDF\n"); 6941 goto decode_fail; 6942 } 6943 6944 } else { 6945 6946 delta++; 6947 switch (modrm) { 6948 6949 case 0xC0: /* FFREEP %st(0) */ 6950 DIP("ffreep %%st(%d)\n", 0); 6951 put_ST_TAG ( 0, mkU8(0) ); 6952 fp_pop(); 6953 break; 6954 6955 case 0xE0: /* FNSTSW %ax */ 6956 DIP("fnstsw %%ax\n"); 6957 /* Invent a plausible-looking FPU status word value and 6958 dump it in %AX: 6959 ((ftop & 7) << 11) | (c3210 & 0x4700) 6960 */ 6961 putIRegRAX( 6962 2, 6963 unop(Iop_32to16, 6964 binop(Iop_Or32, 6965 binop(Iop_Shl32, 6966 binop(Iop_And32, get_ftop(), mkU32(7)), 6967 mkU8(11)), 6968 binop(Iop_And32, 6969 unop(Iop_64to32, get_C3210()), 6970 mkU32(0x4700)) 6971 ))); 6972 break; 6973 6974 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ 6975 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 6976 break; 6977 6978 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) 
*/ 6979 /* not really right since COMIP != UCOMIP */ 6980 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 6981 break; 6982 6983 default: 6984 goto decode_fail; 6985 } 6986 } 6987 6988 } 6989 6990 else 6991 goto decode_fail; 6992 6993 *decode_ok = True; 6994 return delta; 6995 6996 decode_fail: 6997 *decode_ok = False; 6998 return delta; 6999 } 7000 7001 7002 /*------------------------------------------------------------*/ 7003 /*--- ---*/ 7004 /*--- MMX INSTRUCTIONS ---*/ 7005 /*--- ---*/ 7006 /*------------------------------------------------------------*/ 7007 7008 /* Effect of MMX insns on x87 FPU state (table 11-2 of 7009 IA32 arch manual, volume 3): 7010 7011 Read from, or write to MMX register (viz, any insn except EMMS): 7012 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 7013 * FP stack pointer set to zero 7014 7015 EMMS: 7016 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 7017 * FP stack pointer set to zero 7018 */ 7019 7020 static void do_MMX_preamble ( void ) 7021 { 7022 Int i; 7023 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 7024 IRExpr* zero = mkU32(0); 7025 IRExpr* tag1 = mkU8(1); 7026 put_ftop(zero); 7027 for (i = 0; i < 8; i++) 7028 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) ); 7029 } 7030 7031 static void do_EMMS_preamble ( void ) 7032 { 7033 Int i; 7034 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 7035 IRExpr* zero = mkU32(0); 7036 IRExpr* tag0 = mkU8(0); 7037 put_ftop(zero); 7038 for (i = 0; i < 8; i++) 7039 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) ); 7040 } 7041 7042 7043 static IRExpr* getMMXReg ( UInt archreg ) 7044 { 7045 vassert(archreg < 8); 7046 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 7047 } 7048 7049 7050 static void putMMXReg ( UInt archreg, IRExpr* e ) 7051 { 7052 vassert(archreg < 8); 7053 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 7054 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 7055 } 7056 7057 7058 /* Helper for non-shift MMX insns. Note this is incomplete in the 7059 sense that it does not first call do_MMX_preamble() -- that is the 7060 responsibility of its caller. 
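Each opcode below either maps directly onto a 64-bit SIMD IROp, or (for PMADDWD and PSADBW) onto a clean helper selected via hName/hAddr. eLeft means the E operand is presented as the left argument, as the pack/unpack/interleave cases require; invG means the G operand is complemented first (PANDN).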
*/ 7061 7062 static 7063 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi, 7064 Prefix pfx, 7065 Long delta, 7066 UChar opc, 7067 const HChar* name, 7068 Bool show_granularity ) 7069 { 7070 HChar dis_buf[50]; 7071 UChar modrm = getUChar(delta); 7072 Bool isReg = epartIsReg(modrm); 7073 IRExpr* argL = NULL; 7074 IRExpr* argR = NULL; 7075 IRExpr* argG = NULL; 7076 IRExpr* argE = NULL; 7077 IRTemp res = newTemp(Ity_I64); 7078 7079 Bool invG = False; 7080 IROp op = Iop_INVALID; 7081 void* hAddr = NULL; 7082 const HChar* hName = NULL; 7083 Bool eLeft = False; 7084 7085 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 7086 7087 switch (opc) { 7088 /* Original MMX ones */ 7089 case 0xFC: op = Iop_Add8x8; break; 7090 case 0xFD: op = Iop_Add16x4; break; 7091 case 0xFE: op = Iop_Add32x2; break; 7092 7093 case 0xEC: op = Iop_QAdd8Sx8; break; 7094 case 0xED: op = Iop_QAdd16Sx4; break; 7095 7096 case 0xDC: op = Iop_QAdd8Ux8; break; 7097 case 0xDD: op = Iop_QAdd16Ux4; break; 7098 7099 case 0xF8: op = Iop_Sub8x8; break; 7100 case 0xF9: op = Iop_Sub16x4; break; 7101 case 0xFA: op = Iop_Sub32x2; break; 7102 7103 case 0xE8: op = Iop_QSub8Sx8; break; 7104 case 0xE9: op = Iop_QSub16Sx4; break; 7105 7106 case 0xD8: op = Iop_QSub8Ux8; break; 7107 case 0xD9: op = Iop_QSub16Ux4; break; 7108 7109 case 0xE5: op = Iop_MulHi16Sx4; break; 7110 case 0xD5: op = Iop_Mul16x4; break; 7111 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break; 7112 7113 case 0x74: op = Iop_CmpEQ8x8; break; 7114 case 0x75: op = Iop_CmpEQ16x4; break; 7115 case 0x76: op = Iop_CmpEQ32x2; break; 7116 7117 case 0x64: op = Iop_CmpGT8Sx8; break; 7118 case 0x65: op = Iop_CmpGT16Sx4; break; 7119 case 0x66: op = Iop_CmpGT32Sx2; break; 7120 7121 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 7122 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 7123 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 7124 7125 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 7126 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 7127 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 7128 7129 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 7130 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 7131 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 7132 7133 case 0xDB: op = Iop_And64; break; 7134 case 0xDF: op = Iop_And64; invG = True; break; 7135 case 0xEB: op = Iop_Or64; break; 7136 case 0xEF: /* Possibly do better here if argL and argR are the 7137 same reg */ 7138 op = Iop_Xor64; break; 7139 7140 /* Introduced in SSE1 */ 7141 case 0xE0: op = Iop_Avg8Ux8; break; 7142 case 0xE3: op = Iop_Avg16Ux4; break; 7143 case 0xEE: op = Iop_Max16Sx4; break; 7144 case 0xDE: op = Iop_Max8Ux8; break; 7145 case 0xEA: op = Iop_Min16Sx4; break; 7146 case 0xDA: op = Iop_Min8Ux8; break; 7147 case 0xE4: op = Iop_MulHi16Ux4; break; 7148 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break; 7149 7150 /* Introduced in SSE2 */ 7151 case 0xD4: op = Iop_Add64; break; 7152 case 0xFB: op = Iop_Sub64; break; 7153 7154 default: 7155 vex_printf("\n0x%x\n", (UInt)opc); 7156 vpanic("dis_MMXop_regmem_to_reg"); 7157 } 7158 7159 # undef XXX 7160 7161 argG = getMMXReg(gregLO3ofRM(modrm)); 7162 if (invG) 7163 argG = unop(Iop_Not64, argG); 7164 7165 if (isReg) { 7166 delta++; 7167 argE = getMMXReg(eregLO3ofRM(modrm)); 7168 } else { 7169 Int len; 7170 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7171 delta += len; 7172 argE = loadLE(Ity_I64, mkexpr(addr)); 7173 } 7174 7175 if 
(eLeft) { 7176 argL = argE; 7177 argR = argG; 7178 } else { 7179 argL = argG; 7180 argR = argE; 7181 } 7182 7183 if (op != Iop_INVALID) { 7184 vassert(hName == NULL); 7185 vassert(hAddr == NULL); 7186 assign(res, binop(op, argL, argR)); 7187 } else { 7188 vassert(hName != NULL); 7189 vassert(hAddr != NULL); 7190 assign( res, 7191 mkIRExprCCall( 7192 Ity_I64, 7193 0/*regparms*/, hName, hAddr, 7194 mkIRExprVec_2( argL, argR ) 7195 ) 7196 ); 7197 } 7198 7199 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 7200 7201 DIP("%s%s %s, %s\n", 7202 name, show_granularity ? nameMMXGran(opc & 3) : "", 7203 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 7204 nameMMXReg(gregLO3ofRM(modrm)) ); 7205 7206 return delta; 7207 } 7208 7209 7210 /* Vector by scalar shift of G by the amount specified at the bottom 7211 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 7212 7213 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi, 7214 Prefix pfx, Long delta, 7215 const HChar* opname, IROp op ) 7216 { 7217 HChar dis_buf[50]; 7218 Int alen, size; 7219 IRTemp addr; 7220 Bool shl, shr, sar; 7221 UChar rm = getUChar(delta); 7222 IRTemp g0 = newTemp(Ity_I64); 7223 IRTemp g1 = newTemp(Ity_I64); 7224 IRTemp amt = newTemp(Ity_I64); 7225 IRTemp amt8 = newTemp(Ity_I8); 7226 7227 if (epartIsReg(rm)) { 7228 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 7229 DIP("%s %s,%s\n", opname, 7230 nameMMXReg(eregLO3ofRM(rm)), 7231 nameMMXReg(gregLO3ofRM(rm)) ); 7232 delta++; 7233 } else { 7234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 7235 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 7236 DIP("%s %s,%s\n", opname, 7237 dis_buf, 7238 nameMMXReg(gregLO3ofRM(rm)) ); 7239 delta += alen; 7240 } 7241 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 7242 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 7243 7244 shl = shr = sar = False; 7245 size = 0; 7246 switch (op) { 7247 case Iop_ShlN16x4: shl = True; size = 16; break; 7248 case Iop_ShlN32x2: shl = True; size = 32; break; 7249 case Iop_Shl64: shl = True; size = 64; break; 7250 case Iop_ShrN16x4: shr = True; size = 16; break; 7251 case Iop_ShrN32x2: shr = True; size = 32; break; 7252 case Iop_Shr64: shr = True; size = 64; break; 7253 case Iop_SarN16x4: sar = True; size = 16; break; 7254 case Iop_SarN32x2: sar = True; size = 32; break; 7255 default: vassert(0); 7256 } 7257 7258 if (shl || shr) { 7259 assign( 7260 g1, 7261 IRExpr_ITE( 7262 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7263 binop(op, mkexpr(g0), mkexpr(amt8)), 7264 mkU64(0) 7265 ) 7266 ); 7267 } else 7268 if (sar) { 7269 assign( 7270 g1, 7271 IRExpr_ITE( 7272 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7273 binop(op, mkexpr(g0), mkexpr(amt8)), 7274 binop(op, mkexpr(g0), mkU8(size-1)) 7275 ) 7276 ); 7277 } else { 7278 vassert(0); 7279 } 7280 7281 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) ); 7282 return delta; 7283 } 7284 7285 7286 /* Vector by scalar shift of E by an immediate byte. This is a 7287 straight copy of dis_SSE_shiftE_imm. 
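An immediate amount greater than or equal to the lane width forces the result to zero for logical shifts, and to a full sign fill (a shift by lane-width minus one) for arithmetic shifts, matching the hardware behaviour.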
*/ 7288 7289 static 7290 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op ) 7291 { 7292 Bool shl, shr, sar; 7293 UChar rm = getUChar(delta); 7294 IRTemp e0 = newTemp(Ity_I64); 7295 IRTemp e1 = newTemp(Ity_I64); 7296 UChar amt, size; 7297 vassert(epartIsReg(rm)); 7298 vassert(gregLO3ofRM(rm) == 2 7299 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 7300 amt = getUChar(delta+1); 7301 delta += 2; 7302 DIP("%s $%d,%s\n", opname, 7303 (Int)amt, 7304 nameMMXReg(eregLO3ofRM(rm)) ); 7305 7306 assign( e0, getMMXReg(eregLO3ofRM(rm)) ); 7307 7308 shl = shr = sar = False; 7309 size = 0; 7310 switch (op) { 7311 case Iop_ShlN16x4: shl = True; size = 16; break; 7312 case Iop_ShlN32x2: shl = True; size = 32; break; 7313 case Iop_Shl64: shl = True; size = 64; break; 7314 case Iop_SarN16x4: sar = True; size = 16; break; 7315 case Iop_SarN32x2: sar = True; size = 32; break; 7316 case Iop_ShrN16x4: shr = True; size = 16; break; 7317 case Iop_ShrN32x2: shr = True; size = 32; break; 7318 case Iop_Shr64: shr = True; size = 64; break; 7319 default: vassert(0); 7320 } 7321 7322 if (shl || shr) { 7323 assign( e1, amt >= size 7324 ? mkU64(0) 7325 : binop(op, mkexpr(e0), mkU8(amt)) 7326 ); 7327 } else 7328 if (sar) { 7329 assign( e1, amt >= size 7330 ? binop(op, mkexpr(e0), mkU8(size-1)) 7331 : binop(op, mkexpr(e0), mkU8(amt)) 7332 ); 7333 } else { 7334 vassert(0); 7335 } 7336 7337 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) ); 7338 return delta; 7339 } 7340 7341 7342 /* Completely handle all MMX instructions except emms. */ 7343 7344 static 7345 ULong dis_MMX ( Bool* decode_ok, 7346 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta ) 7347 { 7348 Int len; 7349 UChar modrm; 7350 HChar dis_buf[50]; 7351 UChar opc = getUChar(delta); 7352 delta++; 7353 7354 /* dis_MMX handles all insns except emms. 
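Hence the unconditional do_MMX_preamble() call below, which zeroes FTOP and marks all eight FP tags as in-use before any MMX operation, as per the x87/MMX interaction rules summarised above.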
*/ 7355 do_MMX_preamble(); 7356 7357 switch (opc) { 7358 7359 case 0x6E: 7360 if (sz == 4) { 7361 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/ 7362 modrm = getUChar(delta); 7363 if (epartIsReg(modrm)) { 7364 delta++; 7365 putMMXReg( 7366 gregLO3ofRM(modrm), 7367 binop( Iop_32HLto64, 7368 mkU32(0), 7369 getIReg32(eregOfRexRM(pfx,modrm)) ) ); 7370 DIP("movd %s, %s\n", 7371 nameIReg32(eregOfRexRM(pfx,modrm)), 7372 nameMMXReg(gregLO3ofRM(modrm))); 7373 } else { 7374 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7375 delta += len; 7376 putMMXReg( 7377 gregLO3ofRM(modrm), 7378 binop( Iop_32HLto64, 7379 mkU32(0), 7380 loadLE(Ity_I32, mkexpr(addr)) ) ); 7381 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7382 } 7383 } 7384 else 7385 if (sz == 8) { 7386 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/ 7387 modrm = getUChar(delta); 7388 if (epartIsReg(modrm)) { 7389 delta++; 7390 putMMXReg( gregLO3ofRM(modrm), 7391 getIReg64(eregOfRexRM(pfx,modrm)) ); 7392 DIP("movd %s, %s\n", 7393 nameIReg64(eregOfRexRM(pfx,modrm)), 7394 nameMMXReg(gregLO3ofRM(modrm))); 7395 } else { 7396 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7397 delta += len; 7398 putMMXReg( gregLO3ofRM(modrm), 7399 loadLE(Ity_I64, mkexpr(addr)) ); 7400 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7401 } 7402 } 7403 else { 7404 goto mmx_decode_failure; 7405 } 7406 break; 7407 7408 case 0x7E: 7409 if (sz == 4) { 7410 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 7411 modrm = getUChar(delta); 7412 if (epartIsReg(modrm)) { 7413 delta++; 7414 putIReg32( eregOfRexRM(pfx,modrm), 7415 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7416 DIP("movd %s, %s\n", 7417 nameMMXReg(gregLO3ofRM(modrm)), 7418 nameIReg32(eregOfRexRM(pfx,modrm))); 7419 } else { 7420 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7421 delta += len; 7422 storeLE( mkexpr(addr), 7423 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7424 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7425 } 7426 } 7427 else 7428 if (sz == 8) { 7429 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 7430 modrm = getUChar(delta); 7431 if (epartIsReg(modrm)) { 7432 delta++; 7433 putIReg64( eregOfRexRM(pfx,modrm), 7434 getMMXReg(gregLO3ofRM(modrm)) ); 7435 DIP("movd %s, %s\n", 7436 nameMMXReg(gregLO3ofRM(modrm)), 7437 nameIReg64(eregOfRexRM(pfx,modrm))); 7438 } else { 7439 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7440 delta += len; 7441 storeLE( mkexpr(addr), 7442 getMMXReg(gregLO3ofRM(modrm)) ); 7443 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7444 } 7445 } else { 7446 goto mmx_decode_failure; 7447 } 7448 break; 7449 7450 case 0x6F: 7451 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 7452 if (sz != 4 7453 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7454 goto mmx_decode_failure; 7455 modrm = getUChar(delta); 7456 if (epartIsReg(modrm)) { 7457 delta++; 7458 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 7459 DIP("movq %s, %s\n", 7460 nameMMXReg(eregLO3ofRM(modrm)), 7461 nameMMXReg(gregLO3ofRM(modrm))); 7462 } else { 7463 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7464 delta += len; 7465 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 7466 DIP("movq %s, %s\n", 7467 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7468 } 7469 break; 7470 7471 case 0x7F: 7472 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 7473 if (sz != 4 7474 && /*ignore redundant REX.W*/!(sz==8 && 
haveNo66noF2noF3(pfx))) 7475 goto mmx_decode_failure; 7476 modrm = getUChar(delta); 7477 if (epartIsReg(modrm)) { 7478 delta++; 7479 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) ); 7480 DIP("movq %s, %s\n", 7481 nameMMXReg(gregLO3ofRM(modrm)), 7482 nameMMXReg(eregLO3ofRM(modrm))); 7483 } else { 7484 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7485 delta += len; 7486 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 7487 DIP("mov(nt)q %s, %s\n", 7488 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7489 } 7490 break; 7491 7492 case 0xFC: 7493 case 0xFD: 7494 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 7495 if (sz != 4) 7496 goto mmx_decode_failure; 7497 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 7498 break; 7499 7500 case 0xEC: 7501 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7502 if (sz != 4 7503 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7504 goto mmx_decode_failure; 7505 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 7506 break; 7507 7508 case 0xDC: 7509 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7510 if (sz != 4) 7511 goto mmx_decode_failure; 7512 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 7513 break; 7514 7515 case 0xF8: 7516 case 0xF9: 7517 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 7518 if (sz != 4) 7519 goto mmx_decode_failure; 7520 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 7521 break; 7522 7523 case 0xE8: 7524 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7525 if (sz != 4) 7526 goto mmx_decode_failure; 7527 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 7528 break; 7529 7530 case 0xD8: 7531 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7532 if (sz != 4) 7533 goto mmx_decode_failure; 7534 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 7535 break; 7536 7537 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 7538 if (sz != 4) 7539 goto mmx_decode_failure; 7540 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 7541 break; 7542 7543 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 7544 if (sz != 4) 7545 goto mmx_decode_failure; 7546 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 7547 break; 7548 7549 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 7550 vassert(sz == 4); 7551 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 7552 break; 7553 7554 case 0x74: 7555 case 0x75: 7556 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 7557 if (sz != 4) 7558 goto mmx_decode_failure; 7559 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 7560 break; 7561 7562 case 0x64: 7563 case 0x65: 7564 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 7565 if (sz != 4) 7566 goto mmx_decode_failure; 7567 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 7568 break; 7569 7570 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 7571 if (sz != 4) 7572 goto mmx_decode_failure; 7573 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 7574 break; 7575 7576 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7577 if (sz != 4) 7578 goto mmx_decode_failure; 7579 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 7580 break; 7581 7582 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7583 if (sz 
!= 4) 7584 goto mmx_decode_failure; 7585 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False ); 7586 break; 7587 7588 case 0x68: 7589 case 0x69: 7590 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 7591 if (sz != 4 7592 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7593 goto mmx_decode_failure; 7594 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 7595 break; 7596 7597 case 0x60: 7598 case 0x61: 7599 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7600 if (sz != 4 7601 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7602 goto mmx_decode_failure; 7603 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 7604 break; 7605 7606 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 7607 if (sz != 4) 7608 goto mmx_decode_failure; 7609 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 7610 break; 7611 7612 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 7613 if (sz != 4) 7614 goto mmx_decode_failure; 7615 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7616 break; 7617 7618 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7619 if (sz != 4) 7620 goto mmx_decode_failure; 7621 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7622 break; 7623 7624 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7625 if (sz != 4) 7626 goto mmx_decode_failure; 7627 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7628 break; 7629 7630 # define SHIFT_BY_REG(_name,_op) \ 7631 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7632 break; 7633 7634 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7635 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7636 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7637 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7638 7639 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7640 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7641 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7642 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 7643 7644 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7645 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7646 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7647 7648 # undef SHIFT_BY_REG 7649 7650 case 0x71: 7651 case 0x72: 7652 case 0x73: { 7653 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7654 UChar byte2, subopc; 7655 if (sz != 4) 7656 goto mmx_decode_failure; 7657 byte2 = getUChar(delta); /* amode / sub-opcode */ 7658 subopc = toUChar( (byte2 >> 3) & 7 ); 7659 7660 # define SHIFT_BY_IMM(_name,_op) \ 7661 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7662 } while (0) 7663 7664 if (subopc == 2 /*SRL*/ && opc == 0x71) 7665 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7666 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7667 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7668 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7669 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7670 7671 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7672 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7673 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7674 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7675 7676 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7677 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7678 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7679 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7680 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7681 SHIFT_BY_IMM("psllq", Iop_Shl64); 7682 7683 else goto mmx_decode_failure; 7684 7685 # undef SHIFT_BY_IMM 7686 break; 7687 } 7688 7689 case 0xF7: { 7690 
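/* MASKMOVQ: store bytes of the G register to the address in RDI (subject to address-size and segment overrides), but only those bytes whose counterpart in the E register has its top bit set. Implemented below as a non-atomic read-modify-write of the whole 8-byte chunk. */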
IRTemp addr = newTemp(Ity_I64); 7691 IRTemp regD = newTemp(Ity_I64); 7692 IRTemp regM = newTemp(Ity_I64); 7693 IRTemp mask = newTemp(Ity_I64); 7694 IRTemp olddata = newTemp(Ity_I64); 7695 IRTemp newdata = newTemp(Ity_I64); 7696 7697 modrm = getUChar(delta); 7698 if (sz != 4 || (!epartIsReg(modrm))) 7699 goto mmx_decode_failure; 7700 delta++; 7701 7702 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7703 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7704 assign( regD, getMMXReg( gregLO3ofRM(modrm) )); 7705 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7706 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7707 assign( newdata, 7708 binop(Iop_Or64, 7709 binop(Iop_And64, 7710 mkexpr(regD), 7711 mkexpr(mask) ), 7712 binop(Iop_And64, 7713 mkexpr(olddata), 7714 unop(Iop_Not64, mkexpr(mask)))) ); 7715 storeLE( mkexpr(addr), mkexpr(newdata) ); 7716 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7717 nameMMXReg( gregLO3ofRM(modrm) ) ); 7718 break; 7719 } 7720 7721 /* --- MMX decode failure --- */ 7722 default: 7723 mmx_decode_failure: 7724 *decode_ok = False; 7725 return delta; /* ignored */ 7726 7727 } 7728 7729 *decode_ok = True; 7730 return delta; 7731 } 7732 7733 7734 /*------------------------------------------------------------*/ 7735 /*--- More misc arithmetic and other obscure insns. ---*/ 7736 /*------------------------------------------------------------*/ 7737 7738 /* Generate base << amt with vacated places filled with stuff 7739 from xtra. amt guaranteed in 0 .. 63. */ 7740 static 7741 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7742 { 7743 /* if amt == 0 7744 then base 7745 else (base << amt) | (xtra >>u (64-amt)) 7746 */ 7747 return 7748 IRExpr_ITE( 7749 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7750 binop(Iop_Or64, 7751 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7752 binop(Iop_Shr64, mkexpr(xtra), 7753 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7754 ), 7755 mkexpr(base) 7756 ); 7757 } 7758 7759 /* Generate base >>u amt with vacated places filled with stuff 7760 from xtra. amt guaranteed in 0 .. 63. */ 7761 static 7762 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) 7763 { 7764 /* if amt == 0 7765 then base 7766 else (base >>u amt) | (xtra << (64-amt)) 7767 */ 7768 return 7769 IRExpr_ITE( 7770 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7771 binop(Iop_Or64, 7772 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), 7773 binop(Iop_Shl64, mkexpr(xtra), 7774 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7775 ), 7776 mkexpr(base) 7777 ); 7778 } 7779 7780 /* Double length left and right shifts. Apparently only required in 7781 v-size (no b- variant). */ 7782 static 7783 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi, 7784 Prefix pfx, 7785 Long delta, UChar modrm, 7786 Int sz, 7787 IRExpr* shift_amt, 7788 Bool amt_is_literal, 7789 const HChar* shift_amt_txt, 7790 Bool left_shift ) 7791 { 7792 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 7793 for printing it. And eip on entry points at the modrm byte. */ 7794 Int len; 7795 HChar dis_buf[50]; 7796 7797 IRType ty = szToITy(sz); 7798 IRTemp gsrc = newTemp(ty); 7799 IRTemp esrc = newTemp(ty); 7800 IRTemp addr = IRTemp_INVALID; 7801 IRTemp tmpSH = newTemp(Ity_I8); 7802 IRTemp tmpSS = newTemp(Ity_I8); 7803 IRTemp tmp64 = IRTemp_INVALID; 7804 IRTemp res64 = IRTemp_INVALID; 7805 IRTemp rss64 = IRTemp_INVALID; 7806 IRTemp resTy = IRTemp_INVALID; 7807 IRTemp rssTy = IRTemp_INVALID; 7808 Int mask = sz==8 ? 
63 : 31; 7809 7810 vassert(sz == 2 || sz == 4 || sz == 8); 7811 7812 /* The E-part is the destination; this is shifted. The G-part 7813 supplies bits to be shifted into the E-part, but is not 7814 changed. 7815 7816 If shifting left, form a double-length word with E at the top 7817 and G at the bottom, and shift this left. The result is then in 7818 the high part. 7819 7820 If shifting right, form a double-length word with G at the top 7821 and E at the bottom, and shift this right. The result is then 7822 at the bottom. */ 7823 7824 /* Fetch the operands. */ 7825 7826 assign( gsrc, getIRegG(sz, pfx, modrm) ); 7827 7828 if (epartIsReg(modrm)) { 7829 delta++; 7830 assign( esrc, getIRegE(sz, pfx, modrm) ); 7831 DIP("sh%cd%c %s, %s, %s\n", 7832 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7833 shift_amt_txt, 7834 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); 7835 } else { 7836 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 7837 /* # bytes following amode */ 7838 amt_is_literal ? 1 : 0 ); 7839 delta += len; 7840 assign( esrc, loadLE(ty, mkexpr(addr)) ); 7841 DIP("sh%cd%c %s, %s, %s\n", 7842 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7843 shift_amt_txt, 7844 nameIRegG(sz, pfx, modrm), dis_buf); 7845 } 7846 7847 /* Calculate the masked shift amount (tmpSH), the masked subshift 7848 amount (tmpSS), the shifted value (res64) and the subshifted 7849 value (rss64). */ 7850 7851 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) ); 7852 assign( tmpSS, binop(Iop_And8, 7853 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 7854 mkU8(mask))); 7855 7856 tmp64 = newTemp(Ity_I64); 7857 res64 = newTemp(Ity_I64); 7858 rss64 = newTemp(Ity_I64); 7859 7860 if (sz == 2 || sz == 4) { 7861 7862 /* G is xtra; E is data */ 7863 /* what a freaking nightmare: */ 7864 if (sz == 4 && left_shift) { 7865 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) ); 7866 assign( res64, 7867 binop(Iop_Shr64, 7868 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7869 mkU8(32)) ); 7870 assign( rss64, 7871 binop(Iop_Shr64, 7872 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)), 7873 mkU8(32)) ); 7874 } 7875 else 7876 if (sz == 4 && !left_shift) { 7877 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) ); 7878 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7879 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) ); 7880 } 7881 else 7882 if (sz == 2 && left_shift) { 7883 assign( tmp64, 7884 binop(Iop_32HLto64, 7885 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)), 7886 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)) 7887 )); 7888 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */ 7889 assign( res64, 7890 binop(Iop_Shr64, 7891 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7892 mkU8(48)) ); 7893 /* subshift formed by shifting [esrc'0000'0000'0000] */ 7894 assign( rss64, 7895 binop(Iop_Shr64, 7896 binop(Iop_Shl64, 7897 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)), 7898 mkU8(48)), 7899 mkexpr(tmpSS)), 7900 mkU8(48)) ); 7901 } 7902 else 7903 if (sz == 2 && !left_shift) { 7904 assign( tmp64, 7905 binop(Iop_32HLto64, 7906 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)), 7907 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc)) 7908 )); 7909 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */ 7910 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7911 /* subshift formed by shifting [0000'0000'0000'esrc] */ 7912 assign( rss64, binop(Iop_Shr64, 7913 unop(Iop_16Uto64, mkexpr(esrc)), 7914 mkexpr(tmpSS)) ); 7915 } 7916 7917 } else { 7918 7919 vassert(sz == 8); 7920 if 
(left_shift) { 7921 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH )); 7922 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS )); 7923 } else { 7924 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH )); 7925 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS )); 7926 } 7927 7928 } 7929 7930 resTy = newTemp(ty); 7931 rssTy = newTemp(ty); 7932 assign( resTy, narrowTo(ty, mkexpr(res64)) ); 7933 assign( rssTy, narrowTo(ty, mkexpr(rss64)) ); 7934 7935 /* Put result back and write the flags thunk. */ 7936 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64, 7937 resTy, rssTy, ty, tmpSH ); 7938 7939 if (epartIsReg(modrm)) { 7940 putIRegE(sz, pfx, modrm, mkexpr(resTy)); 7941 } else { 7942 storeLE( mkexpr(addr), mkexpr(resTy) ); 7943 } 7944 7945 if (amt_is_literal) delta++; 7946 return delta; 7947 } 7948 7949 7950 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 7951 required. */ 7952 7953 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 7954 7955 static const HChar* nameBtOp ( BtOp op ) 7956 { 7957 switch (op) { 7958 case BtOpNone: return ""; 7959 case BtOpSet: return "s"; 7960 case BtOpReset: return "r"; 7961 case BtOpComp: return "c"; 7962 default: vpanic("nameBtOp(amd64)"); 7963 } 7964 } 7965 7966 7967 static 7968 ULong dis_bt_G_E ( const VexAbiInfo* vbi, 7969 Prefix pfx, Int sz, Long delta, BtOp op, 7970 /*OUT*/Bool* decode_OK ) 7971 { 7972 HChar dis_buf[50]; 7973 UChar modrm; 7974 Int len; 7975 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 7976 t_addr1, t_rsp, t_mask, t_new; 7977 7978 vassert(sz == 2 || sz == 4 || sz == 8); 7979 7980 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 7981 = t_addr0 = t_addr1 = t_rsp 7982 = t_mask = t_new = IRTemp_INVALID; 7983 7984 t_fetched = newTemp(Ity_I8); 7985 t_new = newTemp(Ity_I8); 7986 t_bitno0 = newTemp(Ity_I64); 7987 t_bitno1 = newTemp(Ity_I64); 7988 t_bitno2 = newTemp(Ity_I8); 7989 t_addr1 = newTemp(Ity_I64); 7990 modrm = getUChar(delta); 7991 7992 *decode_OK = True; 7993 if (epartIsReg(modrm)) { 7994 /* F2 and F3 are never acceptable. */ 7995 if (haveF2orF3(pfx)) { 7996 *decode_OK = False; 7997 return delta; 7998 } 7999 } else { 8000 /* F2 or F3 (but not both) are allowed, provided LOCK is also 8001 present, and only for the BTC/BTS/BTR cases (not BT). */ 8002 if (haveF2orF3(pfx)) { 8003 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) { 8004 *decode_OK = False; 8005 return delta; 8006 } 8007 } 8008 } 8009 8010 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) ); 8011 8012 if (epartIsReg(modrm)) { 8013 delta++; 8014 /* Get it onto the client's stack. Oh, this is a horrible 8015 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925. 8016 Because of the ELF ABI stack redzone, there may be live data 8017 up to 128 bytes below %RSP. So we can't just push it on the 8018 stack, else we may wind up trashing live data, and causing 8019 impossible-to-find simulation errors. (Yes, this did 8020 happen.) So we need to drop RSP before at least 128 before 8021 pushing it. That unfortunately means hitting Memcheck's 8022 fast-case painting code. Ideally we should drop more than 8023 128, to reduce the chances of breaking buggy programs that 8024 have live data below -128(%RSP). Memcheck fast-cases moves 8025 of 288 bytes due to the need to handle ppc64-linux quickly, 8026 so let's use 288. Of course the real fix is to get rid of 8027 this kludge entirely. 
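         In outline, the register-operand case below does:
            t_rsp  = %rsp - 288     (288 > 128, so clear of the redzone)
            %rsp   = t_rsp
            *t_rsp = %E             (spill the operand)
            ... do the bit test/update on memory at t_rsp ...
            %E     = *t_rsp         (reloaded at the end, BTS/BTR/BTC only)
            %rsp   = t_rsp + 288    (restore the stack pointer)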
*/ 8028 t_rsp = newTemp(Ity_I64); 8029 t_addr0 = newTemp(Ity_I64); 8030 8031 vassert(vbi->guest_stack_redzone_size == 128); 8032 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) ); 8033 putIReg64(R_RSP, mkexpr(t_rsp)); 8034 8035 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) ); 8036 8037 /* Make t_addr0 point at it. */ 8038 assign( t_addr0, mkexpr(t_rsp) ); 8039 8040 /* Mask out upper bits of the shift amount, since we're doing a 8041 reg. */ 8042 assign( t_bitno1, binop(Iop_And64, 8043 mkexpr(t_bitno0), 8044 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) ); 8045 8046 } else { 8047 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 8048 delta += len; 8049 assign( t_bitno1, mkexpr(t_bitno0) ); 8050 } 8051 8052 /* At this point: t_addr0 is the address being operated on. If it 8053 was a reg, we will have pushed it onto the client's stack. 8054 t_bitno1 is the bit number, suitably masked in the case of a 8055 reg. */ 8056 8057 /* Now the main sequence. */ 8058 assign( t_addr1, 8059 binop(Iop_Add64, 8060 mkexpr(t_addr0), 8061 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) ); 8062 8063 /* t_addr1 now holds effective address */ 8064 8065 assign( t_bitno2, 8066 unop(Iop_64to8, 8067 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) ); 8068 8069 /* t_bitno2 contains offset of bit within byte */ 8070 8071 if (op != BtOpNone) { 8072 t_mask = newTemp(Ity_I8); 8073 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 8074 } 8075 8076 /* t_mask is now a suitable byte mask */ 8077 8078 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 8079 8080 if (op != BtOpNone) { 8081 switch (op) { 8082 case BtOpSet: 8083 assign( t_new, 8084 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 8085 break; 8086 case BtOpComp: 8087 assign( t_new, 8088 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 8089 break; 8090 case BtOpReset: 8091 assign( t_new, 8092 binop(Iop_And8, mkexpr(t_fetched), 8093 unop(Iop_Not8, mkexpr(t_mask))) ); 8094 break; 8095 default: 8096 vpanic("dis_bt_G_E(amd64)"); 8097 } 8098 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) { 8099 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 8100 mkexpr(t_new)/*new*/, 8101 guest_RIP_curr_instr ); 8102 } else { 8103 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 8104 } 8105 } 8106 8107 /* Side effect done; now get selected bit into Carry flag */ 8108 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 8109 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8110 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8111 stmt( IRStmt_Put( 8112 OFFB_CC_DEP1, 8113 binop(Iop_And64, 8114 binop(Iop_Shr64, 8115 unop(Iop_8Uto64, mkexpr(t_fetched)), 8116 mkexpr(t_bitno2)), 8117 mkU64(1))) 8118 ); 8119 /* Set NDEP even though it isn't used. This makes redundant-PUT 8120 elimination of previous stores to this field work better. */ 8121 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8122 8123 /* Move reg operand from stack back to reg */ 8124 if (epartIsReg(modrm)) { 8125 /* t_rsp still points at it. */ 8126 /* only write the reg if actually modifying it; doing otherwise 8127 zeroes the top half erroneously when doing btl due to 8128 standard zero-extend rule */ 8129 if (op != BtOpNone) 8130 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) ); 8131 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) ); 8132 } 8133 8134 DIP("bt%s%c %s, %s\n", 8135 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), 8136 ( epartIsReg(modrm) ? 
nameIRegE(sz, pfx, modrm) : dis_buf ) ); 8137 8138 return delta; 8139 } 8140 8141 8142 8143 /* Handle BSF/BSR. Only v-size seems necessary. */ 8144 static 8145 ULong dis_bs_E_G ( const VexAbiInfo* vbi, 8146 Prefix pfx, Int sz, Long delta, Bool fwds ) 8147 { 8148 Bool isReg; 8149 UChar modrm; 8150 HChar dis_buf[50]; 8151 8152 IRType ty = szToITy(sz); 8153 IRTemp src = newTemp(ty); 8154 IRTemp dst = newTemp(ty); 8155 IRTemp src64 = newTemp(Ity_I64); 8156 IRTemp dst64 = newTemp(Ity_I64); 8157 IRTemp srcB = newTemp(Ity_I1); 8158 8159 vassert(sz == 8 || sz == 4 || sz == 2); 8160 8161 modrm = getUChar(delta); 8162 isReg = epartIsReg(modrm); 8163 if (isReg) { 8164 delta++; 8165 assign( src, getIRegE(sz, pfx, modrm) ); 8166 } else { 8167 Int len; 8168 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 8169 delta += len; 8170 assign( src, loadLE(ty, mkexpr(addr)) ); 8171 } 8172 8173 DIP("bs%c%c %s, %s\n", 8174 fwds ? 'f' : 'r', nameISize(sz), 8175 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 8176 nameIRegG(sz, pfx, modrm)); 8177 8178 /* First, widen src to 64 bits if it is not already. */ 8179 assign( src64, widenUto64(mkexpr(src)) ); 8180 8181 /* Generate a bool expression which is zero iff the original is 8182 zero, and nonzero otherwise. Ask for a CmpNE version which, if 8183 instrumented by Memcheck, is instrumented expensively, since 8184 this may be used on the output of a preceding movmskb insn, 8185 which has been known to be partially defined, and in need of 8186 careful handling. */ 8187 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) ); 8188 8189 /* Flags: Z is 1 iff source value is zero. All others 8190 are undefined -- we force them to zero. */ 8191 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8192 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8193 stmt( IRStmt_Put( 8194 OFFB_CC_DEP1, 8195 IRExpr_ITE( mkexpr(srcB), 8196 /* src!=0 */ 8197 mkU64(0), 8198 /* src==0 */ 8199 mkU64(AMD64G_CC_MASK_Z) 8200 ) 8201 )); 8202 /* Set NDEP even though it isn't used. This makes redundant-PUT 8203 elimination of previous stores to this field work better. */ 8204 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8205 8206 /* Result: iff source value is zero, we can't use 8207 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. 8208 But anyway, amd64 semantics say the result is undefined in 8209 such situations. Hence handle the zero case specially. */ 8210 8211 /* Bleh. What we compute: 8212 8213 bsf64: if src == 0 then {dst is unchanged} 8214 else Ctz64(src) 8215 8216 bsr64: if src == 0 then {dst is unchanged} 8217 else 63 - Clz64(src) 8218 8219 bsf32: if src == 0 then {dst is unchanged} 8220 else Ctz64(32Uto64(src)) 8221 8222 bsr32: if src == 0 then {dst is unchanged} 8223 else 63 - Clz64(32Uto64(src)) 8224 8225 bsf16: if src == 0 then {dst is unchanged} 8226 else Ctz64(32Uto64(16Uto32(src))) 8227 8228 bsr16: if src == 0 then {dst is unchanged} 8229 else 63 - Clz64(32Uto64(16Uto32(src))) 8230 */ 8231 8232 /* The main computation, guarding against zero. */ 8233 assign( dst64, 8234 IRExpr_ITE( 8235 mkexpr(srcB), 8236 /* src != 0 */ 8237 fwds ? 
unop(Iop_Ctz64, mkexpr(src64)) 8238 : binop(Iop_Sub64, 8239 mkU64(63), 8240 unop(Iop_Clz64, mkexpr(src64))), 8241 /* src == 0 -- leave dst unchanged */ 8242 widenUto64( getIRegG( sz, pfx, modrm ) ) 8243 ) 8244 ); 8245 8246 if (sz == 2) 8247 assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); 8248 else 8249 if (sz == 4) 8250 assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); 8251 else 8252 assign( dst, mkexpr(dst64) ); 8253 8254 /* dump result back */ 8255 putIRegG( sz, pfx, modrm, mkexpr(dst) ); 8256 8257 return delta; 8258 } 8259 8260 8261 /* swap rAX with the reg specified by reg and REX.B */ 8262 static 8263 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) 8264 { 8265 IRType ty = szToITy(sz); 8266 IRTemp t1 = newTemp(ty); 8267 IRTemp t2 = newTemp(ty); 8268 vassert(sz == 2 || sz == 4 || sz == 8); 8269 vassert(regLo3 < 8); 8270 if (sz == 8) { 8271 assign( t1, getIReg64(R_RAX) ); 8272 assign( t2, getIRegRexB(8, pfx, regLo3) ); 8273 putIReg64( R_RAX, mkexpr(t2) ); 8274 putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); 8275 } else if (sz == 4) { 8276 assign( t1, getIReg32(R_RAX) ); 8277 assign( t2, getIRegRexB(4, pfx, regLo3) ); 8278 putIReg32( R_RAX, mkexpr(t2) ); 8279 putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); 8280 } else { 8281 assign( t1, getIReg16(R_RAX) ); 8282 assign( t2, getIRegRexB(2, pfx, regLo3) ); 8283 putIReg16( R_RAX, mkexpr(t2) ); 8284 putIRegRexB(2, pfx, regLo3, mkexpr(t1) ); 8285 } 8286 DIP("xchg%c %s, %s\n", 8287 nameISize(sz), nameIRegRAX(sz), 8288 nameIRegRexB(sz,pfx, regLo3)); 8289 } 8290 8291 8292 static 8293 void codegen_SAHF ( void ) 8294 { 8295 /* Set the flags to: 8296 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 8297 -- retain the old O flag 8298 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8299 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) 8300 */ 8301 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8302 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8303 IRTemp oldflags = newTemp(Ity_I64); 8304 assign( oldflags, mk_amd64g_calculate_rflags_all() ); 8305 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8306 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8307 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8308 stmt( IRStmt_Put( OFFB_CC_DEP1, 8309 binop(Iop_Or64, 8310 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), 8311 binop(Iop_And64, 8312 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), 8313 mkU64(mask_SZACP)) 8314 ) 8315 )); 8316 } 8317 8318 8319 static 8320 void codegen_LAHF ( void ) 8321 { 8322 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 8323 IRExpr* rax_with_hole; 8324 IRExpr* new_byte; 8325 IRExpr* new_rax; 8326 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8327 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8328 8329 IRTemp flags = newTemp(Ity_I64); 8330 assign( flags, mk_amd64g_calculate_rflags_all() ); 8331 8332 rax_with_hole 8333 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); 8334 new_byte 8335 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), 8336 mkU64(1<<1)); 8337 new_rax 8338 = binop(Iop_Or64, rax_with_hole, 8339 binop(Iop_Shl64, new_byte, mkU8(8))); 8340 putIReg64(R_RAX, new_rax); 8341 } 8342 8343 8344 static 8345 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, 8346 const VexAbiInfo* vbi, 8347 Prefix pfx, 8348 Int size, 8349 Long delta0 ) 8350 { 8351 HChar dis_buf[50]; 8352 Int len; 8353 8354 IRType ty = szToITy(size); 8355 IRTemp acc = newTemp(ty); 8356 IRTemp src = newTemp(ty); 8357 IRTemp dest = newTemp(ty); 8358 IRTemp dest2 = newTemp(ty); 8359 IRTemp acc2 = newTemp(ty); 8360 
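   /* Architecturally, cmpxchg G,E behaves as follows, with the flags
      set as for a compare of %rAX with E:
         if (%rAX == E)  { ZF = 1; E    = G; }
         else            { ZF = 0; %rAX = E; }
   */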
IRTemp cond = newTemp(Ity_I1); 8361 IRTemp addr = IRTemp_INVALID; 8362 UChar rm = getUChar(delta0); 8363 8364 /* There are 3 cases to consider: 8365 8366 reg-reg: ignore any lock prefix, generate sequence based 8367 on ITE 8368 8369 reg-mem, not locked: ignore any lock prefix, generate sequence 8370 based on ITE 8371 8372 reg-mem, locked: use IRCAS 8373 */ 8374 8375 /* Decide whether F2 or F3 are acceptable. Never for register 8376 case, but for the memory case, one or the other is OK provided 8377 LOCK is also present. */ 8378 if (epartIsReg(rm)) { 8379 if (haveF2orF3(pfx)) { 8380 *ok = False; 8381 return delta0; 8382 } 8383 } else { 8384 if (haveF2orF3(pfx)) { 8385 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 8386 *ok = False; 8387 return delta0; 8388 } 8389 } 8390 } 8391 8392 if (epartIsReg(rm)) { 8393 /* case 1 */ 8394 assign( dest, getIRegE(size, pfx, rm) ); 8395 delta0++; 8396 assign( src, getIRegG(size, pfx, rm) ); 8397 assign( acc, getIRegRAX(size) ); 8398 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8399 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8400 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8401 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8402 putIRegRAX(size, mkexpr(acc2)); 8403 putIRegE(size, pfx, rm, mkexpr(dest2)); 8404 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8405 nameIRegG(size,pfx,rm), 8406 nameIRegE(size,pfx,rm) ); 8407 } 8408 else if (!epartIsReg(rm) && !haveLOCK(pfx)) { 8409 /* case 2 */ 8410 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8411 assign( dest, loadLE(ty, mkexpr(addr)) ); 8412 delta0 += len; 8413 assign( src, getIRegG(size, pfx, rm) ); 8414 assign( acc, getIRegRAX(size) ); 8415 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8416 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8417 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8418 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8419 putIRegRAX(size, mkexpr(acc2)); 8420 storeLE( mkexpr(addr), mkexpr(dest2) ); 8421 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8422 nameIRegG(size,pfx,rm), dis_buf); 8423 } 8424 else if (!epartIsReg(rm) && haveLOCK(pfx)) { 8425 /* case 3 */ 8426 /* src is new value. acc is expected value. dest is old value. 8427 Compute success from the output of the IRCAS, and steer the 8428 new value for RAX accordingly: in case of success, RAX is 8429 unchanged. */ 8430 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8431 delta0 += len; 8432 assign( src, getIRegG(size, pfx, rm) ); 8433 assign( acc, getIRegRAX(size) ); 8434 stmt( IRStmt_CAS( 8435 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 8436 NULL, mkexpr(acc), NULL, mkexpr(src) ) 8437 )); 8438 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8439 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8440 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8441 putIRegRAX(size, mkexpr(acc2)); 8442 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8443 nameIRegG(size,pfx,rm), dis_buf); 8444 } 8445 else vassert(0); 8446 8447 *ok = True; 8448 return delta0; 8449 } 8450 8451 8452 /* Handle conditional move instructions of the form 8453 cmovcc E(reg-or-mem), G(reg) 8454 8455 E(src) is reg-or-mem 8456 G(dst) is reg. 
8457 8458 If E is reg, --> GET %E, tmps 8459 GET %G, tmpd 8460 CMOVcc tmps, tmpd 8461 PUT tmpd, %G 8462 8463 If E is mem --> (getAddr E) -> tmpa 8464 LD (tmpa), tmps 8465 GET %G, tmpd 8466 CMOVcc tmps, tmpd 8467 PUT tmpd, %G 8468 */ 8469 static 8470 ULong dis_cmov_E_G ( const VexAbiInfo* vbi, 8471 Prefix pfx, 8472 Int sz, 8473 AMD64Condcode cond, 8474 Long delta0 ) 8475 { 8476 UChar rm = getUChar(delta0); 8477 HChar dis_buf[50]; 8478 Int len; 8479 8480 IRType ty = szToITy(sz); 8481 IRTemp tmps = newTemp(ty); 8482 IRTemp tmpd = newTemp(ty); 8483 8484 if (epartIsReg(rm)) { 8485 assign( tmps, getIRegE(sz, pfx, rm) ); 8486 assign( tmpd, getIRegG(sz, pfx, rm) ); 8487 8488 putIRegG( sz, pfx, rm, 8489 IRExpr_ITE( mk_amd64g_calculate_condition(cond), 8490 mkexpr(tmps), 8491 mkexpr(tmpd) ) 8492 ); 8493 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8494 nameIRegE(sz,pfx,rm), 8495 nameIRegG(sz,pfx,rm)); 8496 return 1+delta0; 8497 } 8498 8499 /* E refers to memory */ 8500 { 8501 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8502 assign( tmps, loadLE(ty, mkexpr(addr)) ); 8503 assign( tmpd, getIRegG(sz, pfx, rm) ); 8504 8505 putIRegG( sz, pfx, rm, 8506 IRExpr_ITE( mk_amd64g_calculate_condition(cond), 8507 mkexpr(tmps), 8508 mkexpr(tmpd) ) 8509 ); 8510 8511 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8512 dis_buf, 8513 nameIRegG(sz,pfx,rm)); 8514 return len+delta0; 8515 } 8516 } 8517 8518 8519 static 8520 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, 8521 const VexAbiInfo* vbi, 8522 Prefix pfx, Int sz, Long delta0 ) 8523 { 8524 Int len; 8525 UChar rm = getUChar(delta0); 8526 HChar dis_buf[50]; 8527 8528 IRType ty = szToITy(sz); 8529 IRTemp tmpd = newTemp(ty); 8530 IRTemp tmpt0 = newTemp(ty); 8531 IRTemp tmpt1 = newTemp(ty); 8532 8533 /* There are 3 cases to consider: 8534 8535 reg-reg: ignore any lock prefix, 8536 generate 'naive' (non-atomic) sequence 8537 8538 reg-mem, not locked: ignore any lock prefix, generate 'naive' 8539 (non-atomic) sequence 8540 8541 reg-mem, locked: use IRCAS 8542 */ 8543 8544 if (epartIsReg(rm)) { 8545 /* case 1 */ 8546 assign( tmpd, getIRegE(sz, pfx, rm) ); 8547 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8548 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8549 mkexpr(tmpd), mkexpr(tmpt0)) ); 8550 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8551 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8552 putIRegE(sz, pfx, rm, mkexpr(tmpt1)); 8553 DIP("xadd%c %s, %s\n", 8554 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm)); 8555 *decode_ok = True; 8556 return 1+delta0; 8557 } 8558 else if (!epartIsReg(rm) && !haveLOCK(pfx)) { 8559 /* case 2 */ 8560 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8561 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8562 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8563 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8564 mkexpr(tmpd), mkexpr(tmpt0)) ); 8565 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8566 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 8567 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8568 DIP("xadd%c %s, %s\n", 8569 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8570 *decode_ok = True; 8571 return len+delta0; 8572 } 8573 else if (!epartIsReg(rm) && haveLOCK(pfx)) { 8574 /* case 3 */ 8575 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8576 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8577 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8578 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8579 mkexpr(tmpd), mkexpr(tmpt0)) ); 8580 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 8581 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr 
); 8582 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8583 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8584 DIP("xadd%c %s, %s\n", 8585 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8586 *decode_ok = True; 8587 return len+delta0; 8588 } 8589 /*UNREACHED*/ 8590 vassert(0); 8591 } 8592 8593 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 8594 //.. 8595 //.. static 8596 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) 8597 //.. { 8598 //.. Int len; 8599 //.. IRTemp addr; 8600 //.. UChar rm = getUChar(delta0); 8601 //.. HChar dis_buf[50]; 8602 //.. 8603 //.. if (epartIsReg(rm)) { 8604 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 8605 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 8606 //.. return 1+delta0; 8607 //.. } else { 8608 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8609 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 8610 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 8611 //.. return len+delta0; 8612 //.. } 8613 //.. } 8614 //.. 8615 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 8616 //.. dst is ireg and sz==4, zero out top half of it. */ 8617 //.. 8618 //.. static 8619 //.. UInt dis_mov_Sw_Ew ( UChar sorb, 8620 //.. Int sz, 8621 //.. UInt delta0 ) 8622 //.. { 8623 //.. Int len; 8624 //.. IRTemp addr; 8625 //.. UChar rm = getUChar(delta0); 8626 //.. HChar dis_buf[50]; 8627 //.. 8628 //.. vassert(sz == 2 || sz == 4); 8629 //.. 8630 //.. if (epartIsReg(rm)) { 8631 //.. if (sz == 4) 8632 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 8633 //.. else 8634 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 8635 //.. 8636 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 8637 //.. return 1+delta0; 8638 //.. } else { 8639 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8640 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 8641 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 8642 //.. return len+delta0; 8643 //.. } 8644 //.. } 8645 8646 /* Handle move instructions of the form 8647 mov S, E meaning 8648 mov sreg, reg-or-mem 8649 Is passed the a ptr to the modRM byte, and the data size. Returns 8650 the address advanced completely over this instruction. 8651 8652 VEX does not currently simulate segment registers on AMD64 which means that 8653 instead of moving a value of a segment register, zero is moved to the 8654 destination. The zero value represents a null (unused) selector. This is 8655 not correct (especially for the %cs, %fs and %gs registers) but it seems to 8656 provide a sufficient simulation for currently seen programs that use this 8657 instruction. If some program actually decides to use the obtained segment 8658 selector for something meaningful then the zero value should be a clear 8659 indicator that there is some problem. 8660 8661 S(src) is sreg. 
8662 E(dst) is reg-or-mem 8663 8664 If E is reg, --> PUT $0, %E 8665 8666 If E is mem, --> (getAddr E) -> tmpa 8667 ST $0, (tmpa) 8668 */ 8669 static 8670 ULong dis_mov_S_E ( const VexAbiInfo* vbi, 8671 Prefix pfx, 8672 Int size, 8673 Long delta0 ) 8674 { 8675 Int len; 8676 UChar rm = getUChar(delta0); 8677 HChar dis_buf[50]; 8678 8679 if (epartIsReg(rm)) { 8680 putIRegE(size, pfx, rm, mkU(szToITy(size), 0)); 8681 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)), 8682 nameIRegE(size, pfx, rm)); 8683 return 1+delta0; 8684 } 8685 8686 /* E refers to memory */ 8687 { 8688 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0); 8689 storeLE(mkexpr(addr), mkU16(0)); 8690 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)), 8691 dis_buf); 8692 return len+delta0; 8693 } 8694 } 8695 8696 //.. static 8697 //.. void dis_push_segreg ( UInt sreg, Int sz ) 8698 //.. { 8699 //.. IRTemp t1 = newTemp(Ity_I16); 8700 //.. IRTemp ta = newTemp(Ity_I32); 8701 //.. vassert(sz == 2 || sz == 4); 8702 //.. 8703 //.. assign( t1, getSReg(sreg) ); 8704 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 8705 //.. putIReg(4, R_ESP, mkexpr(ta)); 8706 //.. storeLE( mkexpr(ta), mkexpr(t1) ); 8707 //.. 8708 //.. DIP("pushw %s\n", nameSReg(sreg)); 8709 //.. } 8710 //.. 8711 //.. static 8712 //.. void dis_pop_segreg ( UInt sreg, Int sz ) 8713 //.. { 8714 //.. IRTemp t1 = newTemp(Ity_I16); 8715 //.. IRTemp ta = newTemp(Ity_I32); 8716 //.. vassert(sz == 2 || sz == 4); 8717 //.. 8718 //.. assign( ta, getIReg(4, R_ESP) ); 8719 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 8720 //.. 8721 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 8722 //.. putSReg( sreg, mkexpr(t1) ); 8723 //.. DIP("pop %s\n", nameSReg(sreg)); 8724 //.. } 8725 8726 static 8727 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 ) 8728 { 8729 IRTemp t1 = newTemp(Ity_I64); 8730 IRTemp t2 = newTemp(Ity_I64); 8731 IRTemp t3 = newTemp(Ity_I64); 8732 assign(t1, getIReg64(R_RSP)); 8733 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8734 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8735 putIReg64(R_RSP, mkexpr(t3)); 8736 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8737 jmp_treg(dres, Ijk_Ret, t2); 8738 vassert(dres->whatNext == Dis_StopHere); 8739 } 8740 8741 8742 /*------------------------------------------------------------*/ 8743 /*--- SSE/SSE2/SSE3 helpers ---*/ 8744 /*------------------------------------------------------------*/ 8745 8746 /* Indicates whether the op requires a rounding-mode argument. Note 8747 that this covers only vector floating point arithmetic ops, and 8748 omits the scalar ones that need rounding modes. Note also that 8749 inconsistencies here will get picked up later by the IR sanity 8750 checker, so this isn't correctness-critical. */ 8751 static Bool requiresRMode ( IROp op ) 8752 { 8753 switch (op) { 8754 /* 128 bit ops */ 8755 case Iop_Add32Fx4: case Iop_Sub32Fx4: 8756 case Iop_Mul32Fx4: case Iop_Div32Fx4: 8757 case Iop_Add64Fx2: case Iop_Sub64Fx2: 8758 case Iop_Mul64Fx2: case Iop_Div64Fx2: 8759 /* 256 bit ops */ 8760 case Iop_Add32Fx8: case Iop_Sub32Fx8: 8761 case Iop_Mul32Fx8: case Iop_Div32Fx8: 8762 case Iop_Add64Fx4: case Iop_Sub64Fx4: 8763 case Iop_Mul64Fx4: case Iop_Div64Fx4: 8764 return True; 8765 default: 8766 break; 8767 } 8768 return False; 8769 } 8770 8771 8772 /* Worker function; do not call directly. 8773 Handles full width G = G `op` E and G = (not G) `op` E. 
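   For example (assuming the usual decode routing), andps-style ops use
   the plain form with Iop_AndV128, while andnps-style ops use the
   invertG form, since andnps computes (~dst) & src.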
8774 */ 8775 8776 static ULong dis_SSE_E_to_G_all_wrk ( 8777 const VexAbiInfo* vbi, 8778 Prefix pfx, Long delta, 8779 const HChar* opname, IROp op, 8780 Bool invertG 8781 ) 8782 { 8783 HChar dis_buf[50]; 8784 Int alen; 8785 IRTemp addr; 8786 UChar rm = getUChar(delta); 8787 Bool needsRMode = requiresRMode(op); 8788 IRExpr* gpart 8789 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8790 : getXMMReg(gregOfRexRM(pfx,rm)); 8791 if (epartIsReg(rm)) { 8792 putXMMReg( 8793 gregOfRexRM(pfx,rm), 8794 needsRMode 8795 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8796 gpart, 8797 getXMMReg(eregOfRexRM(pfx,rm))) 8798 : binop(op, gpart, 8799 getXMMReg(eregOfRexRM(pfx,rm))) 8800 ); 8801 DIP("%s %s,%s\n", opname, 8802 nameXMMReg(eregOfRexRM(pfx,rm)), 8803 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8804 return delta+1; 8805 } else { 8806 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8807 putXMMReg( 8808 gregOfRexRM(pfx,rm), 8809 needsRMode 8810 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8811 gpart, 8812 loadLE(Ity_V128, mkexpr(addr))) 8813 : binop(op, gpart, 8814 loadLE(Ity_V128, mkexpr(addr))) 8815 ); 8816 DIP("%s %s,%s\n", opname, 8817 dis_buf, 8818 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8819 return delta+alen; 8820 } 8821 } 8822 8823 8824 /* All lanes SSE binary operation, G = G `op` E. */ 8825 8826 static 8827 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi, 8828 Prefix pfx, Long delta, 8829 const HChar* opname, IROp op ) 8830 { 8831 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8832 } 8833 8834 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8835 8836 static 8837 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi, 8838 Prefix pfx, Long delta, 8839 const HChar* opname, IROp op ) 8840 { 8841 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8842 } 8843 8844 8845 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 8846 8847 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi, 8848 Prefix pfx, Long delta, 8849 const HChar* opname, IROp op ) 8850 { 8851 HChar dis_buf[50]; 8852 Int alen; 8853 IRTemp addr; 8854 UChar rm = getUChar(delta); 8855 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8856 if (epartIsReg(rm)) { 8857 putXMMReg( gregOfRexRM(pfx,rm), 8858 binop(op, gpart, 8859 getXMMReg(eregOfRexRM(pfx,rm))) ); 8860 DIP("%s %s,%s\n", opname, 8861 nameXMMReg(eregOfRexRM(pfx,rm)), 8862 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8863 return delta+1; 8864 } else { 8865 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8866 E operand needs to be made simply of zeroes. */ 8867 IRTemp epart = newTemp(Ity_V128); 8868 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8869 assign( epart, unop( Iop_32UtoV128, 8870 loadLE(Ity_I32, mkexpr(addr))) ); 8871 putXMMReg( gregOfRexRM(pfx,rm), 8872 binop(op, gpart, mkexpr(epart)) ); 8873 DIP("%s %s,%s\n", opname, 8874 dis_buf, 8875 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8876 return delta+alen; 8877 } 8878 } 8879 8880 8881 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. 
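   For example, an addsd-style op updates only bits 63:0 of the G (xmm)
   register, with op(G[63:0], E[63:0]); bits 127:64 are left unchanged.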
*/ 8882 8883 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi, 8884 Prefix pfx, Long delta, 8885 const HChar* opname, IROp op ) 8886 { 8887 HChar dis_buf[50]; 8888 Int alen; 8889 IRTemp addr; 8890 UChar rm = getUChar(delta); 8891 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8892 if (epartIsReg(rm)) { 8893 putXMMReg( gregOfRexRM(pfx,rm), 8894 binop(op, gpart, 8895 getXMMReg(eregOfRexRM(pfx,rm))) ); 8896 DIP("%s %s,%s\n", opname, 8897 nameXMMReg(eregOfRexRM(pfx,rm)), 8898 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8899 return delta+1; 8900 } else { 8901 /* We can only do a 64-bit memory read, so the upper half of the 8902 E operand needs to be made simply of zeroes. */ 8903 IRTemp epart = newTemp(Ity_V128); 8904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8905 assign( epart, unop( Iop_64UtoV128, 8906 loadLE(Ity_I64, mkexpr(addr))) ); 8907 putXMMReg( gregOfRexRM(pfx,rm), 8908 binop(op, gpart, mkexpr(epart)) ); 8909 DIP("%s %s,%s\n", opname, 8910 dis_buf, 8911 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8912 return delta+alen; 8913 } 8914 } 8915 8916 8917 /* All lanes unary SSE operation, G = op(E). */ 8918 8919 static ULong dis_SSE_E_to_G_unary_all ( 8920 const VexAbiInfo* vbi, 8921 Prefix pfx, Long delta, 8922 const HChar* opname, IROp op 8923 ) 8924 { 8925 HChar dis_buf[50]; 8926 Int alen; 8927 IRTemp addr; 8928 UChar rm = getUChar(delta); 8929 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked 8930 // up in the usual way. 8931 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2; 8932 if (epartIsReg(rm)) { 8933 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm)); 8934 /* XXXROUNDINGFIXME */ 8935 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 8936 : unop(op, src); 8937 putXMMReg( gregOfRexRM(pfx,rm), res ); 8938 DIP("%s %s,%s\n", opname, 8939 nameXMMReg(eregOfRexRM(pfx,rm)), 8940 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8941 return delta+1; 8942 } else { 8943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8944 IRExpr* src = loadLE(Ity_V128, mkexpr(addr)); 8945 /* XXXROUNDINGFIXME */ 8946 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 8947 : unop(op, src); 8948 putXMMReg( gregOfRexRM(pfx,rm), res ); 8949 DIP("%s %s,%s\n", opname, 8950 dis_buf, 8951 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8952 return delta+alen; 8953 } 8954 } 8955 8956 8957 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 8958 8959 static ULong dis_SSE_E_to_G_unary_lo32 ( 8960 const VexAbiInfo* vbi, 8961 Prefix pfx, Long delta, 8962 const HChar* opname, IROp op 8963 ) 8964 { 8965 /* First we need to get the old G value and patch the low 32 bits 8966 of the E operand into it. Then apply op and write back to G. 
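      So, for a sqrtss-style op with a memory operand, the net effect is
      that lane 0 of G becomes op applied to the 32-bit value loaded
      from memory, and lanes 1 .. 3 of G are unchanged.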
*/ 8967 HChar dis_buf[50]; 8968 Int alen; 8969 IRTemp addr; 8970 UChar rm = getUChar(delta); 8971 IRTemp oldG0 = newTemp(Ity_V128); 8972 IRTemp oldG1 = newTemp(Ity_V128); 8973 8974 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8975 8976 if (epartIsReg(rm)) { 8977 assign( oldG1, 8978 binop( Iop_SetV128lo32, 8979 mkexpr(oldG0), 8980 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8981 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8982 DIP("%s %s,%s\n", opname, 8983 nameXMMReg(eregOfRexRM(pfx,rm)), 8984 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8985 return delta+1; 8986 } else { 8987 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8988 assign( oldG1, 8989 binop( Iop_SetV128lo32, 8990 mkexpr(oldG0), 8991 loadLE(Ity_I32, mkexpr(addr)) )); 8992 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8993 DIP("%s %s,%s\n", opname, 8994 dis_buf, 8995 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8996 return delta+alen; 8997 } 8998 } 8999 9000 9001 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 9002 9003 static ULong dis_SSE_E_to_G_unary_lo64 ( 9004 const VexAbiInfo* vbi, 9005 Prefix pfx, Long delta, 9006 const HChar* opname, IROp op 9007 ) 9008 { 9009 /* First we need to get the old G value and patch the low 64 bits 9010 of the E operand into it. Then apply op and write back to G. */ 9011 HChar dis_buf[50]; 9012 Int alen; 9013 IRTemp addr; 9014 UChar rm = getUChar(delta); 9015 IRTemp oldG0 = newTemp(Ity_V128); 9016 IRTemp oldG1 = newTemp(Ity_V128); 9017 9018 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9019 9020 if (epartIsReg(rm)) { 9021 assign( oldG1, 9022 binop( Iop_SetV128lo64, 9023 mkexpr(oldG0), 9024 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 9025 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9026 DIP("%s %s,%s\n", opname, 9027 nameXMMReg(eregOfRexRM(pfx,rm)), 9028 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9029 return delta+1; 9030 } else { 9031 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9032 assign( oldG1, 9033 binop( Iop_SetV128lo64, 9034 mkexpr(oldG0), 9035 loadLE(Ity_I64, mkexpr(addr)) )); 9036 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9037 DIP("%s %s,%s\n", opname, 9038 dis_buf, 9039 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9040 return delta+alen; 9041 } 9042 } 9043 9044 9045 /* SSE integer binary operation: 9046 G = G `op` E (eLeft == False) 9047 G = E `op` G (eLeft == True) 9048 */ 9049 static ULong dis_SSEint_E_to_G( 9050 const VexAbiInfo* vbi, 9051 Prefix pfx, Long delta, 9052 const HChar* opname, IROp op, 9053 Bool eLeft 9054 ) 9055 { 9056 HChar dis_buf[50]; 9057 Int alen; 9058 IRTemp addr; 9059 UChar rm = getUChar(delta); 9060 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 9061 IRExpr* epart = NULL; 9062 if (epartIsReg(rm)) { 9063 epart = getXMMReg(eregOfRexRM(pfx,rm)); 9064 DIP("%s %s,%s\n", opname, 9065 nameXMMReg(eregOfRexRM(pfx,rm)), 9066 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9067 delta += 1; 9068 } else { 9069 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9070 epart = loadLE(Ity_V128, mkexpr(addr)); 9071 DIP("%s %s,%s\n", opname, 9072 dis_buf, 9073 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9074 delta += alen; 9075 } 9076 putXMMReg( gregOfRexRM(pfx,rm), 9077 eLeft ? binop(op, epart, gpart) 9078 : binop(op, gpart, epart) ); 9079 return delta; 9080 } 9081 9082 9083 /* Helper for doing SSE FP comparisons. False return ==> unhandled. 9084 This is all a bit of a kludge in that it ignores the subtleties of 9085 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel 9086 spec. 
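   Worked example: imm8 == 0xD (GE_OS) produces (preSwap = True,
   op = Iop_CmpLE32Fx4, postNot = False), i.e. "a >= b" is evaluated as
   "b <= a" with the operands swapped first.  imm8 == 0x4 (NEQ_UQ)
   produces (False, Iop_CmpEQ32Fx4, True): compare for equality, then
   invert the resulting lane mask.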
*/ 9087 static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP, 9088 /*OUT*/IROp* opP, 9089 /*OUT*/Bool* postNotP, 9090 UInt imm8, Bool all_lanes, Int sz ) 9091 { 9092 if (imm8 >= 32) return False; 9093 9094 /* First, compute a (preSwap, op, postNot) triple from 9095 the supplied imm8. */ 9096 Bool pre = False; 9097 IROp op = Iop_INVALID; 9098 Bool not = False; 9099 9100 # define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; } 9101 // If you add a case here, add a corresponding test for both VCMPSD_128 9102 // and VCMPSS_128 in avx-1.c. 9103 // Cases 0xA and above are 9104 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]" 9105 switch (imm8) { 9106 // "O" = ordered, "U" = unordered 9107 // "Q" = non-signalling (quiet), "S" = signalling 9108 // 9109 // swap operands? 9110 // | 9111 // | cmp op invert after? 9112 // | | | 9113 // v v v 9114 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ 9115 case 0x8: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ 9116 case 0x10: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OS 9117 case 0x18: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_US 9118 // 9119 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS 9120 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ 9121 // 9122 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS 9123 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ 9124 // 9125 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q 9126 case 0x13: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_S 9127 // 9128 // 0xC: this isn't really right because it returns all-1s when 9129 // either operand is a NaN, and it should return all-0s. 9130 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ 9131 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ 9132 case 0x14: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_US 9133 case 0x1C: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS 9134 // 9135 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US 9136 case 0x15: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_UQ 9137 // 9138 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US 9139 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ 9140 // 9141 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q 9142 case 0x17: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_S 9143 // 9144 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US 9145 case 0x19: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_UQ 9146 // 9147 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US 9148 case 0x1A: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_UQ 9149 // 9150 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS 9151 case 0x1D: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OQ 9152 // 9153 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS 9154 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ 9155 // Unhandled: 9156 // 0xB FALSE_OQ 9157 // 0xF TRUE_UQ 9158 // 0x1B FALSE_OS 9159 // 0x1F TRUE_US 9160 /* Don't forget to add test cases to VCMPSS_128_<imm8> in 9161 avx-1.c if new cases turn up. */ 9162 default: break; 9163 } 9164 # undef XXX 9165 if (op == Iop_INVALID) return False; 9166 9167 /* Now convert the op into one with the same arithmetic but that is 9168 correct for the width and laneage requirements. 
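   For example, for a 64-bit scalar compare (sz == 8 && !all_lanes) the
   baseline Iop_CmpLT32Fx4 becomes Iop_CmpLT64F0x2 -- the same
   predicate, but applied to the lowest 64-bit lane only.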
*/ 9169 9170 /**/ if (sz == 4 && all_lanes) { 9171 switch (op) { 9172 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break; 9173 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break; 9174 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break; 9175 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break; 9176 default: vassert(0); 9177 } 9178 } 9179 else if (sz == 4 && !all_lanes) { 9180 switch (op) { 9181 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break; 9182 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break; 9183 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break; 9184 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break; 9185 default: vassert(0); 9186 } 9187 } 9188 else if (sz == 8 && all_lanes) { 9189 switch (op) { 9190 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break; 9191 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break; 9192 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break; 9193 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break; 9194 default: vassert(0); 9195 } 9196 } 9197 else if (sz == 8 && !all_lanes) { 9198 switch (op) { 9199 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break; 9200 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break; 9201 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break; 9202 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break; 9203 default: vassert(0); 9204 } 9205 } 9206 else { 9207 vpanic("findSSECmpOp(amd64,guest)"); 9208 } 9209 9210 *preSwapP = pre; *opP = op; *postNotP = not; 9211 return True; 9212 } 9213 9214 9215 /* Handles SSE 32F/64F comparisons. It can fail, in which case it 9216 returns the original delta to indicate failure. */ 9217 9218 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi, 9219 Prefix pfx, Long delta, 9220 const HChar* opname, Bool all_lanes, Int sz ) 9221 { 9222 Long delta0 = delta; 9223 HChar dis_buf[50]; 9224 Int alen; 9225 UInt imm8; 9226 IRTemp addr; 9227 Bool preSwap = False; 9228 IROp op = Iop_INVALID; 9229 Bool postNot = False; 9230 IRTemp plain = newTemp(Ity_V128); 9231 UChar rm = getUChar(delta); 9232 UShort mask = 0; 9233 vassert(sz == 4 || sz == 8); 9234 if (epartIsReg(rm)) { 9235 imm8 = getUChar(delta+1); 9236 if (imm8 >= 8) return delta0; /* FAIL */ 9237 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9238 if (!ok) return delta0; /* FAIL */ 9239 vassert(!preSwap); /* never needed for imm8 < 8 */ 9240 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 9241 getXMMReg(eregOfRexRM(pfx,rm))) ); 9242 delta += 2; 9243 DIP("%s $%u,%s,%s\n", opname, 9244 imm8, 9245 nameXMMReg(eregOfRexRM(pfx,rm)), 9246 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9247 } else { 9248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 9249 imm8 = getUChar(delta+alen); 9250 if (imm8 >= 8) return delta0; /* FAIL */ 9251 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9252 if (!ok) return delta0; /* FAIL */ 9253 vassert(!preSwap); /* never needed for imm8 < 8 */ 9254 assign( plain, 9255 binop( 9256 op, 9257 getXMMReg(gregOfRexRM(pfx,rm)), 9258 all_lanes 9259 ? loadLE(Ity_V128, mkexpr(addr)) 9260 : sz == 8 9261 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 9262 : /*sz==4*/ 9263 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 9264 ) 9265 ); 9266 delta += alen+1; 9267 DIP("%s $%u,%s,%s\n", opname, 9268 imm8, 9269 dis_buf, 9270 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9271 } 9272 9273 if (postNot && all_lanes) { 9274 putXMMReg( gregOfRexRM(pfx,rm), 9275 unop(Iop_NotV128, mkexpr(plain)) ); 9276 } 9277 else 9278 if (postNot && !all_lanes) { 9279 mask = toUShort(sz==4 ? 
0x000F : 0x00FF); 9280 putXMMReg( gregOfRexRM(pfx,rm), 9281 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 9282 } 9283 else { 9284 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 9285 } 9286 9287 return delta; 9288 } 9289 9290 9291 /* Vector by scalar shift of G by the amount specified at the bottom 9292 of E. */ 9293 9294 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi, 9295 Prefix pfx, Long delta, 9296 const HChar* opname, IROp op ) 9297 { 9298 HChar dis_buf[50]; 9299 Int alen, size; 9300 IRTemp addr; 9301 Bool shl, shr, sar; 9302 UChar rm = getUChar(delta); 9303 IRTemp g0 = newTemp(Ity_V128); 9304 IRTemp g1 = newTemp(Ity_V128); 9305 IRTemp amt = newTemp(Ity_I64); 9306 IRTemp amt8 = newTemp(Ity_I8); 9307 if (epartIsReg(rm)) { 9308 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) ); 9309 DIP("%s %s,%s\n", opname, 9310 nameXMMReg(eregOfRexRM(pfx,rm)), 9311 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9312 delta++; 9313 } else { 9314 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9315 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 9316 DIP("%s %s,%s\n", opname, 9317 dis_buf, 9318 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9319 delta += alen; 9320 } 9321 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9322 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 9323 9324 shl = shr = sar = False; 9325 size = 0; 9326 switch (op) { 9327 case Iop_ShlN16x8: shl = True; size = 32; break; 9328 case Iop_ShlN32x4: shl = True; size = 32; break; 9329 case Iop_ShlN64x2: shl = True; size = 64; break; 9330 case Iop_SarN16x8: sar = True; size = 16; break; 9331 case Iop_SarN32x4: sar = True; size = 32; break; 9332 case Iop_ShrN16x8: shr = True; size = 16; break; 9333 case Iop_ShrN32x4: shr = True; size = 32; break; 9334 case Iop_ShrN64x2: shr = True; size = 64; break; 9335 default: vassert(0); 9336 } 9337 9338 if (shl || shr) { 9339 assign( 9340 g1, 9341 IRExpr_ITE( 9342 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9343 binop(op, mkexpr(g0), mkexpr(amt8)), 9344 mkV128(0x0000) 9345 ) 9346 ); 9347 } else 9348 if (sar) { 9349 assign( 9350 g1, 9351 IRExpr_ITE( 9352 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9353 binop(op, mkexpr(g0), mkexpr(amt8)), 9354 binop(op, mkexpr(g0), mkU8(size-1)) 9355 ) 9356 ); 9357 } else { 9358 vassert(0); 9359 } 9360 9361 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 9362 return delta; 9363 } 9364 9365 9366 /* Vector by scalar shift of E by an immediate byte. 
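   As with the shift-by-register case above, out-of-range counts
   saturate: a psrlw-style shift by 20 produces all zeroes (20 >= 16),
   whereas a psraw-style shift by 20 behaves like a shift by 15 (every
   lane becomes a copy of its sign bit).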
*/ 9367 9368 static 9369 ULong dis_SSE_shiftE_imm ( Prefix pfx, 9370 Long delta, const HChar* opname, IROp op ) 9371 { 9372 Bool shl, shr, sar; 9373 UChar rm = getUChar(delta); 9374 IRTemp e0 = newTemp(Ity_V128); 9375 IRTemp e1 = newTemp(Ity_V128); 9376 UChar amt, size; 9377 vassert(epartIsReg(rm)); 9378 vassert(gregLO3ofRM(rm) == 2 9379 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 9380 amt = getUChar(delta+1); 9381 delta += 2; 9382 DIP("%s $%d,%s\n", opname, 9383 (Int)amt, 9384 nameXMMReg(eregOfRexRM(pfx,rm)) ); 9385 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 9386 9387 shl = shr = sar = False; 9388 size = 0; 9389 switch (op) { 9390 case Iop_ShlN16x8: shl = True; size = 16; break; 9391 case Iop_ShlN32x4: shl = True; size = 32; break; 9392 case Iop_ShlN64x2: shl = True; size = 64; break; 9393 case Iop_SarN16x8: sar = True; size = 16; break; 9394 case Iop_SarN32x4: sar = True; size = 32; break; 9395 case Iop_ShrN16x8: shr = True; size = 16; break; 9396 case Iop_ShrN32x4: shr = True; size = 32; break; 9397 case Iop_ShrN64x2: shr = True; size = 64; break; 9398 default: vassert(0); 9399 } 9400 9401 if (shl || shr) { 9402 assign( e1, amt >= size 9403 ? mkV128(0x0000) 9404 : binop(op, mkexpr(e0), mkU8(amt)) 9405 ); 9406 } else 9407 if (sar) { 9408 assign( e1, amt >= size 9409 ? binop(op, mkexpr(e0), mkU8(size-1)) 9410 : binop(op, mkexpr(e0), mkU8(amt)) 9411 ); 9412 } else { 9413 vassert(0); 9414 } 9415 9416 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 9417 return delta; 9418 } 9419 9420 9421 /* Get the current SSE rounding mode. */ 9422 9423 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 9424 { 9425 return 9426 unop( Iop_64to32, 9427 binop( Iop_And64, 9428 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 9429 mkU64(3) )); 9430 } 9431 9432 static void put_sse_roundingmode ( IRExpr* sseround ) 9433 { 9434 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 9435 stmt( IRStmt_Put( OFFB_SSEROUND, 9436 unop(Iop_32Uto64,sseround) ) ); 9437 } 9438 9439 /* Break a V128-bit value up into four 32-bit ints. */ 9440 9441 static void breakupV128to32s ( IRTemp t128, 9442 /*OUTs*/ 9443 IRTemp* t3, IRTemp* t2, 9444 IRTemp* t1, IRTemp* t0 ) 9445 { 9446 IRTemp hi64 = newTemp(Ity_I64); 9447 IRTemp lo64 = newTemp(Ity_I64); 9448 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 9449 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 9450 9451 vassert(t0 && *t0 == IRTemp_INVALID); 9452 vassert(t1 && *t1 == IRTemp_INVALID); 9453 vassert(t2 && *t2 == IRTemp_INVALID); 9454 vassert(t3 && *t3 == IRTemp_INVALID); 9455 9456 *t0 = newTemp(Ity_I32); 9457 *t1 = newTemp(Ity_I32); 9458 *t2 = newTemp(Ity_I32); 9459 *t3 = newTemp(Ity_I32); 9460 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 9461 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 9462 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 9463 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 9464 } 9465 9466 /* Construct a V128-bit value from four 32-bit ints. */ 9467 9468 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2, 9469 IRTemp t1, IRTemp t0 ) 9470 { 9471 return 9472 binop( Iop_64HLtoV128, 9473 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9474 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 9475 ); 9476 } 9477 9478 /* Break a 64-bit value up into four 16-bit ints. 
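   For example, t64 = 0x1111222233334444 gives t3 = 0x1111, t2 = 0x2222,
   t1 = 0x3333, t0 = 0x4444; that is, t3 is the most significant 16 bits
   and t0 the least.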
*/ 9479 9480 static void breakup64to16s ( IRTemp t64, 9481 /*OUTs*/ 9482 IRTemp* t3, IRTemp* t2, 9483 IRTemp* t1, IRTemp* t0 ) 9484 { 9485 IRTemp hi32 = newTemp(Ity_I32); 9486 IRTemp lo32 = newTemp(Ity_I32); 9487 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 9488 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 9489 9490 vassert(t0 && *t0 == IRTemp_INVALID); 9491 vassert(t1 && *t1 == IRTemp_INVALID); 9492 vassert(t2 && *t2 == IRTemp_INVALID); 9493 vassert(t3 && *t3 == IRTemp_INVALID); 9494 9495 *t0 = newTemp(Ity_I16); 9496 *t1 = newTemp(Ity_I16); 9497 *t2 = newTemp(Ity_I16); 9498 *t3 = newTemp(Ity_I16); 9499 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 9500 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 9501 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 9502 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 9503 } 9504 9505 /* Construct a 64-bit value from four 16-bit ints. */ 9506 9507 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 9508 IRTemp t1, IRTemp t0 ) 9509 { 9510 return 9511 binop( Iop_32HLto64, 9512 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 9513 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 9514 ); 9515 } 9516 9517 /* Break a V256-bit value up into four 64-bit ints. */ 9518 9519 static void breakupV256to64s ( IRTemp t256, 9520 /*OUTs*/ 9521 IRTemp* t3, IRTemp* t2, 9522 IRTemp* t1, IRTemp* t0 ) 9523 { 9524 vassert(t0 && *t0 == IRTemp_INVALID); 9525 vassert(t1 && *t1 == IRTemp_INVALID); 9526 vassert(t2 && *t2 == IRTemp_INVALID); 9527 vassert(t3 && *t3 == IRTemp_INVALID); 9528 *t0 = newTemp(Ity_I64); 9529 *t1 = newTemp(Ity_I64); 9530 *t2 = newTemp(Ity_I64); 9531 *t3 = newTemp(Ity_I64); 9532 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) ); 9533 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) ); 9534 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) ); 9535 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) ); 9536 } 9537 9538 /* Break a V256-bit value up into two V128s. */ 9539 9540 static void breakupV256toV128s ( IRTemp t256, 9541 /*OUTs*/ 9542 IRTemp* t1, IRTemp* t0 ) 9543 { 9544 vassert(t0 && *t0 == IRTemp_INVALID); 9545 vassert(t1 && *t1 == IRTemp_INVALID); 9546 *t0 = newTemp(Ity_V128); 9547 *t1 = newTemp(Ity_V128); 9548 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256))); 9549 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256))); 9550 } 9551 9552 /* Break a V256-bit value up into eight 32-bit ints. */ 9553 9554 static void breakupV256to32s ( IRTemp t256, 9555 /*OUTs*/ 9556 IRTemp* t7, IRTemp* t6, 9557 IRTemp* t5, IRTemp* t4, 9558 IRTemp* t3, IRTemp* t2, 9559 IRTemp* t1, IRTemp* t0 ) 9560 { 9561 IRTemp t128_1 = IRTemp_INVALID; 9562 IRTemp t128_0 = IRTemp_INVALID; 9563 breakupV256toV128s( t256, &t128_1, &t128_0 ); 9564 breakupV128to32s( t128_1, t7, t6, t5, t4 ); 9565 breakupV128to32s( t128_0, t3, t2, t1, t0 ); 9566 } 9567 9568 /* Break a V128-bit value up into two 64-bit ints. */ 9569 9570 static void breakupV128to64s ( IRTemp t128, 9571 /*OUTs*/ 9572 IRTemp* t1, IRTemp* t0 ) 9573 { 9574 vassert(t0 && *t0 == IRTemp_INVALID); 9575 vassert(t1 && *t1 == IRTemp_INVALID); 9576 *t0 = newTemp(Ity_I64); 9577 *t1 = newTemp(Ity_I64); 9578 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) ); 9579 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) ); 9580 } 9581 9582 /* Construct a V256-bit value from eight 32-bit ints. 
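   t7 supplies the most significant 32 bits of the result and t0 the
   least significant, mirroring the ordering used by breakupV256to32s
   above.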
*/ 9583 9584 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6, 9585 IRTemp t5, IRTemp t4, 9586 IRTemp t3, IRTemp t2, 9587 IRTemp t1, IRTemp t0 ) 9588 { 9589 return 9590 binop( Iop_V128HLtoV256, 9591 binop( Iop_64HLtoV128, 9592 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)), 9593 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ), 9594 binop( Iop_64HLtoV128, 9595 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9596 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) ) 9597 ); 9598 } 9599 9600 /* Construct a V256-bit value from four 64-bit ints. */ 9601 9602 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2, 9603 IRTemp t1, IRTemp t0 ) 9604 { 9605 return 9606 binop( Iop_V128HLtoV256, 9607 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)), 9608 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0)) 9609 ); 9610 } 9611 9612 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit 9613 values (aa,bb), computes, for each of the 4 16-bit lanes: 9614 9615 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 9616 */ 9617 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 9618 { 9619 IRTemp aa = newTemp(Ity_I64); 9620 IRTemp bb = newTemp(Ity_I64); 9621 IRTemp aahi32s = newTemp(Ity_I64); 9622 IRTemp aalo32s = newTemp(Ity_I64); 9623 IRTemp bbhi32s = newTemp(Ity_I64); 9624 IRTemp bblo32s = newTemp(Ity_I64); 9625 IRTemp rHi = newTemp(Ity_I64); 9626 IRTemp rLo = newTemp(Ity_I64); 9627 IRTemp one32x2 = newTemp(Ity_I64); 9628 assign(aa, aax); 9629 assign(bb, bbx); 9630 assign( aahi32s, 9631 binop(Iop_SarN32x2, 9632 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 9633 mkU8(16) )); 9634 assign( aalo32s, 9635 binop(Iop_SarN32x2, 9636 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 9637 mkU8(16) )); 9638 assign( bbhi32s, 9639 binop(Iop_SarN32x2, 9640 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 9641 mkU8(16) )); 9642 assign( bblo32s, 9643 binop(Iop_SarN32x2, 9644 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 9645 mkU8(16) )); 9646 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 9647 assign( 9648 rHi, 9649 binop( 9650 Iop_ShrN32x2, 9651 binop( 9652 Iop_Add32x2, 9653 binop( 9654 Iop_ShrN32x2, 9655 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 9656 mkU8(14) 9657 ), 9658 mkexpr(one32x2) 9659 ), 9660 mkU8(1) 9661 ) 9662 ); 9663 assign( 9664 rLo, 9665 binop( 9666 Iop_ShrN32x2, 9667 binop( 9668 Iop_Add32x2, 9669 binop( 9670 Iop_ShrN32x2, 9671 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 9672 mkU8(14) 9673 ), 9674 mkexpr(one32x2) 9675 ), 9676 mkU8(1) 9677 ) 9678 ); 9679 return 9680 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 9681 } 9682 9683 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
Given two 64-bit 9684 values (aa,bb), computes, for each lane: 9685 9686 if aa_lane < 0 then - bb_lane 9687 else if aa_lane > 0 then bb_lane 9688 else 0 9689 */ 9690 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 9691 { 9692 IRTemp aa = newTemp(Ity_I64); 9693 IRTemp bb = newTemp(Ity_I64); 9694 IRTemp zero = newTemp(Ity_I64); 9695 IRTemp bbNeg = newTemp(Ity_I64); 9696 IRTemp negMask = newTemp(Ity_I64); 9697 IRTemp posMask = newTemp(Ity_I64); 9698 IROp opSub = Iop_INVALID; 9699 IROp opCmpGTS = Iop_INVALID; 9700 9701 switch (laneszB) { 9702 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 9703 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 9704 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 9705 default: vassert(0); 9706 } 9707 9708 assign( aa, aax ); 9709 assign( bb, bbx ); 9710 assign( zero, mkU64(0) ); 9711 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 9712 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 9713 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 9714 9715 return 9716 binop(Iop_Or64, 9717 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 9718 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 9719 9720 } 9721 9722 9723 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 9724 value aa, computes, for each lane 9725 9726 if aa < 0 then -aa else aa 9727 9728 Note that the result is interpreted as unsigned, so that the 9729 absolute value of the most negative signed input can be 9730 represented. 9731 */ 9732 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB ) 9733 { 9734 IRTemp res = newTemp(Ity_I64); 9735 IRTemp zero = newTemp(Ity_I64); 9736 IRTemp aaNeg = newTemp(Ity_I64); 9737 IRTemp negMask = newTemp(Ity_I64); 9738 IRTemp posMask = newTemp(Ity_I64); 9739 IROp opSub = Iop_INVALID; 9740 IROp opSarN = Iop_INVALID; 9741 9742 switch (laneszB) { 9743 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 9744 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 9745 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 9746 default: vassert(0); 9747 } 9748 9749 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 9750 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 9751 assign( zero, mkU64(0) ); 9752 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 9753 assign( res, 9754 binop(Iop_Or64, 9755 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 9756 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) )); 9757 return res; 9758 } 9759 9760 /* XMM version of math_PABS_MMX. */ 9761 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB ) 9762 { 9763 IRTemp res = newTemp(Ity_V128); 9764 IRTemp aaHi = newTemp(Ity_I64); 9765 IRTemp aaLo = newTemp(Ity_I64); 9766 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa))); 9767 assign(aaLo, unop(Iop_V128to64, mkexpr(aa))); 9768 assign(res, binop(Iop_64HLtoV128, 9769 mkexpr(math_PABS_MMX(aaHi, laneszB)), 9770 mkexpr(math_PABS_MMX(aaLo, laneszB)))); 9771 return res; 9772 } 9773 9774 /* Specialisations of math_PABS_XMM, since there's no easy way to do 9775 partial applications in C :-( */ 9776 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) { 9777 return math_PABS_XMM(aa, 4); 9778 } 9779 9780 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) { 9781 return math_PABS_XMM(aa, 2); 9782 } 9783 9784 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) { 9785 return math_PABS_XMM(aa, 1); 9786 } 9787 9788 /* YMM version of math_PABS_XMM. 
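   Splits the 256-bit value into two 128-bit halves and applies math_PABS_XMM to each half.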
*/ 9789 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB ) 9790 { 9791 IRTemp res = newTemp(Ity_V256); 9792 IRTemp aaHi = IRTemp_INVALID; 9793 IRTemp aaLo = IRTemp_INVALID; 9794 breakupV256toV128s(aa, &aaHi, &aaLo); 9795 assign(res, binop(Iop_V128HLtoV256, 9796 mkexpr(math_PABS_XMM(aaHi, laneszB)), 9797 mkexpr(math_PABS_XMM(aaLo, laneszB)))); 9798 return res; 9799 } 9800 9801 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) { 9802 return math_PABS_YMM(aa, 4); 9803 } 9804 9805 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) { 9806 return math_PABS_YMM(aa, 2); 9807 } 9808 9809 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) { 9810 return math_PABS_YMM(aa, 1); 9811 } 9812 9813 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 9814 IRTemp lo64, Long byteShift ) 9815 { 9816 vassert(byteShift >= 1 && byteShift <= 7); 9817 return 9818 binop(Iop_Or64, 9819 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 9820 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 9821 ); 9822 } 9823 9824 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 ) 9825 { 9826 IRTemp res = newTemp(Ity_V128); 9827 IRTemp sHi = newTemp(Ity_I64); 9828 IRTemp sLo = newTemp(Ity_I64); 9829 IRTemp dHi = newTemp(Ity_I64); 9830 IRTemp dLo = newTemp(Ity_I64); 9831 IRTemp rHi = newTemp(Ity_I64); 9832 IRTemp rLo = newTemp(Ity_I64); 9833 9834 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 9835 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 9836 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 9837 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 9838 9839 if (imm8 == 0) { 9840 assign( rHi, mkexpr(sHi) ); 9841 assign( rLo, mkexpr(sLo) ); 9842 } 9843 else if (imm8 >= 1 && imm8 <= 7) { 9844 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) ); 9845 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) ); 9846 } 9847 else if (imm8 == 8) { 9848 assign( rHi, mkexpr(dLo) ); 9849 assign( rLo, mkexpr(sHi) ); 9850 } 9851 else if (imm8 >= 9 && imm8 <= 15) { 9852 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) ); 9853 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) ); 9854 } 9855 else if (imm8 == 16) { 9856 assign( rHi, mkexpr(dHi) ); 9857 assign( rLo, mkexpr(dLo) ); 9858 } 9859 else if (imm8 >= 17 && imm8 <= 23) { 9860 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) ); 9861 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) ); 9862 } 9863 else if (imm8 == 24) { 9864 assign( rHi, mkU64(0) ); 9865 assign( rLo, mkexpr(dHi) ); 9866 } 9867 else if (imm8 >= 25 && imm8 <= 31) { 9868 assign( rHi, mkU64(0) ); 9869 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) ); 9870 } 9871 else if (imm8 >= 32 && imm8 <= 255) { 9872 assign( rHi, mkU64(0) ); 9873 assign( rLo, mkU64(0) ); 9874 } 9875 else 9876 vassert(0); 9877 9878 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))); 9879 return res; 9880 } 9881 9882 9883 /* Generate a SIGSEGV followed by a restart of the current instruction 9884 if effective_addr is not 16-aligned. This is required behaviour 9885 for some SSE3 instructions and all 128-bit SSSE3 instructions. 9886 This assumes that guest_RIP_curr_instr is set correctly! 
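   The mask argument below is the alignment minus 1 (e.g. 0xF for 16-byte alignment); the side-exit is taken whenever (effective_addr & mask) is nonzero.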
*/
9887 static
9888 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
9889 {
9890 stmt(
9891 IRStmt_Exit(
9892 binop(Iop_CmpNE64,
9893 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
9894 mkU64(0)),
9895 Ijk_SigSEGV,
9896 IRConst_U64(guest_RIP_curr_instr),
9897 OFFB_RIP
9898 )
9899 );
9900 }
9901
9902 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
9903 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
9904 }
9905
9906 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
9907 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
9908 }
9909
9910 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
9911 gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
9912 }
9913
9914 /* Helper for deciding whether a given insn (starting at the opcode
9915 byte) may validly be used with a LOCK prefix. The following insns
9916 may be used with LOCK when their destination operand is in memory.
9917 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
9918
9919 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
9920 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
9921 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
9922 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
9923 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
9924 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
9925 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
9926
9927 DEC FE /1, FF /1
9928 INC FE /0, FF /0
9929
9930 NEG F6 /3, F7 /3
9931 NOT F6 /2, F7 /2
9932
9933 XCHG 86, 87
9934
9935 BTC 0F BB, 0F BA /7
9936 BTR 0F B3, 0F BA /6
9937 BTS 0F AB, 0F BA /5
9938
9939 CMPXCHG 0F B0, 0F B1
9940 CMPXCHG8B 0F C7 /1
9941
9942 XADD 0F C0, 0F C1
9943
9944 ------------------------------
9945
9946 80 /0 = addb $imm8, rm8
9947 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
9948 82 /0 = addb $imm8, rm8
9949 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
9950
9951 00 = addb r8, rm8
9952 01 = addl r32, rm32 and addw r16, rm16
9953
9954 Same for ADD OR ADC SBB AND SUB XOR
9955
9956 FE /1 = dec rm8
9957 FF /1 = dec rm32 and dec rm16
9958
9959 FE /0 = inc rm8
9960 FF /0 = inc rm32 and inc rm16
9961
9962 F6 /3 = neg rm8
9963 F7 /3 = neg rm32 and neg rm16
9964
9965 F6 /2 = not rm8
9966 F7 /2 = not rm32 and not rm16
9967
9968 0F BB = btcw r16, rm16 and btcl r32, rm32
9969 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
9970
9971 Same for BTS, BTR
9972 */
9973 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
9974 {
9975 switch (opc[0]) {
9976 case 0x00: case 0x01: case 0x08: case 0x09:
9977 case 0x10: case 0x11: case 0x18: case 0x19:
9978 case 0x20: case 0x21: case 0x28: case 0x29:
9979 case 0x30: case 0x31:
9980 if (!epartIsReg(opc[1]))
9981 return True;
9982 break;
9983
9984 case 0x80: case 0x81: case 0x82: case 0x83:
9985 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
9986 && !epartIsReg(opc[1]))
9987 return True;
9988 break;
9989
9990 case 0xFE: case 0xFF:
9991 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
9992 && !epartIsReg(opc[1]))
9993 return True;
9994 break;
9995
9996 case 0xF6: case 0xF7:
9997 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
9998 && !epartIsReg(opc[1]))
9999 return True;
10000 break;
10001
10002 case 0x86: case 0x87:
10003 if (!epartIsReg(opc[1]))
10004 return True;
10005 break;
10006
10007 case 0x0F: {
10008 switch (opc[1]) {
10009 case 0xBB: case 0xB3: case 0xAB:
10010 if (!epartIsReg(opc[2]))
10011 return True;
10012 break;
10013 case 0xBA:
10014 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10015 && !epartIsReg(opc[2]))
10016 return True;
10017 break;
10018 case 0xB0:
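/* CMPXCHG: 0F B0 (byte) and 0F B1 (word/dword/qword) */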
case 0xB1: 10019 if (!epartIsReg(opc[2])) 10020 return True; 10021 break; 10022 case 0xC7: 10023 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 10024 return True; 10025 break; 10026 case 0xC0: case 0xC1: 10027 if (!epartIsReg(opc[2])) 10028 return True; 10029 break; 10030 default: 10031 break; 10032 } /* switch (opc[1]) */ 10033 break; 10034 } 10035 10036 default: 10037 break; 10038 } /* switch (opc[0]) */ 10039 10040 return False; 10041 } 10042 10043 10044 /*------------------------------------------------------------*/ 10045 /*--- ---*/ 10046 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/ 10047 /*--- ---*/ 10048 /*------------------------------------------------------------*/ 10049 10050 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx, 10051 Long delta, Bool isAvx, UChar opc ) 10052 { 10053 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/); 10054 Int alen = 0; 10055 HChar dis_buf[50]; 10056 IRTemp argL = newTemp(Ity_F64); 10057 IRTemp argR = newTemp(Ity_F64); 10058 UChar modrm = getUChar(delta); 10059 IRTemp addr = IRTemp_INVALID; 10060 if (epartIsReg(modrm)) { 10061 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), 10062 0/*lowest lane*/ ) ); 10063 delta += 1; 10064 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 10065 opc==0x2E ? "u" : "", 10066 nameXMMReg(eregOfRexRM(pfx,modrm)), 10067 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10068 } else { 10069 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10070 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 10071 delta += alen; 10072 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 10073 opc==0x2E ? "u" : "", 10074 dis_buf, 10075 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10076 } 10077 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 10078 0/*lowest lane*/ ) ); 10079 10080 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10081 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10082 stmt( IRStmt_Put( 10083 OFFB_CC_DEP1, 10084 binop( Iop_And64, 10085 unop( Iop_32Uto64, 10086 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 10087 mkU64(0x45) 10088 ))); 10089 return delta; 10090 } 10091 10092 10093 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx, 10094 Long delta, Bool isAvx, UChar opc ) 10095 { 10096 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/); 10097 Int alen = 0; 10098 HChar dis_buf[50]; 10099 IRTemp argL = newTemp(Ity_F32); 10100 IRTemp argR = newTemp(Ity_F32); 10101 UChar modrm = getUChar(delta); 10102 IRTemp addr = IRTemp_INVALID; 10103 if (epartIsReg(modrm)) { 10104 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 10105 0/*lowest lane*/ ) ); 10106 delta += 1; 10107 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10108 opc==0x2E ? "u" : "", 10109 nameXMMReg(eregOfRexRM(pfx,modrm)), 10110 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10111 } else { 10112 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10113 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 10114 delta += alen; 10115 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10116 opc==0x2E ? 
"u" : "", 10117 dis_buf, 10118 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10119 } 10120 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 10121 0/*lowest lane*/ ) ); 10122 10123 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10124 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10125 stmt( IRStmt_Put( 10126 OFFB_CC_DEP1, 10127 binop( Iop_And64, 10128 unop( Iop_32Uto64, 10129 binop(Iop_CmpF64, 10130 unop(Iop_F32toF64,mkexpr(argL)), 10131 unop(Iop_F32toF64,mkexpr(argR)))), 10132 mkU64(0x45) 10133 ))); 10134 return delta; 10135 } 10136 10137 10138 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx, 10139 Long delta, Bool writesYmm ) 10140 { 10141 Int order; 10142 Int alen = 0; 10143 HChar dis_buf[50]; 10144 IRTemp sV = newTemp(Ity_V128); 10145 UChar modrm = getUChar(delta); 10146 const HChar* strV = writesYmm ? "v" : ""; 10147 IRTemp addr = IRTemp_INVALID; 10148 if (epartIsReg(modrm)) { 10149 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10150 order = (Int)getUChar(delta+1); 10151 delta += 1+1; 10152 DIP("%spshufd $%d,%s,%s\n", strV, order, 10153 nameXMMReg(eregOfRexRM(pfx,modrm)), 10154 nameXMMReg(gregOfRexRM(pfx,modrm))); 10155 } else { 10156 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10157 1/*byte after the amode*/ ); 10158 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10159 order = (Int)getUChar(delta+alen); 10160 delta += alen+1; 10161 DIP("%spshufd $%d,%s,%s\n", strV, order, 10162 dis_buf, 10163 nameXMMReg(gregOfRexRM(pfx,modrm))); 10164 } 10165 10166 IRTemp s3, s2, s1, s0; 10167 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10168 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10169 10170 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10171 IRTemp dV = newTemp(Ity_V128); 10172 assign(dV, 10173 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 10174 SEL((order>>2)&3), SEL((order>>0)&3) ) 10175 ); 10176 # undef SEL 10177 10178 (writesYmm ? 
putYMMRegLoAndZU : putXMMReg) 10179 (gregOfRexRM(pfx,modrm), mkexpr(dV)); 10180 return delta; 10181 } 10182 10183 10184 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 10185 { 10186 Int order; 10187 Int alen = 0; 10188 HChar dis_buf[50]; 10189 IRTemp sV = newTemp(Ity_V256); 10190 UChar modrm = getUChar(delta); 10191 IRTemp addr = IRTemp_INVALID; 10192 UInt rG = gregOfRexRM(pfx,modrm); 10193 if (epartIsReg(modrm)) { 10194 UInt rE = eregOfRexRM(pfx,modrm); 10195 assign( sV, getYMMReg(rE) ); 10196 order = (Int)getUChar(delta+1); 10197 delta += 1+1; 10198 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG)); 10199 } else { 10200 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10201 1/*byte after the amode*/ ); 10202 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 10203 order = (Int)getUChar(delta+alen); 10204 delta += alen+1; 10205 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG)); 10206 } 10207 10208 IRTemp s[8]; 10209 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 10210 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 10211 &s[3], &s[2], &s[1], &s[0] ); 10212 10213 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)], 10214 s[4 + ((order>>4)&3)], 10215 s[4 + ((order>>2)&3)], 10216 s[4 + ((order>>0)&3)], 10217 s[0 + ((order>>6)&3)], 10218 s[0 + ((order>>4)&3)], 10219 s[0 + ((order>>2)&3)], 10220 s[0 + ((order>>0)&3)] ) ); 10221 return delta; 10222 } 10223 10224 10225 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm ) 10226 { 10227 IRTemp dV = newTemp(Ity_V128); 10228 IRTemp hi64 = newTemp(Ity_I64); 10229 IRTemp lo64 = newTemp(Ity_I64); 10230 IRTemp hi64r = newTemp(Ity_I64); 10231 IRTemp lo64r = newTemp(Ity_I64); 10232 10233 vassert(imm >= 0 && imm <= 255); 10234 if (imm >= 16) { 10235 assign(dV, mkV128(0x0000)); 10236 return dV; 10237 } 10238 10239 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10240 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10241 10242 if (imm == 0) { 10243 assign( lo64r, mkexpr(lo64) ); 10244 assign( hi64r, mkexpr(hi64) ); 10245 } 10246 else 10247 if (imm == 8) { 10248 assign( hi64r, mkU64(0) ); 10249 assign( lo64r, mkexpr(hi64) ); 10250 } 10251 else 10252 if (imm > 8) { 10253 assign( hi64r, mkU64(0) ); 10254 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) )); 10255 } else { 10256 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) )); 10257 assign( lo64r, 10258 binop( Iop_Or64, 10259 binop(Iop_Shr64, mkexpr(lo64), 10260 mkU8(8 * imm)), 10261 binop(Iop_Shl64, mkexpr(hi64), 10262 mkU8(8 * (8 - imm)) ) 10263 ) 10264 ); 10265 } 10266 10267 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10268 return dV; 10269 } 10270 10271 10272 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm ) 10273 { 10274 IRTemp dV = newTemp(Ity_V128); 10275 IRTemp hi64 = newTemp(Ity_I64); 10276 IRTemp lo64 = newTemp(Ity_I64); 10277 IRTemp hi64r = newTemp(Ity_I64); 10278 IRTemp lo64r = newTemp(Ity_I64); 10279 10280 vassert(imm >= 0 && imm <= 255); 10281 if (imm >= 16) { 10282 assign(dV, mkV128(0x0000)); 10283 return dV; 10284 } 10285 10286 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10287 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10288 10289 if (imm == 0) { 10290 assign( lo64r, mkexpr(lo64) ); 10291 assign( hi64r, mkexpr(hi64) ); 10292 } 10293 else 10294 if (imm == 8) { 10295 assign( lo64r, mkU64(0) ); 10296 assign( hi64r, mkexpr(lo64) ); 10297 } 10298 else 10299 if (imm > 8) { 10300 assign( lo64r, mkU64(0) ); 10301 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 
8*(imm-8) ) )); 10302 } else { 10303 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) )); 10304 assign( hi64r, 10305 binop( Iop_Or64, 10306 binop(Iop_Shl64, mkexpr(hi64), 10307 mkU8(8 * imm)), 10308 binop(Iop_Shr64, mkexpr(lo64), 10309 mkU8(8 * (8 - imm)) ) 10310 ) 10311 ); 10312 } 10313 10314 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10315 return dV; 10316 } 10317 10318 10319 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx, 10320 Long delta, Bool isAvx, UChar opc, Int sz ) 10321 { 10322 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/); 10323 HChar dis_buf[50]; 10324 Int alen = 0; 10325 UChar modrm = getUChar(delta); 10326 IRTemp addr = IRTemp_INVALID; 10327 IRTemp rmode = newTemp(Ity_I32); 10328 IRTemp f64lo = newTemp(Ity_F64); 10329 Bool r2zero = toBool(opc == 0x2C); 10330 10331 if (epartIsReg(modrm)) { 10332 delta += 1; 10333 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10334 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10335 nameXMMReg(eregOfRexRM(pfx,modrm)), 10336 nameIReg(sz, gregOfRexRM(pfx,modrm), 10337 False)); 10338 } else { 10339 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10340 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 10341 delta += alen; 10342 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10343 dis_buf, 10344 nameIReg(sz, gregOfRexRM(pfx,modrm), 10345 False)); 10346 } 10347 10348 if (r2zero) { 10349 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10350 } else { 10351 assign( rmode, get_sse_roundingmode() ); 10352 } 10353 10354 if (sz == 4) { 10355 putIReg32( gregOfRexRM(pfx,modrm), 10356 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 10357 } else { 10358 vassert(sz == 8); 10359 putIReg64( gregOfRexRM(pfx,modrm), 10360 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 10361 } 10362 10363 return delta; 10364 } 10365 10366 10367 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx, 10368 Long delta, Bool isAvx, UChar opc, Int sz ) 10369 { 10370 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/); 10371 HChar dis_buf[50]; 10372 Int alen = 0; 10373 UChar modrm = getUChar(delta); 10374 IRTemp addr = IRTemp_INVALID; 10375 IRTemp rmode = newTemp(Ity_I32); 10376 IRTemp f32lo = newTemp(Ity_F32); 10377 Bool r2zero = toBool(opc == 0x2C); 10378 10379 if (epartIsReg(modrm)) { 10380 delta += 1; 10381 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 10382 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10383 nameXMMReg(eregOfRexRM(pfx,modrm)), 10384 nameIReg(sz, gregOfRexRM(pfx,modrm), 10385 False)); 10386 } else { 10387 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10388 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 10389 delta += alen; 10390 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? 
"t" : "", 10391 dis_buf, 10392 nameIReg(sz, gregOfRexRM(pfx,modrm), 10393 False)); 10394 } 10395 10396 if (r2zero) { 10397 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10398 } else { 10399 assign( rmode, get_sse_roundingmode() ); 10400 } 10401 10402 if (sz == 4) { 10403 putIReg32( gregOfRexRM(pfx,modrm), 10404 binop( Iop_F64toI32S, 10405 mkexpr(rmode), 10406 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10407 } else { 10408 vassert(sz == 8); 10409 putIReg64( gregOfRexRM(pfx,modrm), 10410 binop( Iop_F64toI64S, 10411 mkexpr(rmode), 10412 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10413 } 10414 10415 return delta; 10416 } 10417 10418 10419 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx, 10420 Long delta, Bool isAvx ) 10421 { 10422 IRTemp addr = IRTemp_INVALID; 10423 Int alen = 0; 10424 HChar dis_buf[50]; 10425 IRTemp f32lo = newTemp(Ity_F32); 10426 IRTemp f32hi = newTemp(Ity_F32); 10427 UChar modrm = getUChar(delta); 10428 UInt rG = gregOfRexRM(pfx,modrm); 10429 if (epartIsReg(modrm)) { 10430 UInt rE = eregOfRexRM(pfx,modrm); 10431 assign( f32lo, getXMMRegLane32F(rE, 0) ); 10432 assign( f32hi, getXMMRegLane32F(rE, 1) ); 10433 delta += 1; 10434 DIP("%scvtps2pd %s,%s\n", 10435 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10436 } else { 10437 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10438 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 10439 assign( f32hi, loadLE(Ity_F32, 10440 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10441 delta += alen; 10442 DIP("%scvtps2pd %s,%s\n", 10443 isAvx ? "v" : "", dis_buf, nameXMMReg(rG)); 10444 } 10445 10446 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) ); 10447 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) ); 10448 if (isAvx) 10449 putYMMRegLane128( rG, 1, mkV128(0)); 10450 return delta; 10451 } 10452 10453 10454 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx, 10455 Long delta ) 10456 { 10457 IRTemp addr = IRTemp_INVALID; 10458 Int alen = 0; 10459 HChar dis_buf[50]; 10460 IRTemp f32_0 = newTemp(Ity_F32); 10461 IRTemp f32_1 = newTemp(Ity_F32); 10462 IRTemp f32_2 = newTemp(Ity_F32); 10463 IRTemp f32_3 = newTemp(Ity_F32); 10464 UChar modrm = getUChar(delta); 10465 UInt rG = gregOfRexRM(pfx,modrm); 10466 if (epartIsReg(modrm)) { 10467 UInt rE = eregOfRexRM(pfx,modrm); 10468 assign( f32_0, getXMMRegLane32F(rE, 0) ); 10469 assign( f32_1, getXMMRegLane32F(rE, 1) ); 10470 assign( f32_2, getXMMRegLane32F(rE, 2) ); 10471 assign( f32_3, getXMMRegLane32F(rE, 3) ); 10472 delta += 1; 10473 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 10474 } else { 10475 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10476 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) ); 10477 assign( f32_1, loadLE(Ity_F32, 10478 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10479 assign( f32_2, loadLE(Ity_F32, 10480 binop(Iop_Add64,mkexpr(addr),mkU64(8))) ); 10481 assign( f32_3, loadLE(Ity_F32, 10482 binop(Iop_Add64,mkexpr(addr),mkU64(12))) ); 10483 delta += alen; 10484 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG)); 10485 } 10486 10487 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) ); 10488 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) ); 10489 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) ); 10490 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) ); 10491 return delta; 10492 } 10493 10494 10495 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx, 10496 Long delta, Bool isAvx ) 10497 { 10498 IRTemp addr = IRTemp_INVALID; 10499 Int alen = 0; 10500 HChar 
dis_buf[50]; 10501 UChar modrm = getUChar(delta); 10502 UInt rG = gregOfRexRM(pfx,modrm); 10503 IRTemp argV = newTemp(Ity_V128); 10504 IRTemp rmode = newTemp(Ity_I32); 10505 if (epartIsReg(modrm)) { 10506 UInt rE = eregOfRexRM(pfx,modrm); 10507 assign( argV, getXMMReg(rE) ); 10508 delta += 1; 10509 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10510 nameXMMReg(rE), nameXMMReg(rG)); 10511 } else { 10512 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10513 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10514 delta += alen; 10515 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10516 dis_buf, nameXMMReg(rG) ); 10517 } 10518 10519 assign( rmode, get_sse_roundingmode() ); 10520 IRTemp t0 = newTemp(Ity_F64); 10521 IRTemp t1 = newTemp(Ity_F64); 10522 assign( t0, unop(Iop_ReinterpI64asF64, 10523 unop(Iop_V128to64, mkexpr(argV))) ); 10524 assign( t1, unop(Iop_ReinterpI64asF64, 10525 unop(Iop_V128HIto64, mkexpr(argV))) ); 10526 10527 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) ) 10528 putXMMRegLane32( rG, 3, mkU32(0) ); 10529 putXMMRegLane32( rG, 2, mkU32(0) ); 10530 putXMMRegLane32F( rG, 1, CVT(t1) ); 10531 putXMMRegLane32F( rG, 0, CVT(t0) ); 10532 # undef CVT 10533 if (isAvx) 10534 putYMMRegLane128( rG, 1, mkV128(0) ); 10535 10536 return delta; 10537 } 10538 10539 10540 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 10541 Long delta, Bool isAvx, Bool r2zero ) 10542 { 10543 IRTemp addr = IRTemp_INVALID; 10544 Int alen = 0; 10545 HChar dis_buf[50]; 10546 UChar modrm = getUChar(delta); 10547 IRTemp argV = newTemp(Ity_V128); 10548 IRTemp rmode = newTemp(Ity_I32); 10549 UInt rG = gregOfRexRM(pfx,modrm); 10550 IRTemp t0, t1, t2, t3; 10551 10552 if (epartIsReg(modrm)) { 10553 UInt rE = eregOfRexRM(pfx,modrm); 10554 assign( argV, getXMMReg(rE) ); 10555 delta += 1; 10556 DIP("%scvt%sps2dq %s,%s\n", 10557 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10558 } else { 10559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10560 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10561 delta += alen; 10562 DIP("%scvt%sps2dq %s,%s\n", 10563 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10564 } 10565 10566 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10567 : get_sse_roundingmode() ); 10568 t0 = t1 = t2 = t3 = IRTemp_INVALID; 10569 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10570 /* This is less than ideal. If it turns out to be a performance 10571 bottleneck it can be improved. */ 10572 # define CVT(_t) \ 10573 binop( Iop_F64toI32S, \ 10574 mkexpr(rmode), \ 10575 unop( Iop_F32toF64, \ 10576 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10577 10578 putXMMRegLane32( rG, 3, CVT(t3) ); 10579 putXMMRegLane32( rG, 2, CVT(t2) ); 10580 putXMMRegLane32( rG, 1, CVT(t1) ); 10581 putXMMRegLane32( rG, 0, CVT(t0) ); 10582 # undef CVT 10583 if (isAvx) 10584 putYMMRegLane128( rG, 1, mkV128(0) ); 10585 10586 return delta; 10587 } 10588 10589 10590 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 10591 Long delta, Bool r2zero ) 10592 { 10593 IRTemp addr = IRTemp_INVALID; 10594 Int alen = 0; 10595 HChar dis_buf[50]; 10596 UChar modrm = getUChar(delta); 10597 IRTemp argV = newTemp(Ity_V256); 10598 IRTemp rmode = newTemp(Ity_I32); 10599 UInt rG = gregOfRexRM(pfx,modrm); 10600 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10601 10602 if (epartIsReg(modrm)) { 10603 UInt rE = eregOfRexRM(pfx,modrm); 10604 assign( argV, getYMMReg(rE) ); 10605 delta += 1; 10606 DIP("vcvt%sps2dq %s,%s\n", 10607 r2zero ? 
"t" : "", nameYMMReg(rE), nameYMMReg(rG)); 10608 } else { 10609 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10610 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10611 delta += alen; 10612 DIP("vcvt%sps2dq %s,%s\n", 10613 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) ); 10614 } 10615 10616 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10617 : get_sse_roundingmode() ); 10618 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID; 10619 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10620 /* This is less than ideal. If it turns out to be a performance 10621 bottleneck it can be improved. */ 10622 # define CVT(_t) \ 10623 binop( Iop_F64toI32S, \ 10624 mkexpr(rmode), \ 10625 unop( Iop_F32toF64, \ 10626 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10627 10628 putYMMRegLane32( rG, 7, CVT(t7) ); 10629 putYMMRegLane32( rG, 6, CVT(t6) ); 10630 putYMMRegLane32( rG, 5, CVT(t5) ); 10631 putYMMRegLane32( rG, 4, CVT(t4) ); 10632 putYMMRegLane32( rG, 3, CVT(t3) ); 10633 putYMMRegLane32( rG, 2, CVT(t2) ); 10634 putYMMRegLane32( rG, 1, CVT(t1) ); 10635 putYMMRegLane32( rG, 0, CVT(t0) ); 10636 # undef CVT 10637 10638 return delta; 10639 } 10640 10641 10642 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 10643 Long delta, Bool isAvx, Bool r2zero ) 10644 { 10645 IRTemp addr = IRTemp_INVALID; 10646 Int alen = 0; 10647 HChar dis_buf[50]; 10648 UChar modrm = getUChar(delta); 10649 IRTemp argV = newTemp(Ity_V128); 10650 IRTemp rmode = newTemp(Ity_I32); 10651 UInt rG = gregOfRexRM(pfx,modrm); 10652 IRTemp t0, t1; 10653 10654 if (epartIsReg(modrm)) { 10655 UInt rE = eregOfRexRM(pfx,modrm); 10656 assign( argV, getXMMReg(rE) ); 10657 delta += 1; 10658 DIP("%scvt%spd2dq %s,%s\n", 10659 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10660 } else { 10661 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10662 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10663 delta += alen; 10664 DIP("%scvt%spd2dqx %s,%s\n", 10665 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10666 } 10667 10668 if (r2zero) { 10669 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10670 } else { 10671 assign( rmode, get_sse_roundingmode() ); 10672 } 10673 10674 t0 = newTemp(Ity_F64); 10675 t1 = newTemp(Ity_F64); 10676 assign( t0, unop(Iop_ReinterpI64asF64, 10677 unop(Iop_V128to64, mkexpr(argV))) ); 10678 assign( t1, unop(Iop_ReinterpI64asF64, 10679 unop(Iop_V128HIto64, mkexpr(argV))) ); 10680 10681 # define CVT(_t) binop( Iop_F64toI32S, \ 10682 mkexpr(rmode), \ 10683 mkexpr(_t) ) 10684 10685 putXMMRegLane32( rG, 3, mkU32(0) ); 10686 putXMMRegLane32( rG, 2, mkU32(0) ); 10687 putXMMRegLane32( rG, 1, CVT(t1) ); 10688 putXMMRegLane32( rG, 0, CVT(t0) ); 10689 # undef CVT 10690 if (isAvx) 10691 putYMMRegLane128( rG, 1, mkV128(0) ); 10692 10693 return delta; 10694 } 10695 10696 10697 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx, 10698 Long delta, Bool r2zero ) 10699 { 10700 IRTemp addr = IRTemp_INVALID; 10701 Int alen = 0; 10702 HChar dis_buf[50]; 10703 UChar modrm = getUChar(delta); 10704 IRTemp argV = newTemp(Ity_V256); 10705 IRTemp rmode = newTemp(Ity_I32); 10706 UInt rG = gregOfRexRM(pfx,modrm); 10707 IRTemp t0, t1, t2, t3; 10708 10709 if (epartIsReg(modrm)) { 10710 UInt rE = eregOfRexRM(pfx,modrm); 10711 assign( argV, getYMMReg(rE) ); 10712 delta += 1; 10713 DIP("vcvt%spd2dq %s,%s\n", 10714 r2zero ? 
"t" : "", nameYMMReg(rE), nameXMMReg(rG)); 10715 } else { 10716 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10717 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10718 delta += alen; 10719 DIP("vcvt%spd2dqy %s,%s\n", 10720 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10721 } 10722 10723 if (r2zero) { 10724 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10725 } else { 10726 assign( rmode, get_sse_roundingmode() ); 10727 } 10728 10729 t0 = IRTemp_INVALID; 10730 t1 = IRTemp_INVALID; 10731 t2 = IRTemp_INVALID; 10732 t3 = IRTemp_INVALID; 10733 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 10734 10735 # define CVT(_t) binop( Iop_F64toI32S, \ 10736 mkexpr(rmode), \ 10737 unop( Iop_ReinterpI64asF64, \ 10738 mkexpr(_t) ) ) 10739 10740 putXMMRegLane32( rG, 3, CVT(t3) ); 10741 putXMMRegLane32( rG, 2, CVT(t2) ); 10742 putXMMRegLane32( rG, 1, CVT(t1) ); 10743 putXMMRegLane32( rG, 0, CVT(t0) ); 10744 # undef CVT 10745 putYMMRegLane128( rG, 1, mkV128(0) ); 10746 10747 return delta; 10748 } 10749 10750 10751 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx, 10752 Long delta, Bool isAvx ) 10753 { 10754 IRTemp addr = IRTemp_INVALID; 10755 Int alen = 0; 10756 HChar dis_buf[50]; 10757 UChar modrm = getUChar(delta); 10758 IRTemp argV = newTemp(Ity_V128); 10759 IRTemp rmode = newTemp(Ity_I32); 10760 UInt rG = gregOfRexRM(pfx,modrm); 10761 IRTemp t0, t1, t2, t3; 10762 10763 if (epartIsReg(modrm)) { 10764 UInt rE = eregOfRexRM(pfx,modrm); 10765 assign( argV, getXMMReg(rE) ); 10766 delta += 1; 10767 DIP("%scvtdq2ps %s,%s\n", 10768 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10769 } else { 10770 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10771 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10772 delta += alen; 10773 DIP("%scvtdq2ps %s,%s\n", 10774 isAvx ? 
"v" : "", dis_buf, nameXMMReg(rG) ); 10775 } 10776 10777 assign( rmode, get_sse_roundingmode() ); 10778 t0 = IRTemp_INVALID; 10779 t1 = IRTemp_INVALID; 10780 t2 = IRTemp_INVALID; 10781 t3 = IRTemp_INVALID; 10782 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10783 10784 # define CVT(_t) binop( Iop_F64toF32, \ 10785 mkexpr(rmode), \ 10786 unop(Iop_I32StoF64,mkexpr(_t))) 10787 10788 putXMMRegLane32F( rG, 3, CVT(t3) ); 10789 putXMMRegLane32F( rG, 2, CVT(t2) ); 10790 putXMMRegLane32F( rG, 1, CVT(t1) ); 10791 putXMMRegLane32F( rG, 0, CVT(t0) ); 10792 # undef CVT 10793 if (isAvx) 10794 putYMMRegLane128( rG, 1, mkV128(0) ); 10795 10796 return delta; 10797 } 10798 10799 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx, 10800 Long delta ) 10801 { 10802 IRTemp addr = IRTemp_INVALID; 10803 Int alen = 0; 10804 HChar dis_buf[50]; 10805 UChar modrm = getUChar(delta); 10806 IRTemp argV = newTemp(Ity_V256); 10807 IRTemp rmode = newTemp(Ity_I32); 10808 UInt rG = gregOfRexRM(pfx,modrm); 10809 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10810 10811 if (epartIsReg(modrm)) { 10812 UInt rE = eregOfRexRM(pfx,modrm); 10813 assign( argV, getYMMReg(rE) ); 10814 delta += 1; 10815 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 10816 } else { 10817 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10818 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10819 delta += alen; 10820 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) ); 10821 } 10822 10823 assign( rmode, get_sse_roundingmode() ); 10824 t0 = IRTemp_INVALID; 10825 t1 = IRTemp_INVALID; 10826 t2 = IRTemp_INVALID; 10827 t3 = IRTemp_INVALID; 10828 t4 = IRTemp_INVALID; 10829 t5 = IRTemp_INVALID; 10830 t6 = IRTemp_INVALID; 10831 t7 = IRTemp_INVALID; 10832 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10833 10834 # define CVT(_t) binop( Iop_F64toF32, \ 10835 mkexpr(rmode), \ 10836 unop(Iop_I32StoF64,mkexpr(_t))) 10837 10838 putYMMRegLane32F( rG, 7, CVT(t7) ); 10839 putYMMRegLane32F( rG, 6, CVT(t6) ); 10840 putYMMRegLane32F( rG, 5, CVT(t5) ); 10841 putYMMRegLane32F( rG, 4, CVT(t4) ); 10842 putYMMRegLane32F( rG, 3, CVT(t3) ); 10843 putYMMRegLane32F( rG, 2, CVT(t2) ); 10844 putYMMRegLane32F( rG, 1, CVT(t1) ); 10845 putYMMRegLane32F( rG, 0, CVT(t0) ); 10846 # undef CVT 10847 10848 return delta; 10849 } 10850 10851 10852 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx, 10853 Long delta, Bool isAvx ) 10854 { 10855 UChar modrm = getUChar(delta); 10856 vassert(epartIsReg(modrm)); /* ensured by caller */ 10857 UInt rE = eregOfRexRM(pfx,modrm); 10858 UInt rG = gregOfRexRM(pfx,modrm); 10859 IRTemp t0 = newTemp(Ity_V128); 10860 IRTemp t1 = newTemp(Ity_I32); 10861 assign(t0, getXMMReg(rE)); 10862 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0)))); 10863 putIReg32(rG, mkexpr(t1)); 10864 DIP("%spmovmskb %s,%s\n", isAvx ? 
"v" : "", nameXMMReg(rE), 10865 nameIReg32(rG)); 10866 delta += 1; 10867 return delta; 10868 } 10869 10870 10871 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx, 10872 Long delta ) 10873 { 10874 UChar modrm = getUChar(delta); 10875 vassert(epartIsReg(modrm)); /* ensured by caller */ 10876 UInt rE = eregOfRexRM(pfx,modrm); 10877 UInt rG = gregOfRexRM(pfx,modrm); 10878 IRTemp t0 = newTemp(Ity_V128); 10879 IRTemp t1 = newTemp(Ity_V128); 10880 IRTemp t2 = newTemp(Ity_I16); 10881 IRTemp t3 = newTemp(Ity_I16); 10882 assign(t0, getYMMRegLane128(rE, 0)); 10883 assign(t1, getYMMRegLane128(rE, 1)); 10884 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0))); 10885 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1))); 10886 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2))); 10887 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 10888 delta += 1; 10889 return delta; 10890 } 10891 10892 10893 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the 10894 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */ 10895 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */ 10896 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10897 { 10898 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10899 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10900 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 10901 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10902 IRTemp res = newTemp(Ity_V128); 10903 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 ) 10904 : mkV128from32s( s1, d1, s0, d0 )); 10905 return res; 10906 } 10907 10908 10909 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */ 10910 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */ 10911 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10912 { 10913 IRTemp s1 = newTemp(Ity_I64); 10914 IRTemp s0 = newTemp(Ity_I64); 10915 IRTemp d1 = newTemp(Ity_I64); 10916 IRTemp d0 = newTemp(Ity_I64); 10917 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10918 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10919 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10920 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10921 IRTemp res = newTemp(Ity_V128); 10922 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) 10923 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0))); 10924 return res; 10925 } 10926 10927 10928 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD. 10929 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI} 10930 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid 10931 way. */ 10932 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10933 { 10934 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10935 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10936 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 10937 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 10938 IRTemp res = newTemp(Ity_V256); 10939 assign(res, xIsH 10940 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3), 10941 mkexpr(s1), mkexpr(d1)) 10942 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2), 10943 mkexpr(s0), mkexpr(d0))); 10944 return res; 10945 } 10946 10947 10948 /* FIXME: this is really bad. Surely can do something better here? 10949 One observation is that the steering in the upper and lower 128 bit 10950 halves is the same as with math_UNPCKxPS_128, so we simply split 10951 into two halves, and use that. Consequently any improvement in 10952 math_UNPCKxPS_128 (probably, to use interleave-style primops) 10953 benefits this too. 
*/ 10954 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10955 { 10956 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10957 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10958 breakupV256toV128s( sV, &sVhi, &sVlo ); 10959 breakupV256toV128s( dV, &dVhi, &dVlo ); 10960 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH); 10961 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH); 10962 IRTemp rV = newTemp(Ity_V256); 10963 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 10964 return rV; 10965 } 10966 10967 10968 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10969 { 10970 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10971 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10972 vassert(imm8 < 256); 10973 10974 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 10975 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10976 10977 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 10978 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10979 IRTemp res = newTemp(Ity_V128); 10980 assign(res, 10981 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3), 10982 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) ); 10983 # undef SELD 10984 # undef SELS 10985 return res; 10986 } 10987 10988 10989 /* 256-bit SHUFPS appears to steer each of the 128-bit halves 10990 identically. Hence do the clueless thing and use math_SHUFPS_128 10991 twice. */ 10992 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10993 { 10994 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10995 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10996 breakupV256toV128s( sV, &sVhi, &sVlo ); 10997 breakupV256toV128s( dV, &dVhi, &dVlo ); 10998 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8); 10999 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8); 11000 IRTemp rV = newTemp(Ity_V256); 11001 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 11002 return rV; 11003 } 11004 11005 11006 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11007 { 11008 IRTemp s1 = newTemp(Ity_I64); 11009 IRTemp s0 = newTemp(Ity_I64); 11010 IRTemp d1 = newTemp(Ity_I64); 11011 IRTemp d0 = newTemp(Ity_I64); 11012 11013 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 11014 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 11015 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 11016 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 11017 11018 # define SELD(n) mkexpr((n)==0 ? d0 : d1) 11019 # define SELS(n) mkexpr((n)==0 ? 
s0 : s1) 11020 11021 IRTemp res = newTemp(Ity_V128); 11022 assign(res, binop( Iop_64HLtoV128, 11023 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) ); 11024 11025 # undef SELD 11026 # undef SELS 11027 return res; 11028 } 11029 11030 11031 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11032 { 11033 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 11034 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 11035 breakupV256toV128s( sV, &sVhi, &sVlo ); 11036 breakupV256toV128s( dV, &dVhi, &dVlo ); 11037 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3); 11038 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3); 11039 IRTemp rV = newTemp(Ity_V256); 11040 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 11041 return rV; 11042 } 11043 11044 11045 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11046 { 11047 UShort imm8_mask_16; 11048 IRTemp imm8_mask = newTemp(Ity_V128); 11049 11050 switch( imm8 & 3 ) { 11051 case 0: imm8_mask_16 = 0x0000; break; 11052 case 1: imm8_mask_16 = 0x00FF; break; 11053 case 2: imm8_mask_16 = 0xFF00; break; 11054 case 3: imm8_mask_16 = 0xFFFF; break; 11055 default: vassert(0); break; 11056 } 11057 assign( imm8_mask, mkV128( imm8_mask_16 ) ); 11058 11059 IRTemp res = newTemp(Ity_V128); 11060 assign ( res, binop( Iop_OrV128, 11061 binop( Iop_AndV128, mkexpr(sV), 11062 mkexpr(imm8_mask) ), 11063 binop( Iop_AndV128, mkexpr(dV), 11064 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 11065 return res; 11066 } 11067 11068 11069 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11070 { 11071 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 11072 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 11073 breakupV256toV128s( sV, &sVhi, &sVlo ); 11074 breakupV256toV128s( dV, &dVhi, &dVlo ); 11075 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3); 11076 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3); 11077 IRTemp rV = newTemp(Ity_V256); 11078 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 11079 return rV; 11080 } 11081 11082 11083 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11084 { 11085 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 11086 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 11087 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 11088 0xFFFF }; 11089 IRTemp imm8_mask = newTemp(Ity_V128); 11090 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) ); 11091 11092 IRTemp res = newTemp(Ity_V128); 11093 assign ( res, binop( Iop_OrV128, 11094 binop( Iop_AndV128, mkexpr(sV), 11095 mkexpr(imm8_mask) ), 11096 binop( Iop_AndV128, mkexpr(dV), 11097 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 11098 return res; 11099 } 11100 11101 11102 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11103 { 11104 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 11105 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 11106 breakupV256toV128s( sV, &sVhi, &sVlo ); 11107 breakupV256toV128s( dV, &dVhi, &dVlo ); 11108 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15); 11109 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15); 11110 IRTemp rV = newTemp(Ity_V256); 11111 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 11112 return rV; 11113 } 11114 11115 11116 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11117 { 11118 /* Make w be a 16-bit version of imm8, formed by duplicating each 11119 bit in imm8. 
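   For example, imm8 = 0xA1 (bits 0, 5 and 7 set) gives imm16 = 0xCC03, so 16-bit lanes 0, 5 and 7 of the result are taken from sV and the remaining lanes from dV.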
*/ 11120 Int i; 11121 UShort imm16 = 0; 11122 for (i = 0; i < 8; i++) { 11123 if (imm8 & (1 << i)) 11124 imm16 |= (3 << (2*i)); 11125 } 11126 IRTemp imm16_mask = newTemp(Ity_V128); 11127 assign( imm16_mask, mkV128( imm16 )); 11128 11129 IRTemp res = newTemp(Ity_V128); 11130 assign ( res, binop( Iop_OrV128, 11131 binop( Iop_AndV128, mkexpr(sV), 11132 mkexpr(imm16_mask) ), 11133 binop( Iop_AndV128, mkexpr(dV), 11134 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) ); 11135 return res; 11136 } 11137 11138 11139 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV ) 11140 { 11141 /* This is a really poor translation -- could be improved if 11142 performance critical */ 11143 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 11144 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 11145 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 11146 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11147 IRTemp res = newTemp(Ity_V128); 11148 assign(res, binop(Iop_64HLtoV128, 11149 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)), 11150 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) )); 11151 return res; 11152 } 11153 11154 11155 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV ) 11156 { 11157 /* This is a really poor translation -- could be improved if 11158 performance critical */ 11159 IRTemp sHi, sLo, dHi, dLo; 11160 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11161 breakupV256toV128s( dV, &dHi, &dLo); 11162 breakupV256toV128s( sV, &sHi, &sLo); 11163 IRTemp res = newTemp(Ity_V256); 11164 assign(res, binop(Iop_V128HLtoV256, 11165 mkexpr(math_PMULUDQ_128(sHi, dHi)), 11166 mkexpr(math_PMULUDQ_128(sLo, dLo)))); 11167 return res; 11168 } 11169 11170 11171 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV ) 11172 { 11173 /* This is a really poor translation -- could be improved if 11174 performance critical */ 11175 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 11176 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 11177 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 11178 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11179 IRTemp res = newTemp(Ity_V128); 11180 assign(res, binop(Iop_64HLtoV128, 11181 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)), 11182 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) )); 11183 return res; 11184 } 11185 11186 11187 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV ) 11188 { 11189 /* This is a really poor translation -- could be improved if 11190 performance critical */ 11191 IRTemp sHi, sLo, dHi, dLo; 11192 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11193 breakupV256toV128s( dV, &dHi, &dLo); 11194 breakupV256toV128s( sV, &sHi, &sLo); 11195 IRTemp res = newTemp(Ity_V256); 11196 assign(res, binop(Iop_V128HLtoV256, 11197 mkexpr(math_PMULDQ_128(sHi, dHi)), 11198 mkexpr(math_PMULDQ_128(sLo, dLo)))); 11199 return res; 11200 } 11201 11202 11203 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV ) 11204 { 11205 IRTemp sVhi, sVlo, dVhi, dVlo; 11206 IRTemp resHi = newTemp(Ity_I64); 11207 IRTemp resLo = newTemp(Ity_I64); 11208 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID; 11209 breakupV128to64s( sV, &sVhi, &sVlo ); 11210 breakupV128to64s( dV, &dVhi, &dVlo ); 11211 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/, 11212 "amd64g_calculate_mmx_pmaddwd", 11213 &amd64g_calculate_mmx_pmaddwd, 11214 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi)))); 11215 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/, 11216 "amd64g_calculate_mmx_pmaddwd", 11217 &amd64g_calculate_mmx_pmaddwd, 11218 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo)))); 11219 IRTemp res = newTemp(Ity_V128); 11220 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), 
mkexpr(resLo))) ; 11221 return res; 11222 } 11223 11224 11225 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV ) 11226 { 11227 IRTemp sHi, sLo, dHi, dLo; 11228 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11229 breakupV256toV128s( dV, &dHi, &dLo); 11230 breakupV256toV128s( sV, &sHi, &sLo); 11231 IRTemp res = newTemp(Ity_V256); 11232 assign(res, binop(Iop_V128HLtoV256, 11233 mkexpr(math_PMADDWD_128(dHi, sHi)), 11234 mkexpr(math_PMADDWD_128(dLo, sLo)))); 11235 return res; 11236 } 11237 11238 11239 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV ) 11240 { 11241 IRTemp addV = newTemp(Ity_V128); 11242 IRTemp subV = newTemp(Ity_V128); 11243 IRTemp a1 = newTemp(Ity_I64); 11244 IRTemp s0 = newTemp(Ity_I64); 11245 IRTemp rm = newTemp(Ity_I32); 11246 11247 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11248 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11249 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11250 11251 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 11252 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 11253 11254 IRTemp res = newTemp(Ity_V128); 11255 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 11256 return res; 11257 } 11258 11259 11260 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV ) 11261 { 11262 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11263 IRTemp addV = newTemp(Ity_V256); 11264 IRTemp subV = newTemp(Ity_V256); 11265 IRTemp rm = newTemp(Ity_I32); 11266 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11267 11268 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11269 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11270 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11271 11272 breakupV256to64s( addV, &a3, &a2, &a1, &a0 ); 11273 breakupV256to64s( subV, &s3, &s2, &s1, &s0 ); 11274 11275 IRTemp res = newTemp(Ity_V256); 11276 assign( res, mkV256from64s( a3, s2, a1, s0 ) ); 11277 return res; 11278 } 11279 11280 11281 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV ) 11282 { 11283 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11284 IRTemp addV = newTemp(Ity_V128); 11285 IRTemp subV = newTemp(Ity_V128); 11286 IRTemp rm = newTemp(Ity_I32); 11287 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11288 11289 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11290 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11291 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11292 11293 breakupV128to32s( addV, &a3, &a2, &a1, &a0 ); 11294 breakupV128to32s( subV, &s3, &s2, &s1, &s0 ); 11295 11296 IRTemp res = newTemp(Ity_V128); 11297 assign( res, mkV128from32s( a3, s2, a1, s0 ) ); 11298 return res; 11299 } 11300 11301 11302 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV ) 11303 { 11304 IRTemp a7, a6, a5, a4, a3, a2, a1, a0; 11305 IRTemp s7, s6, s5, s4, s3, s2, s1, s0; 11306 IRTemp addV = newTemp(Ity_V256); 11307 IRTemp subV = newTemp(Ity_V256); 11308 IRTemp rm = newTemp(Ity_I32); 11309 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID; 11310 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11311 11312 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11313 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11314 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11315 11316 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 ); 11317 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, 
&s2, &s1, &s0 ); 11318 11319 IRTemp res = newTemp(Ity_V256); 11320 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) ); 11321 return res; 11322 } 11323 11324 11325 /* Handle 128 bit PSHUFLW and PSHUFHW. */ 11326 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx, 11327 Long delta, Bool isAvx, Bool xIsH ) 11328 { 11329 IRTemp addr = IRTemp_INVALID; 11330 Int alen = 0; 11331 HChar dis_buf[50]; 11332 UChar modrm = getUChar(delta); 11333 UInt rG = gregOfRexRM(pfx,modrm); 11334 UInt imm8; 11335 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0; 11336 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11337 sV = newTemp(Ity_V128); 11338 dV = newTemp(Ity_V128); 11339 sVmut = newTemp(Ity_I64); 11340 dVmut = newTemp(Ity_I64); 11341 sVcon = newTemp(Ity_I64); 11342 if (epartIsReg(modrm)) { 11343 UInt rE = eregOfRexRM(pfx,modrm); 11344 assign( sV, getXMMReg(rE) ); 11345 imm8 = (UInt)getUChar(delta+1); 11346 delta += 1+1; 11347 DIP("%spshuf%cw $%u,%s,%s\n", 11348 isAvx ? "v" : "", xIsH ? 'h' : 'l', 11349 imm8, nameXMMReg(rE), nameXMMReg(rG)); 11350 } else { 11351 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 11352 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11353 imm8 = (UInt)getUChar(delta+alen); 11354 delta += alen+1; 11355 DIP("%spshuf%cw $%u,%s,%s\n", 11356 isAvx ? "v" : "", xIsH ? 'h' : 'l', 11357 imm8, dis_buf, nameXMMReg(rG)); 11358 } 11359 11360 /* Get the to-be-changed (mut) and unchanging (con) bits of the 11361 source. */ 11362 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) ); 11363 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) ); 11364 11365 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 ); 11366 # define SEL(n) \ 11367 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11368 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3), 11369 SEL((imm8>>2)&3), SEL((imm8>>0)&3) )); 11370 # undef SEL 11371 11372 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon)) 11373 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) ); 11374 11375 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV)); 11376 return delta; 11377 } 11378 11379 11380 /* Handle 256 bit PSHUFLW and PSHUFHW. */ 11381 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx, 11382 Long delta, Bool xIsH ) 11383 { 11384 IRTemp addr = IRTemp_INVALID; 11385 Int alen = 0; 11386 HChar dis_buf[50]; 11387 UChar modrm = getUChar(delta); 11388 UInt rG = gregOfRexRM(pfx,modrm); 11389 UInt imm8; 11390 IRTemp sV, s[8], sV64[4], dVhi, dVlo; 11391 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID; 11392 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 11393 sV = newTemp(Ity_V256); 11394 dVhi = newTemp(Ity_I64); 11395 dVlo = newTemp(Ity_I64); 11396 if (epartIsReg(modrm)) { 11397 UInt rE = eregOfRexRM(pfx,modrm); 11398 assign( sV, getYMMReg(rE) ); 11399 imm8 = (UInt)getUChar(delta+1); 11400 delta += 1+1; 11401 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l', 11402 imm8, nameYMMReg(rE), nameYMMReg(rG)); 11403 } else { 11404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 11405 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 11406 imm8 = (UInt)getUChar(delta+alen); 11407 delta += alen+1; 11408 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l', 11409 imm8, dis_buf, nameYMMReg(rG)); 11410 } 11411 11412 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] ); 11413 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] ); 11414 breakup64to16s( sV64[xIsH ? 
1 : 0], &s[3], &s[2], &s[1], &s[0] ); 11415 11416 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)], 11417 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) ); 11418 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)], 11419 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) ); 11420 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3], 11421 xIsH ? sV64[2] : dVhi, 11422 xIsH ? dVlo : sV64[1], 11423 xIsH ? sV64[0] : dVlo ) ); 11424 return delta; 11425 } 11426 11427 11428 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx, 11429 Long delta, Bool isAvx ) 11430 { 11431 Long deltaIN = delta; 11432 UChar modrm = getUChar(delta); 11433 UInt rG = gregOfRexRM(pfx,modrm); 11434 IRTemp sV = newTemp(Ity_V128); 11435 IRTemp d16 = newTemp(Ity_I16); 11436 UInt imm8; 11437 IRTemp s0, s1, s2, s3; 11438 if (epartIsReg(modrm)) { 11439 UInt rE = eregOfRexRM(pfx,modrm); 11440 assign(sV, getXMMReg(rE)); 11441 imm8 = getUChar(delta+1) & 7; 11442 delta += 1+1; 11443 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "", 11444 imm8, nameXMMReg(rE), nameIReg32(rG)); 11445 } else { 11446 /* The memory case is disallowed, apparently. */ 11447 return deltaIN; /* FAIL */ 11448 } 11449 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11450 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11451 switch (imm8) { 11452 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break; 11453 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break; 11454 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break; 11455 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break; 11456 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break; 11457 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break; 11458 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break; 11459 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break; 11460 default: vassert(0); 11461 } 11462 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16))); 11463 return delta; 11464 } 11465 11466 11467 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx, 11468 Long delta, Bool isAvx ) 11469 { 11470 IRTemp addr = IRTemp_INVALID; 11471 Int alen = 0; 11472 HChar dis_buf[50]; 11473 UChar modrm = getUChar(delta); 11474 IRTemp arg64 = newTemp(Ity_I64); 11475 UInt rG = gregOfRexRM(pfx,modrm); 11476 const HChar* mbV = isAvx ? "v" : ""; 11477 if (epartIsReg(modrm)) { 11478 UInt rE = eregOfRexRM(pfx,modrm); 11479 assign( arg64, getXMMRegLane64(rE, 0) ); 11480 delta += 1; 11481 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 11482 } else { 11483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11484 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11485 delta += alen; 11486 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 11487 } 11488 putXMMRegLane64F( 11489 rG, 0, 11490 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 11491 ); 11492 putXMMRegLane64F( 11493 rG, 1, 11494 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 11495 ); 11496 if (isAvx) 11497 putYMMRegLane128(rG, 1, mkV128(0)); 11498 return delta; 11499 } 11500 11501 11502 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11503 Long delta, Bool isAvx ) 11504 { 11505 IRTemp addr = IRTemp_INVALID; 11506 Int alen = 0; 11507 HChar dis_buf[50]; 11508 UChar modrm = getUChar(delta); 11509 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11510 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */ 11511 11512 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11513 delta += alen; 11514 11515 /* Fake up a native SSE mxcsr word. 
The only thing it depends on 11516 is SSEROUND[1:0], so call a clean helper to cook it up. 11517 */ 11518 /* ULong amd64h_create_mxcsr ( ULong sseround ) */ 11519 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11520 storeLE( 11521 mkexpr(addr), 11522 unop(Iop_64to32, 11523 mkIRExprCCall( 11524 Ity_I64, 0/*regp*/, 11525 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 11526 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 11527 ) 11528 ) 11529 ); 11530 return delta; 11531 } 11532 11533 11534 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11535 Long delta, Bool isAvx ) 11536 { 11537 IRTemp addr = IRTemp_INVALID; 11538 Int alen = 0; 11539 HChar dis_buf[50]; 11540 UChar modrm = getUChar(delta); 11541 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11542 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */ 11543 11544 IRTemp t64 = newTemp(Ity_I64); 11545 IRTemp ew = newTemp(Ity_I32); 11546 11547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11548 delta += alen; 11549 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11550 11551 /* The only thing we observe in %mxcsr is the rounding mode. 11552 Therefore, pass the 32-bit value (SSE native-format control 11553 word) to a clean helper, getting back a 64-bit value, the 11554 lower half of which is the SSEROUND value to store, and the 11555 upper half of which is the emulation-warning token which may 11556 be generated. 11557 */ 11558 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 11559 assign( t64, mkIRExprCCall( 11560 Ity_I64, 0/*regparms*/, 11561 "amd64g_check_ldmxcsr", 11562 &amd64g_check_ldmxcsr, 11563 mkIRExprVec_1( 11564 unop(Iop_32Uto64, 11565 loadLE(Ity_I32, mkexpr(addr)) 11566 ) 11567 ) 11568 ) 11569 ); 11570 11571 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 11572 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 11573 put_emwarn( mkexpr(ew) ); 11574 /* Finally, if an emulation warning was reported, side-exit to 11575 the next insn, reporting the warning, so that Valgrind's 11576 dispatcher sees the warning. */ 11577 stmt( 11578 IRStmt_Exit( 11579 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), 11580 Ijk_EmWarn, 11581 IRConst_U64(guest_RIP_bbstart+delta), 11582 OFFB_RIP 11583 ) 11584 ); 11585 return delta; 11586 } 11587 11588 11589 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm ) 11590 { 11591 /* ------ rfbm[0] gates the x87 state ------ */ 11592 11593 /* Uses dirty helper: 11594 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong ) 11595 */ 11596 IRDirty* d0 = unsafeIRDirty_0_N ( 11597 0/*regparms*/, 11598 "amd64g_dirtyhelper_XSAVE_COMPONENT_0", 11599 &amd64g_dirtyhelper_XSAVE_COMPONENT_0, 11600 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 11601 ); 11602 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)), 11603 mkU64(1)); 11604 11605 /* Declare we're writing memory. Really, bytes 24 through 31 11606 (MXCSR and MXCSR_MASK) aren't written, but we can't express more 11607 than 1 memory area here, so just mark the whole thing as 11608 written. 
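For reference, the in-memory image offsets used by this sequence and its callers are (a sketch, taken from the code below): bytes 0..159 are the legacy x87/SSE area, with MXCSR and MXCSR_MASK at 24..31; bytes 160..415 hold XMM0..15, 16 bytes per register; XSTATE_BV lives at bytes 512..519 of the XSAVE header; and bytes 576..831 hold the upper 128-bit halves of YMM0..15.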
*/ 11609 d0->mFx = Ifx_Write; 11610 d0->mAddr = mkexpr(addr); 11611 d0->mSize = 160; 11612 11613 /* declare we're reading guest state */ 11614 d0->nFxState = 5; 11615 vex_bzero(&d0->fxState, sizeof(d0->fxState)); 11616 11617 d0->fxState[0].fx = Ifx_Read; 11618 d0->fxState[0].offset = OFFB_FTOP; 11619 d0->fxState[0].size = sizeof(UInt); 11620 11621 d0->fxState[1].fx = Ifx_Read; 11622 d0->fxState[1].offset = OFFB_FPREGS; 11623 d0->fxState[1].size = 8 * sizeof(ULong); 11624 11625 d0->fxState[2].fx = Ifx_Read; 11626 d0->fxState[2].offset = OFFB_FPTAGS; 11627 d0->fxState[2].size = 8 * sizeof(UChar); 11628 11629 d0->fxState[3].fx = Ifx_Read; 11630 d0->fxState[3].offset = OFFB_FPROUND; 11631 d0->fxState[3].size = sizeof(ULong); 11632 11633 d0->fxState[4].fx = Ifx_Read; 11634 d0->fxState[4].offset = OFFB_FC3210; 11635 d0->fxState[4].size = sizeof(ULong); 11636 11637 stmt( IRStmt_Dirty(d0) ); 11638 11639 /* ------ rfbm[1] gates the SSE state ------ */ 11640 11641 IRTemp rfbm_1 = newTemp(Ity_I64); 11642 IRTemp rfbm_1or2 = newTemp(Ity_I64); 11643 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2))); 11644 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6))); 11645 11646 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2)); 11647 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0)); 11648 11649 /* Uses dirty helper: 11650 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS 11651 ( VexGuestAMD64State*, ULong ) 11652 This creates only MXCSR and MXCSR_MASK. We need to do this if 11653 either components 1 (SSE) or 2 (AVX) are requested. Hence the 11654 guard condition is a bit more complex. 11655 */ 11656 IRDirty* d1 = unsafeIRDirty_0_N ( 11657 0/*regparms*/, 11658 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS", 11659 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS, 11660 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 11661 ); 11662 d1->guard = guard_1or2; 11663 11664 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that 11665 the code for rbfm[0] just above claims a write of 0 .. 159, so 11666 this duplicates it. But at least correctly connects 24 .. 31 to 11667 the MXCSR guest state representation (SSEROUND field). */ 11668 d1->mFx = Ifx_Write; 11669 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24)); 11670 d1->mSize = 8; 11671 11672 /* declare we're reading guest state */ 11673 d1->nFxState = 1; 11674 vex_bzero(&d1->fxState, sizeof(d1->fxState)); 11675 11676 d1->fxState[0].fx = Ifx_Read; 11677 d1->fxState[0].offset = OFFB_SSEROUND; 11678 d1->fxState[0].size = sizeof(ULong); 11679 11680 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing 11681 else. We do the actual register array, XMM[0..15], separately, 11682 in order that any undefinedness in the XMM registers is tracked 11683 separately by Memcheck and does not "infect" the in-memory 11684 shadow for the other parts of the image. */ 11685 stmt( IRStmt_Dirty(d1) ); 11686 11687 /* And now the XMMs themselves. */ 11688 UInt reg; 11689 for (reg = 0; reg < 16; reg++) { 11690 stmt( IRStmt_StoreG( 11691 Iend_LE, 11692 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)), 11693 getXMMReg(reg), 11694 guard_1 11695 )); 11696 } 11697 11698 /* ------ rfbm[2] gates the AVX state ------ */ 11699 /* Component 2 is just a bunch of register saves, so we'll do it 11700 inline, just to be simple and to be Memcheck friendly. 
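Using guarded stores (IRStmt_StoreG) lets the whole sequence be emitted unconditionally while the stores only take effect when rfbm[2] is set, and it keeps the definedness of each YMM high half visible to Memcheck on a per-register basis.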
*/ 11701 11702 IRTemp rfbm_2 = newTemp(Ity_I64); 11703 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4))); 11704 11705 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4)); 11706 11707 for (reg = 0; reg < 16; reg++) { 11708 stmt( IRStmt_StoreG( 11709 Iend_LE, 11710 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)), 11711 getYMMRegLane128(reg,1), 11712 guard_2 11713 )); 11714 } 11715 } 11716 11717 11718 static Long dis_XSAVE ( const VexAbiInfo* vbi, 11719 Prefix pfx, Long delta, Int sz ) 11720 { 11721 /* Note that the presence or absence of REX.W (indicated here by 11722 |sz|) slightly affects the written format: whether the saved FPU 11723 IP and DP pointers are 64 or 32 bits. But the helper function 11724 we call simply writes zero bits in the relevant fields, which 11725 are 64 bits regardless of what REX.W is, and so it's good enough 11726 (iow, equally broken) in both cases. */ 11727 IRTemp addr = IRTemp_INVALID; 11728 Int alen = 0; 11729 HChar dis_buf[50]; 11730 UChar modrm = getUChar(delta); 11731 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11732 vassert(sz == 4 || sz == 8); /* ditto */ 11733 11734 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11735 delta += alen; 11736 gen_SEGV_if_not_64_aligned(addr); 11737 11738 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 11739 11740 /* VEX's caller is assumed to have checked this. */ 11741 const ULong aSSUMED_XCR0_VALUE = 7; 11742 11743 IRTemp rfbm = newTemp(Ity_I64); 11744 assign(rfbm, 11745 binop(Iop_And64, 11746 binop(Iop_Or64, 11747 binop(Iop_Shl64, 11748 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)), 11749 unop(Iop_32Uto64, getIRegRAX(4))), 11750 mkU64(aSSUMED_XCR0_VALUE))); 11751 11752 gen_XSAVE_SEQUENCE(addr, rfbm); 11753 11754 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by 11755 OR-ing the RFBM value into it. */ 11756 IRTemp addr_plus_512 = newTemp(Ity_I64); 11757 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512))); 11758 storeLE( mkexpr(addr_plus_512), 11759 binop(Iop_Or8, 11760 unop(Iop_64to8, mkexpr(rfbm)), 11761 loadLE(Ity_I8, mkexpr(addr_plus_512))) ); 11762 11763 return delta; 11764 } 11765 11766 11767 static Long dis_FXSAVE ( const VexAbiInfo* vbi, 11768 Prefix pfx, Long delta, Int sz ) 11769 { 11770 /* See comment in dis_XSAVE about the significance of REX.W. */ 11771 IRTemp addr = IRTemp_INVALID; 11772 Int alen = 0; 11773 HChar dis_buf[50]; 11774 UChar modrm = getUChar(delta); 11775 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11776 vassert(sz == 4 || sz == 8); /* ditto */ 11777 11778 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11779 delta += alen; 11780 gen_SEGV_if_not_16_aligned(addr); 11781 11782 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 11783 11784 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm 11785 to 0b011, generate the XSAVE sequence accordingly, and let iropt 11786 fold out the unused (AVX) parts accordingly. */ 11787 IRTemp rfbm = newTemp(Ity_I64); 11788 assign(rfbm, mkU64(3)); 11789 gen_XSAVE_SEQUENCE(addr, rfbm); 11790 11791 return delta; 11792 } 11793 11794 11795 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm ) 11796 { 11797 /* ------ rfbm[0] gates the x87 state ------ */ 11798 11799 /* If rfbm[0] == 1, we have to write the x87 state. If 11800 xstate_bv[0] == 1, we will read it from the memory image, else 11801 we'll set it to initial values. 
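(In other words, for each component the restore-from-memory condition is rfbm[i] AND xstate_bv[i]; rfbm[i] alone only forces reinitialisation.)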
Doing this with a helper 11802 function and getting the definedness flow annotations correct is 11803 too difficult, so generate stupid but simple code: first set the 11804 registers to initial values, regardless of xstate_bv[0]. Then, 11805 conditionally restore from the memory image. */ 11806 11807 IRTemp rfbm_0 = newTemp(Ity_I64); 11808 IRTemp xstate_bv_0 = newTemp(Ity_I64); 11809 IRTemp restore_0 = newTemp(Ity_I64); 11810 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1))); 11811 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1))); 11812 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0))); 11813 11814 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) ); 11815 11816 /* Uses dirty helper: 11817 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong ) 11818 */ 11819 IRDirty* d0 = unsafeIRDirty_0_N ( 11820 0/*regparms*/, 11821 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0", 11822 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0, 11823 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 11824 ); 11825 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0)); 11826 11827 /* Declare we're reading memory. Really, bytes 24 through 31 11828 (MXCSR and MXCSR_MASK) aren't read, but we can't express more 11829 than 1 memory area here, so just mark the whole thing as 11830 read. */ 11831 d0->mFx = Ifx_Read; 11832 d0->mAddr = mkexpr(addr); 11833 d0->mSize = 160; 11834 11835 /* declare we're writing guest state */ 11836 d0->nFxState = 5; 11837 vex_bzero(&d0->fxState, sizeof(d0->fxState)); 11838 11839 d0->fxState[0].fx = Ifx_Write; 11840 d0->fxState[0].offset = OFFB_FTOP; 11841 d0->fxState[0].size = sizeof(UInt); 11842 11843 d0->fxState[1].fx = Ifx_Write; 11844 d0->fxState[1].offset = OFFB_FPREGS; 11845 d0->fxState[1].size = 8 * sizeof(ULong); 11846 11847 d0->fxState[2].fx = Ifx_Write; 11848 d0->fxState[2].offset = OFFB_FPTAGS; 11849 d0->fxState[2].size = 8 * sizeof(UChar); 11850 11851 d0->fxState[3].fx = Ifx_Write; 11852 d0->fxState[3].offset = OFFB_FPROUND; 11853 d0->fxState[3].size = sizeof(ULong); 11854 11855 d0->fxState[4].fx = Ifx_Write; 11856 d0->fxState[4].offset = OFFB_FC3210; 11857 d0->fxState[4].size = sizeof(ULong); 11858 11859 stmt( IRStmt_Dirty(d0) ); 11860 11861 /* ------ rfbm[1] gates the SSE state ------ */ 11862 11863 /* Same scheme as component 0: first zero it out, and then possibly 11864 restore from the memory area. */ 11865 IRTemp rfbm_1 = newTemp(Ity_I64); 11866 IRTemp xstate_bv_1 = newTemp(Ity_I64); 11867 IRTemp restore_1 = newTemp(Ity_I64); 11868 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2))); 11869 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2))); 11870 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1))); 11871 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0)); 11872 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0)); 11873 11874 IRTemp rfbm_1or2 = newTemp(Ity_I64); 11875 IRTemp xstate_bv_1or2 = newTemp(Ity_I64); 11876 IRTemp restore_1or2 = newTemp(Ity_I64); 11877 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6))); 11878 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6))); 11879 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2), 11880 mkexpr(xstate_bv_1or2))); 11881 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0)); 11882 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0)); 11883 11884 /* The areas in question are: SSEROUND, and the XMM register array. 
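Defaults go in first: SSEROUND is reset to round-nearest whenever component 1 or 2 is requested, and each XMM register is zeroed whenever component 1 is requested. The guarded loads further below then overwrite those defaults in the cases where the memory image actually supplies the state.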
*/ 11885 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST)); 11886 11887 UInt reg; 11888 for (reg = 0; reg < 16; reg++) { 11889 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0)); 11890 } 11891 11892 /* And now possibly restore from MXCSR/MXCSR_MASK */ 11893 /* Uses dirty helper: 11894 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS 11895 ( VexGuestAMD64State*, ULong ) 11896 This restores from only MXCSR and MXCSR_MASK. We need to do 11897 this if either components 1 (SSE) or 2 (AVX) are requested. 11898 Hence the guard condition is a bit more complex. 11899 */ 11900 IRDirty* d1 = unsafeIRDirty_0_N ( 11901 0/*regparms*/, 11902 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS", 11903 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS, 11904 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 11905 ) ; 11906 d1->guard = restore_1or2e; 11907 11908 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that 11909 the code for rbfm[0] just above claims a read of 0 .. 159, so 11910 this duplicates it. But at least correctly connects 24 .. 31 to 11911 the MXCSR guest state representation (SSEROUND field). */ 11912 d1->mFx = Ifx_Read; 11913 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24)); 11914 d1->mSize = 8; 11915 11916 /* declare we're writing guest state */ 11917 d1->nFxState = 1; 11918 vex_bzero(&d1->fxState, sizeof(d1->fxState)); 11919 11920 d1->fxState[0].fx = Ifx_Write; 11921 d1->fxState[0].offset = OFFB_SSEROUND; 11922 d1->fxState[0].size = sizeof(ULong); 11923 11924 /* Call the helper. This creates SSEROUND but nothing 11925 else. We do the actual register array, XMM[0..15], separately, 11926 in order that any undefinedness in the XMM registers is tracked 11927 separately by Memcheck and is not "infected" by the in-memory 11928 shadow for the other parts of the image. */ 11929 stmt( IRStmt_Dirty(d1) ); 11930 11931 /* And now the XMMs themselves. For each register, we PUT either 11932 its old value, or the value loaded from memory. One convenient 11933 way to do that is with a conditional load that has its the 11934 default value, the old value of the register. */ 11935 for (reg = 0; reg < 16; reg++) { 11936 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)); 11937 IRExpr* alt = getXMMReg(reg); 11938 IRTemp loadedValue = newTemp(Ity_V128); 11939 stmt( IRStmt_LoadG(Iend_LE, 11940 ILGop_IdentV128, 11941 loadedValue, ea, alt, restore_1e) ); 11942 putXMMReg(reg, mkexpr(loadedValue)); 11943 } 11944 11945 /* ------ rfbm[2] gates the AVX state ------ */ 11946 /* Component 2 is just a bunch of register loads, so we'll do it 11947 inline, just to be simple and to be Memcheck friendly. */ 11948 11949 /* Same scheme as component 0: first zero it out, and then possibly 11950 restore from the memory area. 
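(Concretely: the upper 128 bits of each YMM register are zeroed when rfbm[2] is set, and then reloaded from offsets 576 + reg*16 when xstate_bv[2] is set as well.)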
*/ 11951 IRTemp rfbm_2 = newTemp(Ity_I64); 11952 IRTemp xstate_bv_2 = newTemp(Ity_I64); 11953 IRTemp restore_2 = newTemp(Ity_I64); 11954 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4))); 11955 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4))); 11956 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2))); 11957 11958 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0)); 11959 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0)); 11960 11961 for (reg = 0; reg < 16; reg++) { 11962 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0)); 11963 } 11964 11965 for (reg = 0; reg < 16; reg++) { 11966 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)); 11967 IRExpr* alt = getYMMRegLane128(reg, 1); 11968 IRTemp loadedValue = newTemp(Ity_V128); 11969 stmt( IRStmt_LoadG(Iend_LE, 11970 ILGop_IdentV128, 11971 loadedValue, ea, alt, restore_2e) ); 11972 putYMMRegLane128(reg, 1, mkexpr(loadedValue)); 11973 } 11974 } 11975 11976 11977 static Long dis_XRSTOR ( const VexAbiInfo* vbi, 11978 Prefix pfx, Long delta, Int sz ) 11979 { 11980 /* As with XRSTOR above we ignore the value of REX.W since we're 11981 not bothering with the FPU DP and IP fields. */ 11982 IRTemp addr = IRTemp_INVALID; 11983 Int alen = 0; 11984 HChar dis_buf[50]; 11985 UChar modrm = getUChar(delta); 11986 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11987 vassert(sz == 4 || sz == 8); /* ditto */ 11988 11989 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11990 delta += alen; 11991 gen_SEGV_if_not_64_aligned(addr); 11992 11993 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 11994 11995 /* VEX's caller is assumed to have checked this. */ 11996 const ULong aSSUMED_XCR0_VALUE = 7; 11997 11998 IRTemp rfbm = newTemp(Ity_I64); 11999 assign(rfbm, 12000 binop(Iop_And64, 12001 binop(Iop_Or64, 12002 binop(Iop_Shl64, 12003 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)), 12004 unop(Iop_32Uto64, getIRegRAX(4))), 12005 mkU64(aSSUMED_XCR0_VALUE))); 12006 12007 IRTemp xstate_bv = newTemp(Ity_I64); 12008 assign(xstate_bv, loadLE(Ity_I64, 12009 binop(Iop_Add64, mkexpr(addr), mkU64(512+0)))); 12010 12011 IRTemp xcomp_bv = newTemp(Ity_I64); 12012 assign(xcomp_bv, loadLE(Ity_I64, 12013 binop(Iop_Add64, mkexpr(addr), mkU64(512+8)))); 12014 12015 IRTemp xsavehdr_23_16 = newTemp(Ity_I64); 12016 assign( xsavehdr_23_16, 12017 loadLE(Ity_I64, 12018 binop(Iop_Add64, mkexpr(addr), mkU64(512+16)))); 12019 12020 /* We must fault if 12021 * xcomp_bv[63] == 1, since this simulated CPU does not support 12022 the compaction extension. 12023 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7). 12024 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to 12025 imply that xcomp_bv must be zero. 12026 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0 12027 */ 12028 IRTemp fault_if_nonzero = newTemp(Ity_I64); 12029 assign(fault_if_nonzero, 12030 binop(Iop_Or64, 12031 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)), 12032 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16)))); 12033 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)), 12034 Ijk_SigSEGV, 12035 IRConst_U64(guest_RIP_curr_instr), 12036 OFFB_RIP 12037 )); 12038 12039 /* We are guaranteed now that both xstate_bv and rfbm are in the 12040 range 0 .. 7. Generate the restore sequence proper. 
*/ 12041 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm); 12042 12043 return delta; 12044 } 12045 12046 12047 static Long dis_FXRSTOR ( const VexAbiInfo* vbi, 12048 Prefix pfx, Long delta, Int sz ) 12049 { 12050 /* As with FXSAVE above we ignore the value of REX.W since we're 12051 not bothering with the FPU DP and IP fields. */ 12052 IRTemp addr = IRTemp_INVALID; 12053 Int alen = 0; 12054 HChar dis_buf[50]; 12055 UChar modrm = getUChar(delta); 12056 vassert(!epartIsReg(modrm)); /* ensured by caller */ 12057 vassert(sz == 4 || sz == 8); /* ditto */ 12058 12059 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12060 delta += alen; 12061 gen_SEGV_if_not_16_aligned(addr); 12062 12063 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 12064 12065 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also 12066 as if components 0 and 1 are set as present in XSTATE_BV in the 12067 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore, 12068 generate the XRSTOR sequence accordingly, and let iropt fold out 12069 the unused (AVX) parts accordingly. */ 12070 IRTemp three = newTemp(Ity_I64); 12071 assign(three, mkU64(3)); 12072 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/); 12073 12074 return delta; 12075 } 12076 12077 12078 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 ) 12079 { 12080 vassert(imm8 >= 0 && imm8 <= 7); 12081 12082 // Create a V128 value which has the selected word in the 12083 // specified lane, and zeroes everywhere else. 12084 IRTemp tmp128 = newTemp(Ity_V128); 12085 IRTemp halfshift = newTemp(Ity_I64); 12086 assign(halfshift, binop(Iop_Shl64, 12087 unop(Iop_16Uto64, mkexpr(u16)), 12088 mkU8(16 * (imm8 & 3)))); 12089 if (imm8 < 4) { 12090 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 12091 } else { 12092 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 12093 } 12094 12095 UShort mask = ~(3 << (imm8 * 2)); 12096 IRTemp res = newTemp(Ity_V128); 12097 assign( res, binop(Iop_OrV128, 12098 mkexpr(tmp128), 12099 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) ); 12100 return res; 12101 } 12102 12103 12104 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV ) 12105 { 12106 IRTemp s1, s0, d1, d0; 12107 s1 = s0 = d1 = d0 = IRTemp_INVALID; 12108 12109 breakupV128to64s( sV, &s1, &s0 ); 12110 breakupV128to64s( dV, &d1, &d0 ); 12111 12112 IRTemp res = newTemp(Ity_V128); 12113 assign( res, 12114 binop(Iop_64HLtoV128, 12115 mkIRExprCCall(Ity_I64, 0/*regparms*/, 12116 "amd64g_calculate_mmx_psadbw", 12117 &amd64g_calculate_mmx_psadbw, 12118 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))), 12119 mkIRExprCCall(Ity_I64, 0/*regparms*/, 12120 "amd64g_calculate_mmx_psadbw", 12121 &amd64g_calculate_mmx_psadbw, 12122 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) ); 12123 return res; 12124 } 12125 12126 12127 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV ) 12128 { 12129 IRTemp sHi, sLo, dHi, dLo; 12130 sHi = sLo = dHi = dLo = IRTemp_INVALID; 12131 breakupV256toV128s( dV, &dHi, &dLo); 12132 breakupV256toV128s( sV, &sHi, &sLo); 12133 IRTemp res = newTemp(Ity_V256); 12134 assign(res, binop(Iop_V128HLtoV256, 12135 mkexpr(math_PSADBW_128(dHi, sHi)), 12136 mkexpr(math_PSADBW_128(dLo, sLo)))); 12137 return res; 12138 } 12139 12140 12141 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx, 12142 Long delta, Bool isAvx ) 12143 { 12144 IRTemp regD = newTemp(Ity_V128); 12145 IRTemp mask = newTemp(Ity_V128); 12146 IRTemp olddata = newTemp(Ity_V128); 12147 IRTemp newdata = newTemp(Ity_V128); 12148 IRTemp addr = 
newTemp(Ity_I64); 12149 UChar modrm = getUChar(delta); 12150 UInt rG = gregOfRexRM(pfx,modrm); 12151 UInt rE = eregOfRexRM(pfx,modrm); 12152 12153 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 12154 assign( regD, getXMMReg( rG )); 12155 12156 /* Unfortunately can't do the obvious thing with SarN8x16 12157 here since that can't be re-emitted as SSE2 code - no such 12158 insn. */ 12159 assign( mask, 12160 binop(Iop_64HLtoV128, 12161 binop(Iop_SarN8x8, 12162 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), 12163 mkU8(7) ), 12164 binop(Iop_SarN8x8, 12165 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), 12166 mkU8(7) ) )); 12167 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 12168 assign( newdata, binop(Iop_OrV128, 12169 binop(Iop_AndV128, 12170 mkexpr(regD), 12171 mkexpr(mask) ), 12172 binop(Iop_AndV128, 12173 mkexpr(olddata), 12174 unop(Iop_NotV128, mkexpr(mask)))) ); 12175 storeLE( mkexpr(addr), mkexpr(newdata) ); 12176 12177 delta += 1; 12178 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "", 12179 nameXMMReg(rE), nameXMMReg(rG) ); 12180 return delta; 12181 } 12182 12183 12184 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx, 12185 Long delta, Bool isAvx ) 12186 { 12187 UChar modrm = getUChar(delta); 12188 UInt rG = gregOfRexRM(pfx,modrm); 12189 UInt rE = eregOfRexRM(pfx,modrm); 12190 IRTemp t0 = newTemp(Ity_I32); 12191 IRTemp t1 = newTemp(Ity_I32); 12192 IRTemp t2 = newTemp(Ity_I32); 12193 IRTemp t3 = newTemp(Ity_I32); 12194 delta += 1; 12195 assign( t0, binop( Iop_And32, 12196 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)), 12197 mkU32(1) )); 12198 assign( t1, binop( Iop_And32, 12199 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)), 12200 mkU32(2) )); 12201 assign( t2, binop( Iop_And32, 12202 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)), 12203 mkU32(4) )); 12204 assign( t3, binop( Iop_And32, 12205 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)), 12206 mkU32(8) )); 12207 putIReg32( rG, binop(Iop_Or32, 12208 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12209 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 12210 DIP("%smovmskps %s,%s\n", isAvx ? 
"v" : "", 12211 nameXMMReg(rE), nameIReg32(rG)); 12212 return delta; 12213 } 12214 12215 12216 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 12217 { 12218 UChar modrm = getUChar(delta); 12219 UInt rG = gregOfRexRM(pfx,modrm); 12220 UInt rE = eregOfRexRM(pfx,modrm); 12221 IRTemp t0 = newTemp(Ity_I32); 12222 IRTemp t1 = newTemp(Ity_I32); 12223 IRTemp t2 = newTemp(Ity_I32); 12224 IRTemp t3 = newTemp(Ity_I32); 12225 IRTemp t4 = newTemp(Ity_I32); 12226 IRTemp t5 = newTemp(Ity_I32); 12227 IRTemp t6 = newTemp(Ity_I32); 12228 IRTemp t7 = newTemp(Ity_I32); 12229 delta += 1; 12230 assign( t0, binop( Iop_And32, 12231 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)), 12232 mkU32(1) )); 12233 assign( t1, binop( Iop_And32, 12234 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)), 12235 mkU32(2) )); 12236 assign( t2, binop( Iop_And32, 12237 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)), 12238 mkU32(4) )); 12239 assign( t3, binop( Iop_And32, 12240 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)), 12241 mkU32(8) )); 12242 assign( t4, binop( Iop_And32, 12243 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)), 12244 mkU32(16) )); 12245 assign( t5, binop( Iop_And32, 12246 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)), 12247 mkU32(32) )); 12248 assign( t6, binop( Iop_And32, 12249 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)), 12250 mkU32(64) )); 12251 assign( t7, binop( Iop_And32, 12252 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)), 12253 mkU32(128) )); 12254 putIReg32( rG, binop(Iop_Or32, 12255 binop(Iop_Or32, 12256 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12257 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ), 12258 binop(Iop_Or32, 12259 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)), 12260 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) ); 12261 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 12262 return delta; 12263 } 12264 12265 12266 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx, 12267 Long delta, Bool isAvx ) 12268 { 12269 UChar modrm = getUChar(delta); 12270 UInt rG = gregOfRexRM(pfx,modrm); 12271 UInt rE = eregOfRexRM(pfx,modrm); 12272 IRTemp t0 = newTemp(Ity_I32); 12273 IRTemp t1 = newTemp(Ity_I32); 12274 delta += 1; 12275 assign( t0, binop( Iop_And32, 12276 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 12277 mkU32(1) )); 12278 assign( t1, binop( Iop_And32, 12279 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 12280 mkU32(2) )); 12281 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) ); 12282 DIP("%smovmskpd %s,%s\n", isAvx ? 
"v" : "", 12283 nameXMMReg(rE), nameIReg32(rG)); 12284 return delta; 12285 } 12286 12287 12288 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 12289 { 12290 UChar modrm = getUChar(delta); 12291 UInt rG = gregOfRexRM(pfx,modrm); 12292 UInt rE = eregOfRexRM(pfx,modrm); 12293 IRTemp t0 = newTemp(Ity_I32); 12294 IRTemp t1 = newTemp(Ity_I32); 12295 IRTemp t2 = newTemp(Ity_I32); 12296 IRTemp t3 = newTemp(Ity_I32); 12297 delta += 1; 12298 assign( t0, binop( Iop_And32, 12299 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)), 12300 mkU32(1) )); 12301 assign( t1, binop( Iop_And32, 12302 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)), 12303 mkU32(2) )); 12304 assign( t2, binop( Iop_And32, 12305 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)), 12306 mkU32(4) )); 12307 assign( t3, binop( Iop_And32, 12308 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)), 12309 mkU32(8) )); 12310 putIReg32( rG, binop(Iop_Or32, 12311 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12312 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 12313 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 12314 return delta; 12315 } 12316 12317 12318 /* Note, this also handles SSE(1) insns. */ 12319 __attribute__((noinline)) 12320 static 12321 Long dis_ESC_0F__SSE2 ( Bool* decode_OK, 12322 const VexArchInfo* archinfo, 12323 const VexAbiInfo* vbi, 12324 Prefix pfx, Int sz, Long deltaIN, 12325 DisResult* dres ) 12326 { 12327 IRTemp addr = IRTemp_INVALID; 12328 IRTemp t0 = IRTemp_INVALID; 12329 IRTemp t1 = IRTemp_INVALID; 12330 IRTemp t2 = IRTemp_INVALID; 12331 IRTemp t3 = IRTemp_INVALID; 12332 IRTemp t4 = IRTemp_INVALID; 12333 IRTemp t5 = IRTemp_INVALID; 12334 IRTemp t6 = IRTemp_INVALID; 12335 UChar modrm = 0; 12336 Int alen = 0; 12337 HChar dis_buf[50]; 12338 12339 *decode_OK = False; 12340 12341 Long delta = deltaIN; 12342 UChar opc = getUChar(delta); 12343 delta++; 12344 switch (opc) { 12345 12346 case 0x10: 12347 if (have66noF2noF3(pfx) 12348 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12349 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 12350 modrm = getUChar(delta); 12351 if (epartIsReg(modrm)) { 12352 putXMMReg( gregOfRexRM(pfx,modrm), 12353 getXMMReg( eregOfRexRM(pfx,modrm) )); 12354 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12355 nameXMMReg(gregOfRexRM(pfx,modrm))); 12356 delta += 1; 12357 } else { 12358 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12359 putXMMReg( gregOfRexRM(pfx,modrm), 12360 loadLE(Ity_V128, mkexpr(addr)) ); 12361 DIP("movupd %s,%s\n", dis_buf, 12362 nameXMMReg(gregOfRexRM(pfx,modrm))); 12363 delta += alen; 12364 } 12365 goto decode_success; 12366 } 12367 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 12368 G (lo half xmm). If E is mem, upper half of G is zeroed out. 12369 If E is reg, upper half of G is unchanged. 
*/ 12370 if (haveF2no66noF3(pfx) 12371 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) { 12372 modrm = getUChar(delta); 12373 if (epartIsReg(modrm)) { 12374 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12375 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 12376 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12377 nameXMMReg(gregOfRexRM(pfx,modrm))); 12378 delta += 1; 12379 } else { 12380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12381 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12382 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12383 loadLE(Ity_I64, mkexpr(addr)) ); 12384 DIP("movsd %s,%s\n", dis_buf, 12385 nameXMMReg(gregOfRexRM(pfx,modrm))); 12386 delta += alen; 12387 } 12388 goto decode_success; 12389 } 12390 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 12391 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 12392 if (haveF3no66noF2(pfx) 12393 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12394 modrm = getUChar(delta); 12395 if (epartIsReg(modrm)) { 12396 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12397 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 12398 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12399 nameXMMReg(gregOfRexRM(pfx,modrm))); 12400 delta += 1; 12401 } else { 12402 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12403 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12404 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12405 loadLE(Ity_I32, mkexpr(addr)) ); 12406 DIP("movss %s,%s\n", dis_buf, 12407 nameXMMReg(gregOfRexRM(pfx,modrm))); 12408 delta += alen; 12409 } 12410 goto decode_success; 12411 } 12412 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 12413 if (haveNo66noF2noF3(pfx) 12414 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12415 modrm = getUChar(delta); 12416 if (epartIsReg(modrm)) { 12417 putXMMReg( gregOfRexRM(pfx,modrm), 12418 getXMMReg( eregOfRexRM(pfx,modrm) )); 12419 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12420 nameXMMReg(gregOfRexRM(pfx,modrm))); 12421 delta += 1; 12422 } else { 12423 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12424 putXMMReg( gregOfRexRM(pfx,modrm), 12425 loadLE(Ity_V128, mkexpr(addr)) ); 12426 DIP("movups %s,%s\n", dis_buf, 12427 nameXMMReg(gregOfRexRM(pfx,modrm))); 12428 delta += alen; 12429 } 12430 goto decode_success; 12431 } 12432 break; 12433 12434 case 0x11: 12435 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 12436 or lo half xmm). */ 12437 if (haveF2no66noF3(pfx) 12438 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12439 modrm = getUChar(delta); 12440 if (epartIsReg(modrm)) { 12441 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, 12442 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 12443 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12444 nameXMMReg(eregOfRexRM(pfx,modrm))); 12445 delta += 1; 12446 } else { 12447 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12448 storeLE( mkexpr(addr), 12449 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 12450 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12451 dis_buf); 12452 delta += alen; 12453 } 12454 goto decode_success; 12455 } 12456 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 12457 or lo 1/4 xmm). 
*/ 12458 if (haveF3no66noF2(pfx) && sz == 4) { 12459 modrm = getUChar(delta); 12460 if (epartIsReg(modrm)) { 12461 /* fall through, we don't yet have a test case */ 12462 } else { 12463 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12464 storeLE( mkexpr(addr), 12465 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 12466 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12467 dis_buf); 12468 delta += alen; 12469 goto decode_success; 12470 } 12471 } 12472 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 12473 if (have66noF2noF3(pfx) 12474 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12475 modrm = getUChar(delta); 12476 if (epartIsReg(modrm)) { 12477 putXMMReg( eregOfRexRM(pfx,modrm), 12478 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12479 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12480 nameXMMReg(eregOfRexRM(pfx,modrm))); 12481 delta += 1; 12482 } else { 12483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12484 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12485 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12486 dis_buf ); 12487 delta += alen; 12488 } 12489 goto decode_success; 12490 } 12491 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 12492 if (haveNo66noF2noF3(pfx) 12493 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12494 modrm = getUChar(delta); 12495 if (epartIsReg(modrm)) { 12496 /* fall through; awaiting test case */ 12497 } else { 12498 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12499 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12500 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12501 dis_buf ); 12502 delta += alen; 12503 goto decode_success; 12504 } 12505 } 12506 break; 12507 12508 case 0x12: 12509 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 12510 /* Identical to MOVLPS ? */ 12511 if (have66noF2noF3(pfx) 12512 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12513 modrm = getUChar(delta); 12514 if (epartIsReg(modrm)) { 12515 /* fall through; apparently reg-reg is not possible */ 12516 } else { 12517 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12518 delta += alen; 12519 putXMMRegLane64( gregOfRexRM(pfx,modrm), 12520 0/*lower lane*/, 12521 loadLE(Ity_I64, mkexpr(addr)) ); 12522 DIP("movlpd %s, %s\n", 12523 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 12524 goto decode_success; 12525 } 12526 } 12527 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ 12528 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ 12529 if (haveNo66noF2noF3(pfx) 12530 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12531 modrm = getUChar(delta); 12532 if (epartIsReg(modrm)) { 12533 delta += 1; 12534 putXMMRegLane64( gregOfRexRM(pfx,modrm), 12535 0/*lower lane*/, 12536 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 )); 12537 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12538 nameXMMReg(gregOfRexRM(pfx,modrm))); 12539 } else { 12540 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12541 delta += alen; 12542 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/, 12543 loadLE(Ity_I64, mkexpr(addr)) ); 12544 DIP("movlps %s, %s\n", 12545 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 12546 } 12547 goto decode_success; 12548 } 12549 break; 12550 12551 case 0x13: 12552 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. 
*/ 12553 if (haveNo66noF2noF3(pfx) 12554 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12555 modrm = getUChar(delta); 12556 if (!epartIsReg(modrm)) { 12557 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12558 delta += alen; 12559 storeLE( mkexpr(addr), 12560 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12561 0/*lower lane*/ ) ); 12562 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12563 dis_buf); 12564 goto decode_success; 12565 } 12566 /* else fall through */ 12567 } 12568 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 12569 /* Identical to MOVLPS ? */ 12570 if (have66noF2noF3(pfx) 12571 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12572 modrm = getUChar(delta); 12573 if (!epartIsReg(modrm)) { 12574 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12575 delta += alen; 12576 storeLE( mkexpr(addr), 12577 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12578 0/*lower lane*/ ) ); 12579 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12580 dis_buf); 12581 goto decode_success; 12582 } 12583 /* else fall through */ 12584 } 12585 break; 12586 12587 case 0x14: 12588 case 0x15: 12589 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 12590 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 12591 /* These just appear to be special cases of SHUFPS */ 12592 if (haveNo66noF2noF3(pfx) && sz == 4) { 12593 Bool hi = toBool(opc == 0x15); 12594 IRTemp sV = newTemp(Ity_V128); 12595 IRTemp dV = newTemp(Ity_V128); 12596 modrm = getUChar(delta); 12597 UInt rG = gregOfRexRM(pfx,modrm); 12598 assign( dV, getXMMReg(rG) ); 12599 if (epartIsReg(modrm)) { 12600 UInt rE = eregOfRexRM(pfx,modrm); 12601 assign( sV, getXMMReg(rE) ); 12602 delta += 1; 12603 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12604 nameXMMReg(rE), nameXMMReg(rG)); 12605 } else { 12606 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12607 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12608 delta += alen; 12609 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12610 dis_buf, nameXMMReg(rG)); 12611 } 12612 IRTemp res = math_UNPCKxPS_128( sV, dV, hi ); 12613 putXMMReg( rG, mkexpr(res) ); 12614 goto decode_success; 12615 } 12616 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 12617 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 12618 /* These just appear to be special cases of SHUFPS */ 12619 if (have66noF2noF3(pfx) 12620 && sz == 2 /* could be 8 if rex also present */) { 12621 Bool hi = toBool(opc == 0x15); 12622 IRTemp sV = newTemp(Ity_V128); 12623 IRTemp dV = newTemp(Ity_V128); 12624 modrm = getUChar(delta); 12625 UInt rG = gregOfRexRM(pfx,modrm); 12626 assign( dV, getXMMReg(rG) ); 12627 if (epartIsReg(modrm)) { 12628 UInt rE = eregOfRexRM(pfx,modrm); 12629 assign( sV, getXMMReg(rE) ); 12630 delta += 1; 12631 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12632 nameXMMReg(rE), nameXMMReg(rG)); 12633 } else { 12634 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12635 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12636 delta += alen; 12637 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12638 dis_buf, nameXMMReg(rG)); 12639 } 12640 IRTemp res = math_UNPCKxPD_128( sV, dV, hi ); 12641 putXMMReg( rG, mkexpr(res) ); 12642 goto decode_success; 12643 } 12644 break; 12645 12646 case 0x16: 12647 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 12648 /* These seems identical to MOVHPS. This instruction encoding is 12649 completely crazy. 
*/ 12650 if (have66noF2noF3(pfx) 12651 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12652 modrm = getUChar(delta); 12653 if (epartIsReg(modrm)) { 12654 /* fall through; apparently reg-reg is not possible */ 12655 } else { 12656 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12657 delta += alen; 12658 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12659 loadLE(Ity_I64, mkexpr(addr)) ); 12660 DIP("movhpd %s,%s\n", dis_buf, 12661 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12662 goto decode_success; 12663 } 12664 } 12665 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 12666 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 12667 if (haveNo66noF2noF3(pfx) 12668 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12669 modrm = getUChar(delta); 12670 if (epartIsReg(modrm)) { 12671 delta += 1; 12672 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12673 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 12674 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12675 nameXMMReg(gregOfRexRM(pfx,modrm))); 12676 } else { 12677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12678 delta += alen; 12679 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12680 loadLE(Ity_I64, mkexpr(addr)) ); 12681 DIP("movhps %s,%s\n", dis_buf, 12682 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12683 } 12684 goto decode_success; 12685 } 12686 break; 12687 12688 case 0x17: 12689 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ 12690 if (haveNo66noF2noF3(pfx) 12691 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12692 modrm = getUChar(delta); 12693 if (!epartIsReg(modrm)) { 12694 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12695 delta += alen; 12696 storeLE( mkexpr(addr), 12697 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12698 1/*upper lane*/ ) ); 12699 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12700 dis_buf); 12701 goto decode_success; 12702 } 12703 /* else fall through */ 12704 } 12705 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 12706 /* Again, this seems identical to MOVHPS. 
*/ 12707 if (have66noF2noF3(pfx) 12708 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12709 modrm = getUChar(delta); 12710 if (!epartIsReg(modrm)) { 12711 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12712 delta += alen; 12713 storeLE( mkexpr(addr), 12714 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12715 1/*upper lane*/ ) ); 12716 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12717 dis_buf); 12718 goto decode_success; 12719 } 12720 /* else fall through */ 12721 } 12722 break; 12723 12724 case 0x18: 12725 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 12726 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 12727 /* 0F 18 /2 = PREFETCH1 */ 12728 /* 0F 18 /3 = PREFETCH2 */ 12729 if (haveNo66noF2noF3(pfx) 12730 && !epartIsReg(getUChar(delta)) 12731 && gregLO3ofRM(getUChar(delta)) >= 0 12732 && gregLO3ofRM(getUChar(delta)) <= 3) { 12733 const HChar* hintstr = "??"; 12734 12735 modrm = getUChar(delta); 12736 vassert(!epartIsReg(modrm)); 12737 12738 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12739 delta += alen; 12740 12741 switch (gregLO3ofRM(modrm)) { 12742 case 0: hintstr = "nta"; break; 12743 case 1: hintstr = "t0"; break; 12744 case 2: hintstr = "t1"; break; 12745 case 3: hintstr = "t2"; break; 12746 default: vassert(0); 12747 } 12748 12749 DIP("prefetch%s %s\n", hintstr, dis_buf); 12750 goto decode_success; 12751 } 12752 break; 12753 12754 case 0x28: 12755 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 12756 if (have66noF2noF3(pfx) 12757 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12758 modrm = getUChar(delta); 12759 if (epartIsReg(modrm)) { 12760 putXMMReg( gregOfRexRM(pfx,modrm), 12761 getXMMReg( eregOfRexRM(pfx,modrm) )); 12762 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12763 nameXMMReg(gregOfRexRM(pfx,modrm))); 12764 delta += 1; 12765 } else { 12766 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12767 gen_SEGV_if_not_16_aligned( addr ); 12768 putXMMReg( gregOfRexRM(pfx,modrm), 12769 loadLE(Ity_V128, mkexpr(addr)) ); 12770 DIP("movapd %s,%s\n", dis_buf, 12771 nameXMMReg(gregOfRexRM(pfx,modrm))); 12772 delta += alen; 12773 } 12774 goto decode_success; 12775 } 12776 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 12777 if (haveNo66noF2noF3(pfx) 12778 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12779 modrm = getUChar(delta); 12780 if (epartIsReg(modrm)) { 12781 putXMMReg( gregOfRexRM(pfx,modrm), 12782 getXMMReg( eregOfRexRM(pfx,modrm) )); 12783 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12784 nameXMMReg(gregOfRexRM(pfx,modrm))); 12785 delta += 1; 12786 } else { 12787 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12788 gen_SEGV_if_not_16_aligned( addr ); 12789 putXMMReg( gregOfRexRM(pfx,modrm), 12790 loadLE(Ity_V128, mkexpr(addr)) ); 12791 DIP("movaps %s,%s\n", dis_buf, 12792 nameXMMReg(gregOfRexRM(pfx,modrm))); 12793 delta += alen; 12794 } 12795 goto decode_success; 12796 } 12797 break; 12798 12799 case 0x29: 12800 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). 
*/ 12801 if (haveNo66noF2noF3(pfx) 12802 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12803 modrm = getUChar(delta); 12804 if (epartIsReg(modrm)) { 12805 putXMMReg( eregOfRexRM(pfx,modrm), 12806 getXMMReg( gregOfRexRM(pfx,modrm) )); 12807 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12808 nameXMMReg(eregOfRexRM(pfx,modrm))); 12809 delta += 1; 12810 } else { 12811 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12812 gen_SEGV_if_not_16_aligned( addr ); 12813 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12814 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12815 dis_buf ); 12816 delta += alen; 12817 } 12818 goto decode_success; 12819 } 12820 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 12821 if (have66noF2noF3(pfx) 12822 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12823 modrm = getUChar(delta); 12824 if (epartIsReg(modrm)) { 12825 putXMMReg( eregOfRexRM(pfx,modrm), 12826 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12827 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12828 nameXMMReg(eregOfRexRM(pfx,modrm))); 12829 delta += 1; 12830 } else { 12831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12832 gen_SEGV_if_not_16_aligned( addr ); 12833 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12834 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12835 dis_buf ); 12836 delta += alen; 12837 } 12838 goto decode_success; 12839 } 12840 break; 12841 12842 case 0x2A: 12843 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 12844 half xmm */ 12845 if (haveNo66noF2noF3(pfx) && sz == 4) { 12846 IRTemp arg64 = newTemp(Ity_I64); 12847 IRTemp rmode = newTemp(Ity_I32); 12848 12849 modrm = getUChar(delta); 12850 do_MMX_preamble(); 12851 if (epartIsReg(modrm)) { 12852 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12853 delta += 1; 12854 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12855 nameXMMReg(gregOfRexRM(pfx,modrm))); 12856 } else { 12857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12858 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12859 delta += alen; 12860 DIP("cvtpi2ps %s,%s\n", dis_buf, 12861 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12862 } 12863 12864 assign( rmode, get_sse_roundingmode() ); 12865 12866 putXMMRegLane32F( 12867 gregOfRexRM(pfx,modrm), 0, 12868 binop(Iop_F64toF32, 12869 mkexpr(rmode), 12870 unop(Iop_I32StoF64, 12871 unop(Iop_64to32, mkexpr(arg64)) )) ); 12872 12873 putXMMRegLane32F( 12874 gregOfRexRM(pfx,modrm), 1, 12875 binop(Iop_F64toF32, 12876 mkexpr(rmode), 12877 unop(Iop_I32StoF64, 12878 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 12879 12880 goto decode_success; 12881 } 12882 /* F3 0F 2A = CVTSI2SS 12883 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 12884 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 12885 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12886 IRTemp rmode = newTemp(Ity_I32); 12887 assign( rmode, get_sse_roundingmode() ); 12888 modrm = getUChar(delta); 12889 if (sz == 4) { 12890 IRTemp arg32 = newTemp(Ity_I32); 12891 if (epartIsReg(modrm)) { 12892 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12893 delta += 1; 12894 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12895 nameXMMReg(gregOfRexRM(pfx,modrm))); 12896 } else { 12897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12898 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12899 delta += alen; 12900 DIP("cvtsi2ss %s,%s\n", dis_buf, 12901 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12902 } 12903 
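/* Note: Iop_I32StoF64 takes no rounding mode because every I32 is exactly representable as an F64; only the final F64->F32 step below needs the prevailing SSE rounding mode. */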
putXMMRegLane32F( 12904 gregOfRexRM(pfx,modrm), 0, 12905 binop(Iop_F64toF32, 12906 mkexpr(rmode), 12907 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 12908 } else { 12909 /* sz == 8 */ 12910 IRTemp arg64 = newTemp(Ity_I64); 12911 if (epartIsReg(modrm)) { 12912 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12913 delta += 1; 12914 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12915 nameXMMReg(gregOfRexRM(pfx,modrm))); 12916 } else { 12917 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12918 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12919 delta += alen; 12920 DIP("cvtsi2ssq %s,%s\n", dis_buf, 12921 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12922 } 12923 putXMMRegLane32F( 12924 gregOfRexRM(pfx,modrm), 0, 12925 binop(Iop_F64toF32, 12926 mkexpr(rmode), 12927 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 12928 } 12929 goto decode_success; 12930 } 12931 /* F2 0F 2A = CVTSI2SD 12932 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 12933 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 12934 */ 12935 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12936 modrm = getUChar(delta); 12937 if (sz == 4) { 12938 IRTemp arg32 = newTemp(Ity_I32); 12939 if (epartIsReg(modrm)) { 12940 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12941 delta += 1; 12942 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12943 nameXMMReg(gregOfRexRM(pfx,modrm))); 12944 } else { 12945 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12946 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12947 delta += alen; 12948 DIP("cvtsi2sdl %s,%s\n", dis_buf, 12949 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12950 } 12951 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12952 unop(Iop_I32StoF64, mkexpr(arg32)) 12953 ); 12954 } else { 12955 /* sz == 8 */ 12956 IRTemp arg64 = newTemp(Ity_I64); 12957 if (epartIsReg(modrm)) { 12958 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12959 delta += 1; 12960 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12961 nameXMMReg(gregOfRexRM(pfx,modrm))); 12962 } else { 12963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12964 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12965 delta += alen; 12966 DIP("cvtsi2sdq %s,%s\n", dis_buf, 12967 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12968 } 12969 putXMMRegLane64F( 12970 gregOfRexRM(pfx,modrm), 12971 0, 12972 binop( Iop_I64StoF64, 12973 get_sse_roundingmode(), 12974 mkexpr(arg64) 12975 ) 12976 ); 12977 } 12978 goto decode_success; 12979 } 12980 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 12981 xmm(G) */ 12982 if (have66noF2noF3(pfx) && sz == 2) { 12983 IRTemp arg64 = newTemp(Ity_I64); 12984 12985 modrm = getUChar(delta); 12986 if (epartIsReg(modrm)) { 12987 /* Only switch to MMX mode if the source is a MMX register. 12988 This is inconsistent with all other instructions which 12989 convert between XMM and (M64 or MMX), which always switch 12990 to MMX mode even if 64-bit operand is M64 and not MMX. At 12991 least, that's what the Intel docs seem to me to say. 12992 Fixes #210264. 
*/ 12993 do_MMX_preamble(); 12994 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12995 delta += 1; 12996 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12997 nameXMMReg(gregOfRexRM(pfx,modrm))); 12998 } else { 12999 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13000 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 13001 delta += alen; 13002 DIP("cvtpi2pd %s,%s\n", dis_buf, 13003 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13004 } 13005 13006 putXMMRegLane64F( 13007 gregOfRexRM(pfx,modrm), 0, 13008 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 13009 ); 13010 13011 putXMMRegLane64F( 13012 gregOfRexRM(pfx,modrm), 1, 13013 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 13014 ); 13015 13016 goto decode_success; 13017 } 13018 break; 13019 13020 case 0x2B: 13021 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 13022 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 13023 if ( (haveNo66noF2noF3(pfx) && sz == 4) 13024 || (have66noF2noF3(pfx) && sz == 2) ) { 13025 modrm = getUChar(delta); 13026 if (!epartIsReg(modrm)) { 13027 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13028 gen_SEGV_if_not_16_aligned( addr ); 13029 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13030 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 13031 dis_buf, 13032 nameXMMReg(gregOfRexRM(pfx,modrm))); 13033 delta += alen; 13034 goto decode_success; 13035 } 13036 /* else fall through */ 13037 } 13038 break; 13039 13040 case 0x2C: 13041 case 0x2D: 13042 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13043 I32 in mmx, according to prevailing SSE rounding mode */ 13044 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13045 I32 in mmx, rounding towards zero */ 13046 if (haveNo66noF2noF3(pfx) && sz == 4) { 13047 IRTemp dst64 = newTemp(Ity_I64); 13048 IRTemp rmode = newTemp(Ity_I32); 13049 IRTemp f32lo = newTemp(Ity_F32); 13050 IRTemp f32hi = newTemp(Ity_F32); 13051 Bool r2zero = toBool(opc == 0x2C); 13052 13053 do_MMX_preamble(); 13054 modrm = getUChar(delta); 13055 13056 if (epartIsReg(modrm)) { 13057 delta += 1; 13058 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13059 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 13060 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 13061 nameXMMReg(eregOfRexRM(pfx,modrm)), 13062 nameMMXReg(gregLO3ofRM(modrm))); 13063 } else { 13064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13065 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13066 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 13067 mkexpr(addr), 13068 mkU64(4) ))); 13069 delta += alen; 13070 DIP("cvt%sps2pi %s,%s\n", r2zero ?
"t" : "", 13071 dis_buf, 13072 nameMMXReg(gregLO3ofRM(modrm))); 13073 } 13074 13075 if (r2zero) { 13076 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13077 } else { 13078 assign( rmode, get_sse_roundingmode() ); 13079 } 13080 13081 assign( 13082 dst64, 13083 binop( Iop_32HLto64, 13084 binop( Iop_F64toI32S, 13085 mkexpr(rmode), 13086 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 13087 binop( Iop_F64toI32S, 13088 mkexpr(rmode), 13089 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 13090 ) 13091 ); 13092 13093 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13094 goto decode_success; 13095 } 13096 /* F3 0F 2D = CVTSS2SI 13097 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13098 according to prevailing SSE rounding mode 13099 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13100 according to prevailing SSE rounding mode 13101 */ 13102 /* F3 0F 2C = CVTTSS2SI 13103 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13104 truncating towards zero 13105 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13106 truncating towards zero 13107 */ 13108 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 13109 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13110 goto decode_success; 13111 } 13112 /* F2 0F 2D = CVTSD2SI 13113 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13114 according to prevailing SSE rounding mode 13115 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13116 according to prevailing SSE rounding mode 13117 */ 13118 /* F2 0F 2C = CVTTSD2SI 13119 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13120 truncating towards zero 13121 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13122 truncating towards zero 13123 */ 13124 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 13125 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13126 goto decode_success; 13127 } 13128 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13129 I32 in mmx, according to prevailing SSE rounding mode */ 13130 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13131 I32 in mmx, rounding towards zero */ 13132 if (have66noF2noF3(pfx) && sz == 2) { 13133 IRTemp dst64 = newTemp(Ity_I64); 13134 IRTemp rmode = newTemp(Ity_I32); 13135 IRTemp f64lo = newTemp(Ity_F64); 13136 IRTemp f64hi = newTemp(Ity_F64); 13137 Bool r2zero = toBool(opc == 0x2C); 13138 13139 do_MMX_preamble(); 13140 modrm = getUChar(delta); 13141 13142 if (epartIsReg(modrm)) { 13143 delta += 1; 13144 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13145 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 13146 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 13147 nameXMMReg(eregOfRexRM(pfx,modrm)), 13148 nameMMXReg(gregLO3ofRM(modrm))); 13149 } else { 13150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13151 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13152 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 13153 mkexpr(addr), 13154 mkU64(8) ))); 13155 delta += alen; 13156 DIP("cvt%spd2pi %s,%s\n", r2zero ?
"t" : "", 13157 dis_buf, 13158 nameMMXReg(gregLO3ofRM(modrm))); 13159 } 13160 13161 if (r2zero) { 13162 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13163 } else { 13164 assign( rmode, get_sse_roundingmode() ); 13165 } 13166 13167 assign( 13168 dst64, 13169 binop( Iop_32HLto64, 13170 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 13171 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 13172 ) 13173 ); 13174 13175 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13176 goto decode_success; 13177 } 13178 break; 13179 13180 case 0x2E: 13181 case 0x2F: 13182 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 13183 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 13184 if (have66noF2noF3(pfx) && sz == 2) { 13185 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 13186 goto decode_success; 13187 } 13188 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 13189 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 13190 if (haveNo66noF2noF3(pfx) && sz == 4) { 13191 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 13192 goto decode_success; 13193 } 13194 break; 13195 13196 case 0x50: 13197 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 13198 to 4 lowest bits of ireg(G) */ 13199 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13200 && epartIsReg(getUChar(delta))) { 13201 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13202 set to 1, which has been known to happen: 13203 13204 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 13205 13206 20071106: Intel docs say that REX.W isn't redundant: when 13207 present, a 64-bit register is written; when not present, only 13208 the 32-bit half is written. However, testing on a Core2 13209 machine suggests the entire 64 bit register is written 13210 irrespective of the status of REX.W. That could be because 13211 of the default rule that says "if the lower 32-bit half of a 13212 64-bit register is written, the upper half is zeroed". By using 13213 putIReg32 here we inadvertently produce the same behaviour as 13214 the Core2, for the same reason -- putIReg32 implements said 13215 rule. 13216 13217 AMD docs give no indication that REX.W is even valid for this 13218 insn. */ 13219 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13220 goto decode_success; 13221 } 13222 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 13223 2 lowest bits of ireg(G) */ 13224 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 13225 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13226 set to 1, which has been known to happen: 13227 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 13228 20071106: see further comments on MOVMSKPS implementation above.
13229 */ 13230 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13231 goto decode_success; 13232 } 13233 break; 13234 13235 case 0x51: 13236 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 13237 if (haveF3no66noF2(pfx) && sz == 4) { 13238 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13239 "sqrtss", Iop_Sqrt32F0x4 ); 13240 goto decode_success; 13241 } 13242 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 13243 if (haveNo66noF2noF3(pfx) && sz == 4) { 13244 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13245 "sqrtps", Iop_Sqrt32Fx4 ); 13246 goto decode_success; 13247 } 13248 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 13249 if (haveF2no66noF3(pfx) && sz == 4) { 13250 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta, 13251 "sqrtsd", Iop_Sqrt64F0x2 ); 13252 goto decode_success; 13253 } 13254 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 13255 if (have66noF2noF3(pfx) && sz == 2) { 13256 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13257 "sqrtpd", Iop_Sqrt64Fx2 ); 13258 goto decode_success; 13259 } 13260 break; 13261 13262 case 0x52: 13263 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 13264 if (haveF3no66noF2(pfx) && sz == 4) { 13265 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13266 "rsqrtss", Iop_RSqrtEst32F0x4 ); 13267 goto decode_success; 13268 } 13269 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 13270 if (haveNo66noF2noF3(pfx) && sz == 4) { 13271 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13272 "rsqrtps", Iop_RSqrtEst32Fx4 ); 13273 goto decode_success; 13274 } 13275 break; 13276 13277 case 0x53: 13278 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 13279 if (haveF3no66noF2(pfx) && sz == 4) { 13280 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13281 "rcpss", Iop_RecipEst32F0x4 ); 13282 goto decode_success; 13283 } 13284 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 13285 if (haveNo66noF2noF3(pfx) && sz == 4) { 13286 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13287 "rcpps", Iop_RecipEst32Fx4 ); 13288 goto decode_success; 13289 } 13290 break; 13291 13292 case 0x54: 13293 /* 0F 54 = ANDPS -- G = G and E */ 13294 if (haveNo66noF2noF3(pfx) && sz == 4) { 13295 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 ); 13296 goto decode_success; 13297 } 13298 /* 66 0F 54 = ANDPD -- G = G and E */ 13299 if (have66noF2noF3(pfx) && sz == 2) { 13300 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 ); 13301 goto decode_success; 13302 } 13303 break; 13304 13305 case 0x55: 13306 /* 0F 55 = ANDNPS -- G = (not G) and E */ 13307 if (haveNo66noF2noF3(pfx) && sz == 4) { 13308 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps", 13309 Iop_AndV128 ); 13310 goto decode_success; 13311 } 13312 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 13313 if (have66noF2noF3(pfx) && sz == 2) { 13314 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd", 13315 Iop_AndV128 ); 13316 goto decode_success; 13317 } 13318 break; 13319 13320 case 0x56: 13321 /* 0F 56 = ORPS -- G = G or E */ 13322 if (haveNo66noF2noF3(pfx) && sz == 4) { 13323 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 ); 13324 goto decode_success; 13325 } 13326 /* 66 0F 56 = ORPD -- G = G or E */ 13327 if (have66noF2noF3(pfx) && sz == 2) { 13328 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 ); 13329 goto decode_success; 13330 } 13331 break; 13332 13333 case 0x57: 13334 /* 66 0F 57 = XORPD -- G = G xor E
*/ 13335 if (have66noF2noF3(pfx) && sz == 2) { 13336 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 ); 13337 goto decode_success; 13338 } 13339 /* 0F 57 = XORPS -- G = G xor E */ 13340 if (haveNo66noF2noF3(pfx) && sz == 4) { 13341 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 ); 13342 goto decode_success; 13343 } 13344 break; 13345 13346 case 0x58: 13347 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 13348 if (haveNo66noF2noF3(pfx) && sz == 4) { 13349 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 ); 13350 goto decode_success; 13351 } 13352 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 13353 if (haveF3no66noF2(pfx) && sz == 4) { 13354 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 ); 13355 goto decode_success; 13356 } 13357 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 13358 if (haveF2no66noF3(pfx) 13359 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13360 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 ); 13361 goto decode_success; 13362 } 13363 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */ 13364 if (have66noF2noF3(pfx) 13365 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13366 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 ); 13367 goto decode_success; 13368 } 13369 break; 13370 13371 case 0x59: 13372 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ 13373 if (haveF2no66noF3(pfx) 13374 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13375 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 ); 13376 goto decode_success; 13377 } 13378 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ 13379 if (haveF3no66noF2(pfx) && sz == 4) { 13380 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 ); 13381 goto decode_success; 13382 } 13383 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 13384 if (haveNo66noF2noF3(pfx) && sz == 4) { 13385 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 ); 13386 goto decode_success; 13387 } 13388 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 13389 if (have66noF2noF3(pfx) 13390 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13391 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 ); 13392 goto decode_success; 13393 } 13394 break; 13395 13396 case 0x5A: 13397 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 13398 F64 in xmm(G).
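(The two low F32 lanes of the source are widened to fill both F64 lanes of xmm(G); the upper half of the source is ignored.)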
*/ 13399 if (haveNo66noF2noF3(pfx) 13400 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13401 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13402 goto decode_success; 13403 } 13404 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 13405 low half xmm(G) */ 13406 if (haveF3no66noF2(pfx) && sz == 4) { 13407 IRTemp f32lo = newTemp(Ity_F32); 13408 13409 modrm = getUChar(delta); 13410 if (epartIsReg(modrm)) { 13411 delta += 1; 13412 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13413 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13414 nameXMMReg(gregOfRexRM(pfx,modrm))); 13415 } else { 13416 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13417 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13418 delta += alen; 13419 DIP("cvtss2sd %s,%s\n", dis_buf, 13420 nameXMMReg(gregOfRexRM(pfx,modrm))); 13421 } 13422 13423 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 13424 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 13425 13426 goto decode_success; 13427 } 13428 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 13429 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 13430 if (haveF2no66noF3(pfx) && sz == 4) { 13431 IRTemp rmode = newTemp(Ity_I32); 13432 IRTemp f64lo = newTemp(Ity_F64); 13433 13434 modrm = getUChar(delta); 13435 if (epartIsReg(modrm)) { 13436 delta += 1; 13437 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13438 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13439 nameXMMReg(gregOfRexRM(pfx,modrm))); 13440 } else { 13441 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13442 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13443 delta += alen; 13444 DIP("cvtsd2ss %s,%s\n", dis_buf, 13445 nameXMMReg(gregOfRexRM(pfx,modrm))); 13446 } 13447 13448 assign( rmode, get_sse_roundingmode() ); 13449 putXMMRegLane32F( 13450 gregOfRexRM(pfx,modrm), 0, 13451 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 13452 ); 13453 13454 goto decode_success; 13455 } 13456 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 13457 lo half xmm(G), rounding according to prevailing SSE rounding 13458 mode, and zero upper half */ 13459 /* Note, this is practically identical to CVTPD2DQ. It would 13460 be nice to merge them together. */ 13461 if (have66noF2noF3(pfx) && sz == 2) { 13462 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13463 goto decode_success; 13464 } 13465 break; 13466 13467 case 0x5B: 13468 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13469 xmm(G), rounding towards zero */ 13470 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13471 xmm(G), as per the prevailing rounding mode */ 13472 if ( (have66noF2noF3(pfx) && sz == 2) 13473 || (haveF3no66noF2(pfx) && sz == 4) ) { 13474 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
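/* Note: within this guard, sz == 4 can only arise from the F3-prefixed form (CVTTPS2DQ), since the 66-prefixed form (CVTPS2DQ) forces sz to 2; so r2zero selects truncation exactly for the truncating variant. The FIXME above presumably concerns a redundant REX.W perturbing sz. */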
13475 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero ); 13476 goto decode_success; 13477 } 13478 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 13479 xmm(G) */ 13480 if (haveNo66noF2noF3(pfx) && sz == 4) { 13481 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13482 goto decode_success; 13483 } 13484 break; 13485 13486 case 0x5C: 13487 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 13488 if (haveF3no66noF2(pfx) && sz == 4) { 13489 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 ); 13490 goto decode_success; 13491 } 13492 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 13493 if (haveF2no66noF3(pfx) 13494 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13495 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 ); 13496 goto decode_success; 13497 } 13498 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 13499 if (haveNo66noF2noF3(pfx) && sz == 4) { 13500 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 ); 13501 goto decode_success; 13502 } 13503 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 13504 if (have66noF2noF3(pfx) && sz == 2) { 13505 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 ); 13506 goto decode_success; 13507 } 13508 break; 13509 13510 case 0x5D: 13511 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 13512 if (haveNo66noF2noF3(pfx) && sz == 4) { 13513 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 ); 13514 goto decode_success; 13515 } 13516 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 13517 if (haveF3no66noF2(pfx) && sz == 4) { 13518 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 ); 13519 goto decode_success; 13520 } 13521 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 13522 if (haveF2no66noF3(pfx) && sz == 4) { 13523 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 ); 13524 goto decode_success; 13525 } 13526 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 13527 if (have66noF2noF3(pfx) && sz == 2) { 13528 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 ); 13529 goto decode_success; 13530 } 13531 break; 13532 13533 case 0x5E: 13534 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 13535 if (haveF2no66noF3(pfx) && sz == 4) { 13536 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 ); 13537 goto decode_success; 13538 } 13539 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 13540 if (haveNo66noF2noF3(pfx) && sz == 4) { 13541 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 ); 13542 goto decode_success; 13543 } 13544 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 13545 if (haveF3no66noF2(pfx) && sz == 4) { 13546 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 ); 13547 goto decode_success; 13548 } 13549 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 13550 if (have66noF2noF3(pfx) && sz == 2) { 13551 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 ); 13552 goto decode_success; 13553 } 13554 break; 13555 13556 case 0x5F: 13557 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 13558 if (haveNo66noF2noF3(pfx) && sz == 4) { 13559 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 ); 13560 goto decode_success; 13561 } 13562 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 13563 if (haveF3no66noF2(pfx) && sz == 4) { 13564 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 ); 13565 goto decode_success; 13566 } 13567 /* F2 0F 5F = MAXSD -- max 64F0x2 from 
R/M to R */ 13568 if (haveF2no66noF3(pfx) && sz == 4) { 13569 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 ); 13570 goto decode_success; 13571 } 13572 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 13573 if (have66noF2noF3(pfx) && sz == 2) { 13574 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 ); 13575 goto decode_success; 13576 } 13577 break; 13578 13579 case 0x60: 13580 /* 66 0F 60 = PUNPCKLBW */ 13581 if (have66noF2noF3(pfx) && sz == 2) { 13582 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13583 "punpcklbw", 13584 Iop_InterleaveLO8x16, True ); 13585 goto decode_success; 13586 } 13587 break; 13588 13589 case 0x61: 13590 /* 66 0F 61 = PUNPCKLWD */ 13591 if (have66noF2noF3(pfx) && sz == 2) { 13592 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13593 "punpcklwd", 13594 Iop_InterleaveLO16x8, True ); 13595 goto decode_success; 13596 } 13597 break; 13598 13599 case 0x62: 13600 /* 66 0F 62 = PUNPCKLDQ */ 13601 if (have66noF2noF3(pfx) && sz == 2) { 13602 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13603 "punpckldq", 13604 Iop_InterleaveLO32x4, True ); 13605 goto decode_success; 13606 } 13607 break; 13608 13609 case 0x63: 13610 /* 66 0F 63 = PACKSSWB */ 13611 if (have66noF2noF3(pfx) && sz == 2) { 13612 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13613 "packsswb", 13614 Iop_QNarrowBin16Sto8Sx16, True ); 13615 goto decode_success; 13616 } 13617 break; 13618 13619 case 0x64: 13620 /* 66 0F 64 = PCMPGTB */ 13621 if (have66noF2noF3(pfx) && sz == 2) { 13622 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13623 "pcmpgtb", Iop_CmpGT8Sx16, False ); 13624 goto decode_success; 13625 } 13626 break; 13627 13628 case 0x65: 13629 /* 66 0F 65 = PCMPGTW */ 13630 if (have66noF2noF3(pfx) && sz == 2) { 13631 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13632 "pcmpgtw", Iop_CmpGT16Sx8, False ); 13633 goto decode_success; 13634 } 13635 break; 13636 13637 case 0x66: 13638 /* 66 0F 66 = PCMPGTD */ 13639 if (have66noF2noF3(pfx) && sz == 2) { 13640 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13641 "pcmpgtd", Iop_CmpGT32Sx4, False ); 13642 goto decode_success; 13643 } 13644 break; 13645 13646 case 0x67: 13647 /* 66 0F 67 = PACKUSWB */ 13648 if (have66noF2noF3(pfx) && sz == 2) { 13649 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13650 "packuswb", 13651 Iop_QNarrowBin16Sto8Ux16, True ); 13652 goto decode_success; 13653 } 13654 break; 13655 13656 case 0x68: 13657 /* 66 0F 68 = PUNPCKHBW */ 13658 if (have66noF2noF3(pfx) && sz == 2) { 13659 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13660 "punpckhbw", 13661 Iop_InterleaveHI8x16, True ); 13662 goto decode_success; 13663 } 13664 break; 13665 13666 case 0x69: 13667 /* 66 0F 69 = PUNPCKHWD */ 13668 if (have66noF2noF3(pfx) && sz == 2) { 13669 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13670 "punpckhwd", 13671 Iop_InterleaveHI16x8, True ); 13672 goto decode_success; 13673 } 13674 break; 13675 13676 case 0x6A: 13677 /* 66 0F 6A = PUNPCKHDQ */ 13678 if (have66noF2noF3(pfx) && sz == 2) { 13679 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13680 "punpckhdq", 13681 Iop_InterleaveHI32x4, True ); 13682 goto decode_success; 13683 } 13684 break; 13685 13686 case 0x6B: 13687 /* 66 0F 6B = PACKSSDW */ 13688 if (have66noF2noF3(pfx) && sz == 2) { 13689 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13690 "packssdw", 13691 Iop_QNarrowBin32Sto16Sx8, True ); 13692 goto decode_success; 13693 } 13694 break; 13695 13696 case 0x6C: 13697 /* 66 0F 6C = PUNPCKLQDQ */ 13698 if (have66noF2noF3(pfx) && sz == 2) { 13699 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13700 
"punpcklqdq", 13701 Iop_InterleaveLO64x2, True ); 13702 goto decode_success; 13703 } 13704 break; 13705 13706 case 0x6D: 13707 /* 66 0F 6D = PUNPCKHQDQ */ 13708 if (have66noF2noF3(pfx) && sz == 2) { 13709 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13710 "punpckhqdq", 13711 Iop_InterleaveHI64x2, True ); 13712 goto decode_success; 13713 } 13714 break; 13715 13716 case 0x6E: 13717 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 13718 zeroing high 3/4 of xmm. */ 13719 /* or from ireg64/m64 to xmm lo 1/2, 13720 zeroing high 1/2 of xmm. */ 13721 if (have66noF2noF3(pfx)) { 13722 vassert(sz == 2 || sz == 8); 13723 if (sz == 2) sz = 4; 13724 modrm = getUChar(delta); 13725 if (epartIsReg(modrm)) { 13726 delta += 1; 13727 if (sz == 4) { 13728 putXMMReg( 13729 gregOfRexRM(pfx,modrm), 13730 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 13731 ); 13732 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13733 nameXMMReg(gregOfRexRM(pfx,modrm))); 13734 } else { 13735 putXMMReg( 13736 gregOfRexRM(pfx,modrm), 13737 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 13738 ); 13739 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13740 nameXMMReg(gregOfRexRM(pfx,modrm))); 13741 } 13742 } else { 13743 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13744 delta += alen; 13745 putXMMReg( 13746 gregOfRexRM(pfx,modrm), 13747 sz == 4 13748 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 13749 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 13750 ); 13751 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 13752 nameXMMReg(gregOfRexRM(pfx,modrm))); 13753 } 13754 goto decode_success; 13755 } 13756 break; 13757 13758 case 0x6F: 13759 if (have66noF2noF3(pfx) 13760 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13761 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 13762 modrm = getUChar(delta); 13763 if (epartIsReg(modrm)) { 13764 putXMMReg( gregOfRexRM(pfx,modrm), 13765 getXMMReg( eregOfRexRM(pfx,modrm) )); 13766 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13767 nameXMMReg(gregOfRexRM(pfx,modrm))); 13768 delta += 1; 13769 } else { 13770 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13771 gen_SEGV_if_not_16_aligned( addr ); 13772 putXMMReg( gregOfRexRM(pfx,modrm), 13773 loadLE(Ity_V128, mkexpr(addr)) ); 13774 DIP("movdqa %s,%s\n", dis_buf, 13775 nameXMMReg(gregOfRexRM(pfx,modrm))); 13776 delta += alen; 13777 } 13778 goto decode_success; 13779 } 13780 if (haveF3no66noF2(pfx) && sz == 4) { 13781 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). 
*/ 13782 modrm = getUChar(delta); 13783 if (epartIsReg(modrm)) { 13784 putXMMReg( gregOfRexRM(pfx,modrm), 13785 getXMMReg( eregOfRexRM(pfx,modrm) )); 13786 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13787 nameXMMReg(gregOfRexRM(pfx,modrm))); 13788 delta += 1; 13789 } else { 13790 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13791 putXMMReg( gregOfRexRM(pfx,modrm), 13792 loadLE(Ity_V128, mkexpr(addr)) ); 13793 DIP("movdqu %s,%s\n", dis_buf, 13794 nameXMMReg(gregOfRexRM(pfx,modrm))); 13795 delta += alen; 13796 } 13797 goto decode_success; 13798 } 13799 break; 13800 13801 case 0x70: 13802 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 13803 if (have66noF2noF3(pfx) && sz == 2) { 13804 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/); 13805 goto decode_success; 13806 } 13807 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13808 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 13809 if (haveNo66noF2noF3(pfx) && sz == 4) { 13810 Int order; 13811 IRTemp sV, dV, s3, s2, s1, s0; 13812 s3 = s2 = s1 = s0 = IRTemp_INVALID; 13813 sV = newTemp(Ity_I64); 13814 dV = newTemp(Ity_I64); 13815 do_MMX_preamble(); 13816 modrm = getUChar(delta); 13817 if (epartIsReg(modrm)) { 13818 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13819 order = (Int)getUChar(delta+1); 13820 delta += 1+1; 13821 DIP("pshufw $%d,%s,%s\n", order, 13822 nameMMXReg(eregLO3ofRM(modrm)), 13823 nameMMXReg(gregLO3ofRM(modrm))); 13824 } else { 13825 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13826 1/*extra byte after amode*/ ); 13827 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13828 order = (Int)getUChar(delta+alen); 13829 delta += 1+alen; 13830 DIP("pshufw $%d,%s,%s\n", order, 13831 dis_buf, 13832 nameMMXReg(gregLO3ofRM(modrm))); 13833 } 13834 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 13835 # define SEL(n) \ 13836 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
s2 : s3))) 13837 assign(dV, 13838 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 13839 SEL((order>>2)&3), SEL((order>>0)&3) ) 13840 ); 13841 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 13842 # undef SEL 13843 goto decode_success; 13844 } 13845 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 13846 mem) to G(xmm), and copy upper half */ 13847 if (haveF2no66noF3(pfx) && sz == 4) { 13848 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13849 False/*!isAvx*/, False/*!xIsH*/ ); 13850 goto decode_success; 13851 } 13852 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 13853 mem) to G(xmm), and copy lower half */ 13854 if (haveF3no66noF2(pfx) && sz == 4) { 13855 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13856 False/*!isAvx*/, True/*xIsH*/ ); 13857 goto decode_success; 13858 } 13859 break; 13860 13861 case 0x71: 13862 /* 66 0F 71 /2 ib = PSRLW by immediate */ 13863 if (have66noF2noF3(pfx) && sz == 2 13864 && epartIsReg(getUChar(delta)) 13865 && gregLO3ofRM(getUChar(delta)) == 2) { 13866 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 ); 13867 goto decode_success; 13868 } 13869 /* 66 0F 71 /4 ib = PSRAW by immediate */ 13870 if (have66noF2noF3(pfx) && sz == 2 13871 && epartIsReg(getUChar(delta)) 13872 && gregLO3ofRM(getUChar(delta)) == 4) { 13873 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 ); 13874 goto decode_success; 13875 } 13876 /* 66 0F 71 /6 ib = PSLLW by immediate */ 13877 if (have66noF2noF3(pfx) && sz == 2 13878 && epartIsReg(getUChar(delta)) 13879 && gregLO3ofRM(getUChar(delta)) == 6) { 13880 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 ); 13881 goto decode_success; 13882 } 13883 break; 13884 13885 case 0x72: 13886 /* 66 0F 72 /2 ib = PSRLD by immediate */ 13887 if (have66noF2noF3(pfx) && sz == 2 13888 && epartIsReg(getUChar(delta)) 13889 && gregLO3ofRM(getUChar(delta)) == 2) { 13890 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 ); 13891 goto decode_success; 13892 } 13893 /* 66 0F 72 /4 ib = PSRAD by immediate */ 13894 if (have66noF2noF3(pfx) && sz == 2 13895 && epartIsReg(getUChar(delta)) 13896 && gregLO3ofRM(getUChar(delta)) == 4) { 13897 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 ); 13898 goto decode_success; 13899 } 13900 /* 66 0F 72 /6 ib = PSLLD by immediate */ 13901 if (have66noF2noF3(pfx) && sz == 2 13902 && epartIsReg(getUChar(delta)) 13903 && gregLO3ofRM(getUChar(delta)) == 6) { 13904 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 ); 13905 goto decode_success; 13906 } 13907 break; 13908 13909 case 0x73: 13910 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 13911 /* note, if mem case ever filled in, 1 byte after amode */ 13912 if (have66noF2noF3(pfx) && sz == 2 13913 && epartIsReg(getUChar(delta)) 13914 && gregLO3ofRM(getUChar(delta)) == 3) { 13915 Int imm = (Int)getUChar(delta+1); 13916 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13917 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 13918 delta += 2; 13919 IRTemp sV = newTemp(Ity_V128); 13920 assign( sV, getXMMReg(reg) ); 13921 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm ))); 13922 goto decode_success; 13923 } 13924 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 13925 /* note, if mem case ever filled in, 1 byte after amode */ 13926 if (have66noF2noF3(pfx) && sz == 2 13927 && epartIsReg(getUChar(delta)) 13928 && gregLO3ofRM(getUChar(delta)) == 7) { 13929 Int imm = (Int)getUChar(delta+1); 13930 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13931 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 13932 vassert(imm >= 0 && imm 
<= 255); 13933 delta += 2; 13934 IRTemp sV = newTemp(Ity_V128); 13935 assign( sV, getXMMReg(reg) ); 13936 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm ))); 13937 goto decode_success; 13938 } 13939 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 13940 if (have66noF2noF3(pfx) && sz == 2 13941 && epartIsReg(getUChar(delta)) 13942 && gregLO3ofRM(getUChar(delta)) == 2) { 13943 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 ); 13944 goto decode_success; 13945 } 13946 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 13947 if (have66noF2noF3(pfx) && sz == 2 13948 && epartIsReg(getUChar(delta)) 13949 && gregLO3ofRM(getUChar(delta)) == 6) { 13950 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 ); 13951 goto decode_success; 13952 } 13953 break; 13954 13955 case 0x74: 13956 /* 66 0F 74 = PCMPEQB */ 13957 if (have66noF2noF3(pfx) && sz == 2) { 13958 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13959 "pcmpeqb", Iop_CmpEQ8x16, False ); 13960 goto decode_success; 13961 } 13962 break; 13963 13964 case 0x75: 13965 /* 66 0F 75 = PCMPEQW */ 13966 if (have66noF2noF3(pfx) && sz == 2) { 13967 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13968 "pcmpeqw", Iop_CmpEQ16x8, False ); 13969 goto decode_success; 13970 } 13971 break; 13972 13973 case 0x76: 13974 /* 66 0F 76 = PCMPEQD */ 13975 if (have66noF2noF3(pfx) && sz == 2) { 13976 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13977 "pcmpeqd", Iop_CmpEQ32x4, False ); 13978 goto decode_success; 13979 } 13980 break; 13981 13982 case 0x7E: 13983 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 13984 G (lo half xmm). Upper half of G is zeroed out. */ 13985 if (haveF3no66noF2(pfx) 13986 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13987 modrm = getUChar(delta); 13988 if (epartIsReg(modrm)) { 13989 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13990 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 13991 /* zero bits 127:64 */ 13992 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 13993 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13994 nameXMMReg(gregOfRexRM(pfx,modrm))); 13995 delta += 1; 13996 } else { 13997 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13998 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 13999 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 14000 loadLE(Ity_I64, mkexpr(addr)) ); 14001 DIP("movsd %s,%s\n", dis_buf, 14002 nameXMMReg(gregOfRexRM(pfx,modrm))); 14003 delta += alen; 14004 } 14005 goto decode_success; 14006 } 14007 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 14008 /* or from xmm low 1/2 to ireg64 or m64. */ 14009 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 14010 if (sz == 2) sz = 4; 14011 modrm = getUChar(delta); 14012 if (epartIsReg(modrm)) { 14013 delta += 1; 14014 if (sz == 4) { 14015 putIReg32( eregOfRexRM(pfx,modrm), 14016 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 14017 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14018 nameIReg32(eregOfRexRM(pfx,modrm))); 14019 } else { 14020 putIReg64( eregOfRexRM(pfx,modrm), 14021 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 14022 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14023 nameIReg64(eregOfRexRM(pfx,modrm))); 14024 } 14025 } else { 14026 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14027 delta += alen; 14028 storeLE( mkexpr(addr), 14029 sz == 4 14030 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 14031 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 14032 DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', 14033 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14034 } 14035 goto decode_success; 14036 } 14037 break; 14038 14039 case 0x7F: 14040 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 14041 if (haveF3no66noF2(pfx) && sz == 4) { 14042 modrm = getUChar(delta); 14043 if (epartIsReg(modrm)) { 14044 goto decode_failure; /* awaiting test case */ 14045 delta += 1; 14046 putXMMReg( eregOfRexRM(pfx,modrm), 14047 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14048 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14049 nameXMMReg(eregOfRexRM(pfx,modrm))); 14050 } else { 14051 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14052 delta += alen; 14053 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14054 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14055 } 14056 goto decode_success; 14057 } 14058 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 14059 if (have66noF2noF3(pfx) && sz == 2) { 14060 modrm = getUChar(delta); 14061 if (epartIsReg(modrm)) { 14062 delta += 1; 14063 putXMMReg( eregOfRexRM(pfx,modrm), 14064 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14065 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14066 nameXMMReg(eregOfRexRM(pfx,modrm))); 14067 } else { 14068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14069 gen_SEGV_if_not_16_aligned( addr ); 14070 delta += alen; 14071 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14072 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14073 } 14074 goto decode_success; 14075 } 14076 break; 14077 14078 case 0xAE: 14079 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 14080 if (haveNo66noF2noF3(pfx) 14081 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14082 && sz == 4) { 14083 delta += 1; 14084 /* Insert a memory fence. It's sometimes important that these 14085 are carried through to the generated code. */ 14086 stmt( IRStmt_MBE(Imbe_Fence) ); 14087 DIP("sfence\n"); 14088 goto decode_success; 14089 } 14090 /* mindless duplication follows .. */ 14091 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 14092 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 14093 if (haveNo66noF2noF3(pfx) 14094 && epartIsReg(getUChar(delta)) 14095 && (gregLO3ofRM(getUChar(delta)) == 5 14096 || gregLO3ofRM(getUChar(delta)) == 6) 14097 && sz == 4) { 14098 delta += 1; 14099 /* Insert a memory fence. It's sometimes important that these 14100 are carried through to the generated code. */ 14101 stmt( IRStmt_MBE(Imbe_Fence) ); 14102 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m"); 14103 goto decode_success; 14104 } 14105 14106 /* 0F AE /7 = CLFLUSH -- flush cache line */ 14107 if (haveNo66noF2noF3(pfx) 14108 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14109 && sz == 4) { 14110 14111 /* This is something of a hack. We need to know the size of 14112 the cache line containing addr. Since we don't (easily), 14113 assume 256 on the basis that no real cache would have a 14114 line that big. It's safe to invalidate more stuff than we 14115 need, just inefficient. */ 14116 ULong lineszB = 256ULL; 14117 14118 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14119 delta += alen; 14120 14121 /* Round addr down to the start of the containing block. 
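For example, with lineszB == 256, an addr of 0x40123456 is ANDed with ~0xFF to give 0x40123400; CMSTART then holds 0x40123400 and CMLEN 256, so the invalidated range 0x40123400 .. 0x401234FF covers the flushed address.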
*/ 14122 stmt( IRStmt_Put( 14123 OFFB_CMSTART, 14124 binop( Iop_And64, 14125 mkexpr(addr), 14126 mkU64( ~(lineszB-1) ))) ); 14127 14128 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); 14129 14130 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); 14131 14132 DIP("clflush %s\n", dis_buf); 14133 goto decode_success; 14134 } 14135 14136 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 14137 if (haveNo66noF2noF3(pfx) 14138 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 14139 && sz == 4) { 14140 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14141 goto decode_success; 14142 } 14143 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 14144 if (haveNo66noF2noF3(pfx) 14145 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 14146 && sz == 4) { 14147 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14148 goto decode_success; 14149 } 14150 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 14151 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14152 && !epartIsReg(getUChar(delta)) 14153 && gregOfRexRM(pfx,getUChar(delta)) == 0) { 14154 delta = dis_FXSAVE(vbi, pfx, delta, sz); 14155 goto decode_success; 14156 } 14157 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 14158 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14159 && !epartIsReg(getUChar(delta)) 14160 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 14161 delta = dis_FXRSTOR(vbi, pfx, delta, sz); 14162 goto decode_success; 14163 } 14164 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */ 14165 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14166 && !epartIsReg(getUChar(delta)) 14167 && gregOfRexRM(pfx,getUChar(delta)) == 4 14168 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14169 delta = dis_XSAVE(vbi, pfx, delta, sz); 14170 goto decode_success; 14171 } 14172 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */ 14173 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14174 && !epartIsReg(getUChar(delta)) 14175 && gregOfRexRM(pfx,getUChar(delta)) == 5 14176 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14177 delta = dis_XRSTOR(vbi, pfx, delta, sz); 14178 goto decode_success; 14179 } 14180 break; 14181 14182 case 0xC2: 14183 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 14184 if (haveNo66noF2noF3(pfx) && sz == 4) { 14185 Long delta0 = delta; 14186 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 14187 if (delta > delta0) goto decode_success; 14188 } 14189 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 14190 if (haveF3no66noF2(pfx) && sz == 4) { 14191 Long delta0 = delta; 14192 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 14193 if (delta > delta0) goto decode_success; 14194 } 14195 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 14196 if (haveF2no66noF3(pfx) && sz == 4) { 14197 Long delta0 = delta; 14198 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 14199 if (delta > delta0) goto decode_success; 14200 } 14201 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 14202 if (have66noF2noF3(pfx) && sz == 2) { 14203 Long delta0 = delta; 14204 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 14205 if (delta > delta0) goto decode_success; 14206 } 14207 break; 14208 14209 case 0xC3: 14210 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. 
*/ 14211 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14212 modrm = getUChar(delta); 14213 if (!epartIsReg(modrm)) { 14214 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14215 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 14216 DIP("movnti %s,%s\n", dis_buf, 14217 nameIRegG(sz, pfx, modrm)); 14218 delta += alen; 14219 goto decode_success; 14220 } 14221 /* else fall through */ 14222 } 14223 break; 14224 14225 case 0xC4: 14226 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14227 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14228 put it into the specified lane of mmx(G). */ 14229 if (haveNo66noF2noF3(pfx) 14230 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14231 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 14232 mmx reg. t4 is the new lane value. t5 is the original 14233 mmx value. t6 is the new mmx value. */ 14234 Int lane; 14235 t4 = newTemp(Ity_I16); 14236 t5 = newTemp(Ity_I64); 14237 t6 = newTemp(Ity_I64); 14238 modrm = getUChar(delta); 14239 do_MMX_preamble(); 14240 14241 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 14242 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 14243 14244 if (epartIsReg(modrm)) { 14245 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 14246 delta += 1+1; 14247 lane = getUChar(delta-1); 14248 DIP("pinsrw $%d,%s,%s\n", lane, 14249 nameIReg16(eregOfRexRM(pfx,modrm)), 14250 nameMMXReg(gregLO3ofRM(modrm))); 14251 } else { 14252 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14253 delta += 1+alen; 14254 lane = getUChar(delta-1); 14255 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14256 DIP("pinsrw $%d,%s,%s\n", lane, 14257 dis_buf, 14258 nameMMXReg(gregLO3ofRM(modrm))); 14259 } 14260 14261 switch (lane & 3) { 14262 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 14263 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 14264 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 14265 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 14266 default: vassert(0); 14267 } 14268 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 14269 goto decode_success; 14270 } 14271 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14272 put it into the specified lane of xmm(G). */ 14273 if (have66noF2noF3(pfx) 14274 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14275 Int lane; 14276 t4 = newTemp(Ity_I16); 14277 modrm = getUChar(delta); 14278 UInt rG = gregOfRexRM(pfx,modrm); 14279 if (epartIsReg(modrm)) { 14280 UInt rE = eregOfRexRM(pfx,modrm); 14281 assign(t4, getIReg16(rE)); 14282 delta += 1+1; 14283 lane = getUChar(delta-1); 14284 DIP("pinsrw $%d,%s,%s\n", 14285 lane, nameIReg16(rE), nameXMMReg(rG)); 14286 } else { 14287 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 14288 1/*byte after the amode*/ ); 14289 delta += 1+alen; 14290 lane = getUChar(delta-1); 14291 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14292 DIP("pinsrw $%d,%s,%s\n", 14293 lane, dis_buf, nameXMMReg(rG)); 14294 } 14295 IRTemp src_vec = newTemp(Ity_V128); 14296 assign(src_vec, getXMMReg(rG)); 14297 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 14298 putXMMReg(rG, mkexpr(res_vec)); 14299 goto decode_success; 14300 } 14301 break; 14302 14303 case 0xC5: 14304 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14305 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 14306 zero-extend of it in ireg(G). 
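For example, "pextrw $2, %mm3, %eax" copies bits 47:32 of %mm3 into %eax, clearing the destination's upper bits; only the low 2 bits of the immediate select the lane, as the switch below shows.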
*/ 14307 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14308 modrm = getUChar(delta); 14309 if (epartIsReg(modrm)) { 14310 IRTemp sV = newTemp(Ity_I64); 14311 t5 = newTemp(Ity_I16); 14312 do_MMX_preamble(); 14313 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 14314 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 14315 switch (getUChar(delta+1) & 3) { 14316 case 0: assign(t5, mkexpr(t0)); break; 14317 case 1: assign(t5, mkexpr(t1)); break; 14318 case 2: assign(t5, mkexpr(t2)); break; 14319 case 3: assign(t5, mkexpr(t3)); break; 14320 default: vassert(0); 14321 } 14322 if (sz == 8) 14323 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 14324 else 14325 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 14326 DIP("pextrw $%d,%s,%s\n", 14327 (Int)getUChar(delta+1), 14328 nameMMXReg(eregLO3ofRM(modrm)), 14329 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 14330 : nameIReg32(gregOfRexRM(pfx,modrm)) 14331 ); 14332 delta += 2; 14333 goto decode_success; 14334 } 14335 /* else fall through */ 14336 /* note, for anyone filling in the mem case: this insn has one 14337 byte after the amode and therefore you must pass 1 as the 14338 last arg to disAMode */ 14339 } 14340 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 14341 zero-extend of it in ireg(G). */ 14342 if (have66noF2noF3(pfx) 14343 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14344 Long delta0 = delta; 14345 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 14346 False/*!isAvx*/ ); 14347 if (delta > delta0) goto decode_success; 14348 /* else fall through -- decoding has failed */ 14349 } 14350 break; 14351 14352 case 0xC6: 14353 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 14354 if (haveNo66noF2noF3(pfx) && sz == 4) { 14355 Int imm8 = 0; 14356 IRTemp sV = newTemp(Ity_V128); 14357 IRTemp dV = newTemp(Ity_V128); 14358 modrm = getUChar(delta); 14359 UInt rG = gregOfRexRM(pfx,modrm); 14360 assign( dV, getXMMReg(rG) ); 14361 if (epartIsReg(modrm)) { 14362 UInt rE = eregOfRexRM(pfx,modrm); 14363 assign( sV, getXMMReg(rE) ); 14364 imm8 = (Int)getUChar(delta+1); 14365 delta += 1+1; 14366 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 14367 } else { 14368 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14369 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14370 imm8 = (Int)getUChar(delta+alen); 14371 delta += 1+alen; 14372 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 14373 } 14374 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 14375 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14376 goto decode_success; 14377 } 14378 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 14379 if (have66noF2noF3(pfx) && sz == 2) { 14380 Int select; 14381 IRTemp sV = newTemp(Ity_V128); 14382 IRTemp dV = newTemp(Ity_V128); 14383 14384 modrm = getUChar(delta); 14385 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 14386 14387 if (epartIsReg(modrm)) { 14388 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 14389 select = (Int)getUChar(delta+1); 14390 delta += 1+1; 14391 DIP("shufpd $%d,%s,%s\n", select, 14392 nameXMMReg(eregOfRexRM(pfx,modrm)), 14393 nameXMMReg(gregOfRexRM(pfx,modrm))); 14394 } else { 14395 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14396 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14397 select = getUChar(delta+alen); 14398 delta += 1+alen; 14399 DIP("shufpd $%d,%s,%s\n", select, 14400 dis_buf, 14401 nameXMMReg(gregOfRexRM(pfx,modrm))); 14402 } 14403 14404 IRTemp res = math_SHUFPD_128( sV, dV, select ); 14405 putXMMReg( gregOfRexRM(pfx,modrm), 
mkexpr(res) ); 14406 goto decode_success; 14407 } 14408 break; 14409 14410 case 0xD1: 14411 /* 66 0F D1 = PSRLW by E */ 14412 if (have66noF2noF3(pfx) && sz == 2) { 14413 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 14414 goto decode_success; 14415 } 14416 break; 14417 14418 case 0xD2: 14419 /* 66 0F D2 = PSRLD by E */ 14420 if (have66noF2noF3(pfx) && sz == 2) { 14421 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 14422 goto decode_success; 14423 } 14424 break; 14425 14426 case 0xD3: 14427 /* 66 0F D3 = PSRLQ by E */ 14428 if (have66noF2noF3(pfx) && sz == 2) { 14429 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 14430 goto decode_success; 14431 } 14432 break; 14433 14434 case 0xD4: 14435 /* 66 0F D4 = PADDQ */ 14436 if (have66noF2noF3(pfx) && sz == 2) { 14437 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14438 "paddq", Iop_Add64x2, False ); 14439 goto decode_success; 14440 } 14441 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14442 /* 0F D4 = PADDQ -- add 64x1 */ 14443 if (haveNo66noF2noF3(pfx) && sz == 4) { 14444 do_MMX_preamble(); 14445 delta = dis_MMXop_regmem_to_reg ( 14446 vbi, pfx, delta, opc, "paddq", False ); 14447 goto decode_success; 14448 } 14449 break; 14450 14451 case 0xD5: 14452 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 14453 if (have66noF2noF3(pfx) && sz == 2) { 14454 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14455 "pmullw", Iop_Mul16x8, False ); 14456 goto decode_success; 14457 } 14458 break; 14459 14460 case 0xD6: 14461 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 14462 hi half). */ 14463 if (haveF3no66noF2(pfx) && sz == 4) { 14464 modrm = getUChar(delta); 14465 if (epartIsReg(modrm)) { 14466 do_MMX_preamble(); 14467 putXMMReg( gregOfRexRM(pfx,modrm), 14468 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 14469 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14470 nameXMMReg(gregOfRexRM(pfx,modrm))); 14471 delta += 1; 14472 goto decode_success; 14473 } 14474 /* apparently no mem case for this insn */ 14475 } 14476 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 14477 or lo half xmm). */ 14478 if (have66noF2noF3(pfx) 14479 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14480 modrm = getUChar(delta); 14481 if (epartIsReg(modrm)) { 14482 /* fall through, awaiting test case */ 14483 /* dst: lo half copied, hi half zeroed */ 14484 } else { 14485 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14486 storeLE( mkexpr(addr), 14487 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 14488 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 14489 delta += alen; 14490 goto decode_success; 14491 } 14492 } 14493 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 14494 if (haveF2no66noF3(pfx) && sz == 4) { 14495 modrm = getUChar(delta); 14496 if (epartIsReg(modrm)) { 14497 do_MMX_preamble(); 14498 putMMXReg( gregLO3ofRM(modrm), 14499 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14500 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14501 nameMMXReg(gregLO3ofRM(modrm))); 14502 delta += 1; 14503 goto decode_success; 14504 } 14505 /* apparently no mem case for this insn */ 14506 } 14507 break; 14508 14509 case 0xD7: 14510 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 14511 lanes in xmm(E), turn them into a byte, and put 14512 zero-extend of it in ireg(G). Doing this directly is just 14513 too cumbersome; give up therefore and call a helper. 
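For example, if only byte lanes 0 and 9 of xmm(E) have their sign bits set, ireg(G) receives 0x201 (bit i of the result is the sign bit of lane i).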
*/ 14514 if (have66noF2noF3(pfx) 14515 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14516 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 14517 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 14518 goto decode_success; 14519 } 14520 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14521 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 14522 mmx(E), turn them into a byte, and put zero-extend of it in 14523 ireg(G). */ 14524 if (haveNo66noF2noF3(pfx) 14525 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14526 modrm = getUChar(delta); 14527 if (epartIsReg(modrm)) { 14528 do_MMX_preamble(); 14529 t0 = newTemp(Ity_I64); 14530 t1 = newTemp(Ity_I32); 14531 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 14532 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); 14533 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); 14534 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14535 nameIReg32(gregOfRexRM(pfx,modrm))); 14536 delta += 1; 14537 goto decode_success; 14538 } 14539 /* else fall through */ 14540 } 14541 break; 14542 14543 case 0xD8: 14544 /* 66 0F D8 = PSUBUSB */ 14545 if (have66noF2noF3(pfx) && sz == 2) { 14546 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14547 "psubusb", Iop_QSub8Ux16, False ); 14548 goto decode_success; 14549 } 14550 break; 14551 14552 case 0xD9: 14553 /* 66 0F D9 = PSUBUSW */ 14554 if (have66noF2noF3(pfx) && sz == 2) { 14555 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14556 "psubusw", Iop_QSub16Ux8, False ); 14557 goto decode_success; 14558 } 14559 break; 14560 14561 case 0xDA: 14562 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14563 /* 0F DA = PMINUB -- 8x8 unsigned min */ 14564 if (haveNo66noF2noF3(pfx) && sz == 4) { 14565 do_MMX_preamble(); 14566 delta = dis_MMXop_regmem_to_reg ( 14567 vbi, pfx, delta, opc, "pminub", False ); 14568 goto decode_success; 14569 } 14570 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 14571 if (have66noF2noF3(pfx) && sz == 2) { 14572 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14573 "pminub", Iop_Min8Ux16, False ); 14574 goto decode_success; 14575 } 14576 break; 14577 14578 case 0xDB: 14579 /* 66 0F DB = PAND */ 14580 if (have66noF2noF3(pfx) && sz == 2) { 14581 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 14582 goto decode_success; 14583 } 14584 break; 14585 14586 case 0xDC: 14587 /* 66 0F DC = PADDUSB */ 14588 if (have66noF2noF3(pfx) && sz == 2) { 14589 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14590 "paddusb", Iop_QAdd8Ux16, False ); 14591 goto decode_success; 14592 } 14593 break; 14594 14595 case 0xDD: 14596 /* 66 0F DD = PADDUSW */ 14597 if (have66noF2noF3(pfx) && sz == 2) { 14598 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14599 "paddusw", Iop_QAdd16Ux8, False ); 14600 goto decode_success; 14601 } 14602 break; 14603 14604 case 0xDE: 14605 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14606 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 14607 if (haveNo66noF2noF3(pfx) && sz == 4) { 14608 do_MMX_preamble(); 14609 delta = dis_MMXop_regmem_to_reg ( 14610 vbi, pfx, delta, opc, "pmaxub", False ); 14611 goto decode_success; 14612 } 14613 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 14614 if (have66noF2noF3(pfx) && sz == 2) { 14615 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14616 "pmaxub", Iop_Max8Ux16, False ); 14617 goto decode_success; 14618 } 14619 break; 14620 14621 case 0xDF: 14622 /* 66 0F DF = PANDN */ 14623 if (have66noF2noF3(pfx) && sz == 2) { 14624 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, 
delta, "pandn", Iop_AndV128 ); 14625 goto decode_success; 14626 } 14627 break; 14628 14629 case 0xE0: 14630 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14631 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 14632 if (haveNo66noF2noF3(pfx) && sz == 4) { 14633 do_MMX_preamble(); 14634 delta = dis_MMXop_regmem_to_reg ( 14635 vbi, pfx, delta, opc, "pavgb", False ); 14636 goto decode_success; 14637 } 14638 /* 66 0F E0 = PAVGB */ 14639 if (have66noF2noF3(pfx) && sz == 2) { 14640 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14641 "pavgb", Iop_Avg8Ux16, False ); 14642 goto decode_success; 14643 } 14644 break; 14645 14646 case 0xE1: 14647 /* 66 0F E1 = PSRAW by E */ 14648 if (have66noF2noF3(pfx) && sz == 2) { 14649 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 14650 goto decode_success; 14651 } 14652 break; 14653 14654 case 0xE2: 14655 /* 66 0F E2 = PSRAD by E */ 14656 if (have66noF2noF3(pfx) && sz == 2) { 14657 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 14658 goto decode_success; 14659 } 14660 break; 14661 14662 case 0xE3: 14663 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14664 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 14665 if (haveNo66noF2noF3(pfx) && sz == 4) { 14666 do_MMX_preamble(); 14667 delta = dis_MMXop_regmem_to_reg ( 14668 vbi, pfx, delta, opc, "pavgw", False ); 14669 goto decode_success; 14670 } 14671 /* 66 0F E3 = PAVGW */ 14672 if (have66noF2noF3(pfx) && sz == 2) { 14673 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14674 "pavgw", Iop_Avg16Ux8, False ); 14675 goto decode_success; 14676 } 14677 break; 14678 14679 case 0xE4: 14680 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14681 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 14682 if (haveNo66noF2noF3(pfx) && sz == 4) { 14683 do_MMX_preamble(); 14684 delta = dis_MMXop_regmem_to_reg ( 14685 vbi, pfx, delta, opc, "pmuluh", False ); 14686 goto decode_success; 14687 } 14688 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 14689 if (have66noF2noF3(pfx) && sz == 2) { 14690 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14691 "pmulhuw", Iop_MulHi16Ux8, False ); 14692 goto decode_success; 14693 } 14694 break; 14695 14696 case 0xE5: 14697 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 14698 if (have66noF2noF3(pfx) && sz == 2) { 14699 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14700 "pmulhw", Iop_MulHi16Sx8, False ); 14701 goto decode_success; 14702 } 14703 break; 14704 14705 case 0xE6: 14706 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14707 lo half xmm(G), and zero upper half, rounding towards zero */ 14708 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14709 lo half xmm(G), according to prevailing rounding mode, and zero 14710 upper half */ 14711 if ( (haveF2no66noF3(pfx) && sz == 4) 14712 || (have66noF2noF3(pfx) && sz == 2) ) { 14713 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 14714 toBool(sz == 2)/*r2zero*/); 14715 goto decode_success; 14716 } 14717 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 14718 F64 in xmm(G) */ 14719 if (haveF3no66noF2(pfx) && sz == 4) { 14720 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 14721 goto decode_success; 14722 } 14723 break; 14724 14725 case 0xE7: 14726 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14727 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. 
Note, the 14728 Intel manual does not say anything about the usual business of 14729 the FP reg tags getting trashed whenever an MMX insn happens. 14730 So we just leave them alone. 14731 */ 14732 if (haveNo66noF2noF3(pfx) && sz == 4) { 14733 modrm = getUChar(delta); 14734 if (!epartIsReg(modrm)) { 14735 /* do_MMX_preamble(); Intel docs don't specify this */ 14736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14737 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 14738 DIP("movntq %s,%s\n", dis_buf, 14739 nameMMXReg(gregLO3ofRM(modrm))); 14740 delta += alen; 14741 goto decode_success; 14742 } 14743 /* else fall through */ 14744 } 14745 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 14746 if (have66noF2noF3(pfx) && sz == 2) { 14747 modrm = getUChar(delta); 14748 if (!epartIsReg(modrm)) { 14749 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14750 gen_SEGV_if_not_16_aligned( addr ); 14751 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14752 DIP("movntdq %s,%s\n", dis_buf, 14753 nameXMMReg(gregOfRexRM(pfx,modrm))); 14754 delta += alen; 14755 goto decode_success; 14756 } 14757 /* else fall through */ 14758 } 14759 break; 14760 14761 case 0xE8: 14762 /* 66 0F E8 = PSUBSB */ 14763 if (have66noF2noF3(pfx) && sz == 2) { 14764 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14765 "psubsb", Iop_QSub8Sx16, False ); 14766 goto decode_success; 14767 } 14768 break; 14769 14770 case 0xE9: 14771 /* 66 0F E9 = PSUBSW */ 14772 if (have66noF2noF3(pfx) && sz == 2) { 14773 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14774 "psubsw", Iop_QSub16Sx8, False ); 14775 goto decode_success; 14776 } 14777 break; 14778 14779 case 0xEA: 14780 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14781 /* 0F EA = PMINSW -- 16x4 signed min */ 14782 if (haveNo66noF2noF3(pfx) && sz == 4) { 14783 do_MMX_preamble(); 14784 delta = dis_MMXop_regmem_to_reg ( 14785 vbi, pfx, delta, opc, "pminsw", False ); 14786 goto decode_success; 14787 } 14788 /* 66 0F EA = PMINSW -- 16x8 signed min */ 14789 if (have66noF2noF3(pfx) && sz == 2) { 14790 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14791 "pminsw", Iop_Min16Sx8, False ); 14792 goto decode_success; 14793 } 14794 break; 14795 14796 case 0xEB: 14797 /* 66 0F EB = POR */ 14798 if (have66noF2noF3(pfx) && sz == 2) { 14799 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 14800 goto decode_success; 14801 } 14802 break; 14803 14804 case 0xEC: 14805 /* 66 0F EC = PADDSB */ 14806 if (have66noF2noF3(pfx) && sz == 2) { 14807 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14808 "paddsb", Iop_QAdd8Sx16, False ); 14809 goto decode_success; 14810 } 14811 break; 14812 14813 case 0xED: 14814 /* 66 0F ED = PADDSW */ 14815 if (have66noF2noF3(pfx) && sz == 2) { 14816 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14817 "paddsw", Iop_QAdd16Sx8, False ); 14818 goto decode_success; 14819 } 14820 break; 14821 14822 case 0xEE: 14823 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14824 /* 0F EE = PMAXSW -- 16x4 signed max */ 14825 if (haveNo66noF2noF3(pfx) && sz == 4) { 14826 do_MMX_preamble(); 14827 delta = dis_MMXop_regmem_to_reg ( 14828 vbi, pfx, delta, opc, "pmaxsw", False ); 14829 goto decode_success; 14830 } 14831 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 14832 if (have66noF2noF3(pfx) && sz == 2) { 14833 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14834 "pmaxsw", Iop_Max16Sx8, False ); 14835 goto decode_success; 14836 } 14837 break; 14838 14839 case 0xEF: 14840 /* 66 0F EF = PXOR */ 14841 if (have66noF2noF3(pfx) && sz 
   case 0xEF:
      /* 66 0F EF = PXOR */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
         goto decode_success;
      }
      break;

   case 0xF1:
      /* 66 0F F1 = PSLLW by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
         goto decode_success;
      }
      break;

   case 0xF2:
      /* 66 0F F2 = PSLLD by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
         goto decode_success;
      }
      break;

   case 0xF3:
      /* 66 0F F3 = PSLLQ by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
         goto decode_success;
      }
      break;

   case 0xF4:
      /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
         0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
         half */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
         goto decode_success;
      }
      /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
      /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
         0 to form 64-bit result */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         IRTemp dV = newTemp(Ity_I64);
         t1 = newTemp(Ity_I32);
         t0 = newTemp(Ity_I32);
         modrm = getUChar(delta);

         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pmuludq %s,%s\n", dis_buf,
                                   nameMMXReg(gregLO3ofRM(modrm)));
         }

         assign( t0, unop(Iop_64to32, mkexpr(dV)) );
         assign( t1, unop(Iop_64to32, mkexpr(sV)) );
         putMMXReg( gregLO3ofRM(modrm),
                    binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
         goto decode_success;
      }
      break;
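
   /* Rough scalar sketch of the PMULUDQ cases above: each 64-bit
      result lane is the full product of the even-numbered (low)
      32-bit lanes of the two operands, with the odd 32-bit lanes
      ignored.  For the MMX form, assuming ULong arithmetic:
         res = (ULong)(UInt)d * (ULong)(UInt)s;
      e.g. 0xFFFFFFFF * 0x00000002 gives 0x00000001FFFFFFFE. */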

   case 0xF5:
      /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
         E(xmm or mem) to G(xmm) */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         assign( dV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
         goto decode_success;
      }
      break;

   case 0xF6:
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "psadbw", False );
         goto decode_success;
      }
      /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
         from E(xmm or mem) to G(xmm) */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         assign( dV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );

         goto decode_success;
      }
      break;

   case 0xF7:
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F F7 = MASKMOVQ -- 8x8 masked store */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Bool ok = False;
         delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
         if (ok) goto decode_success;
      }
      /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
      if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
         delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0xF8:
      /* 66 0F F8 = PSUBB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubb", Iop_Sub8x16, False );
         goto decode_success;
      }
      break;

   case 0xF9:
      /* 66 0F F9 = PSUBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubw", Iop_Sub16x8, False );
         goto decode_success;
      }
      break;

   case 0xFA:
      /* 66 0F FA = PSUBD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubd", Iop_Sub32x4, False );
         goto decode_success;
      }
      break;

   case 0xFB:
      /* 66 0F FB = PSUBQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubq", Iop_Sub64x2, False );
         goto decode_success;
      }
      /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
      /* 0F FB = PSUBQ -- sub 64x1 */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "psubq", False );
         goto decode_success;
      }
      break;
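
   /* Unlike the saturating QAdd/QSub forms earlier, the plain
      PADD/PSUB cases in this range (0xF8 .. 0xFE) wrap modulo the
      lane width: e.g. for PADDB, 0xFF + 0x01 gives 0x00. */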

   case 0xFC:
      /* 66 0F FC = PADDB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddb", Iop_Add8x16, False );
         goto decode_success;
      }
      break;

   case 0xFD:
      /* 66 0F FD = PADDW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddw", Iop_Add16x8, False );
         goto decode_success;
      }
      break;

   case 0xFE:
      /* 66 0F FE = PADDD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddd", Iop_Add32x4, False );
         goto decode_success;
      }
      break;

   default:
      goto decode_failure;

   }

  decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V128);
   IRTemp d0 = newTemp(Ity_I64);
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
      assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
   return delta;
}


static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp d0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
      assign ( d0, getYMMRegLane64(rE, 0) );
      assign ( d1, getYMMRegLane64(rE, 2) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
                                        mkexpr(addr), mkU64(16))) );
      DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   putYMMRegLane64( rG, 0, mkexpr(d0) );
   putYMMRegLane64( rG, 1, mkexpr(d0) );
   putYMMRegLane64( rG, 2, mkexpr(d1) );
   putYMMRegLane64( rG, 3, mkexpr(d1) );
   return delta;
}
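
/* Result layout produced by the MOVDDUP helpers above, with 64-bit
   lanes listed hi..lo: the 128-bit form produces { s0, s0 } from
   source qword 0, and the 256-bit form produces { s2, s2, s0, s0 }
   from source qwords 2 and 0 (for a memory source, bytes 16..23 and
   0..7 respectively). */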

static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool isL )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V128);
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isL )
{
   IRTemp addr = IRTemp_INVALID;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V256);
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
      delta += alen;
   }
   breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
   putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
                                : mkV128from32s( s7, s7, s5, s5 ) );
   putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
                                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}


static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   IRTemp leftV = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm = newTemp(Ity_I32);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;

   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );

   assign( leftV,  mkV128from32s( s2, s0, d2, d0 ) );
   assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}
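
/* For math_HADDPS_128 above: with dV = {d3,d2,d1,d0} and
   sV = {s3,s2,s1,s0} (lanes listed hi..lo), leftV/rightV pair up
   adjacent lanes so that, for HADDPS, a single vector add yields
   { s3+s2, s1+s0, d3+d2, d1+d0 }, and leftV minus rightV gives the
   corresponding HSUBPS result.  math_HADDPD_128 below does the same
   at 64-bit granularity. */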

static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s1, s0, d1, d0;
   IRTemp leftV = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm = newTemp(Ity_I32);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   assign( leftV,  binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}


__attribute__((noinline))
static
Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;
   UChar modrm = 0;
   Int alen = 0;
   HChar dis_buf[50];

   *decode_OK = False;

   Long delta = deltaIN;
   UChar opc = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x12:
      /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (2:2:0:0). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                   True/*isL*/ );
         goto decode_success;
      }
      /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (0:1:0:1). */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x16:
      /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (3:3:1:1). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                   False/*!isL*/ );
         goto decode_success;
      }
      break;

   case 0x7C:
   case 0x7D:
      /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
      /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
      if (haveF2no66noF3(pfx) && sz == 4) {
         IRTemp eV = newTemp(Ity_V128);
         IRTemp gV = newTemp(Ity_V128);
         Bool isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
            delta += alen;
         }

         assign( gV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
         goto decode_success;
      }
      /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
      /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp eV = newTemp(Ity_V128);
         IRTemp gV = newTemp(Ity_V128);
         Bool isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
            delta += alen;
         }

         assign( gV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
         goto decode_success;
      }
      break;

   case 0xD0:
      /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp eV = newTemp(Ity_V128);
         IRTemp gV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }

         assign( gV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
         goto decode_success;
      }
      /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
      if (haveF2no66noF3(pfx) && sz == 4) {
         IRTemp eV = newTemp(Ity_V128);
         IRTemp gV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }

         assign( gV, getXMMReg(rG) );
         putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
         goto decode_success;
      }
      break;

   case 0xF0:
      /* F2 0F F0 = LDDQU -- move from E (mem only) to G (xmm). */
      if (haveF2no66noF3(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            goto decode_failure;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("lddqu %s,%s\n",