/*--------------------------------------------------------------------*/
/*--- begin                                   guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/

/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.  These are:

      cmp{s,sb,sw,sd,sq}
      in{s,sb,sw,sd}
      jcxz, jecxz, jrcxz
      lod{s,sb,sw,sd,sq}
      loop{,e,bz,be,z}
      mov{s,sb,sw,sd,sq}
      out{s,sb,sw,sd}
      rep{,e,ne,nz}
      sca{s,sb,sw,sd,sq}
      sto{s,sb,sw,sd,sq}
      xlat{,b} */
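/* Illustrative sketch only, not decoder code: the effect described
   above is that, when a 0x67 prefix is present, a 64-bit effective
   address 'ea' is used as if it had been computed as

      ea & 0xFFFFFFFFULL     (top 32 bits forced to zero)

   whereas stack references (push/pop) ignore the prefix and always
   use the full 64-bit address. */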
/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means:
   rolq $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp3         (not, neg)
      dis_Grp4         (inc, dec)
      dis_Grp5         (inc, dec)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
      dis_xadd_G_E     (xadd)
*/


#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly.
*/ 186 187 /* We need to know this to do sub-register accesses correctly. */ 188 static Bool host_is_bigendian; 189 190 /* Pointer to the guest code area (points to start of BB, not to the 191 insn being processed). */ 192 static UChar* guest_code; 193 194 /* The guest address corresponding to guest_code[0]. */ 195 static Addr64 guest_RIP_bbstart; 196 197 /* The guest address for the instruction currently being 198 translated. */ 199 static Addr64 guest_RIP_curr_instr; 200 201 /* The IRSB* into which we're generating code. */ 202 static IRSB* irsb; 203 204 /* For ensuring that %rip-relative addressing is done right. A read 205 of %rip generates the address of the next instruction. It may be 206 that we don't conveniently know that inside disAMode(). For sanity 207 checking, if the next insn %rip is needed, we make a guess at what 208 it is, record that guess here, and set the accompanying Bool to 209 indicate that -- after this insn's decode is finished -- that guess 210 needs to be checked. */ 211 212 /* At the start of each insn decode, is set to (0, False). 213 After the decode, if _mustcheck is now True, _assumed is 214 checked. */ 215 216 static Addr64 guest_RIP_next_assumed; 217 static Bool guest_RIP_next_mustcheck; 218 219 220 /*------------------------------------------------------------*/ 221 /*--- Helpers for constructing IR. ---*/ 222 /*------------------------------------------------------------*/ 223 224 /* Generate a new temporary of the given type. */ 225 static IRTemp newTemp ( IRType ty ) 226 { 227 vassert(isPlausibleIRType(ty)); 228 return newIRTemp( irsb->tyenv, ty ); 229 } 230 231 /* Add a statement to the list held by "irsb". */ 232 static void stmt ( IRStmt* st ) 233 { 234 addStmtToIRSB( irsb, st ); 235 } 236 237 /* Generate a statement "dst := e". 
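   For example (an illustrative sketch using the helpers defined just
   below): assign(t, binop(Iop_Add64, mkexpr(s), mkU64(1))) appends
   the IR statement  t = s + 1  to 'irsb'.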
*/ 238 static void assign ( IRTemp dst, IRExpr* e ) 239 { 240 stmt( IRStmt_WrTmp(dst, e) ); 241 } 242 243 static IRExpr* unop ( IROp op, IRExpr* a ) 244 { 245 return IRExpr_Unop(op, a); 246 } 247 248 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 249 { 250 return IRExpr_Binop(op, a1, a2); 251 } 252 253 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 254 { 255 return IRExpr_Triop(op, a1, a2, a3); 256 } 257 258 static IRExpr* mkexpr ( IRTemp tmp ) 259 { 260 return IRExpr_RdTmp(tmp); 261 } 262 263 static IRExpr* mkU8 ( ULong i ) 264 { 265 vassert(i < 256); 266 return IRExpr_Const(IRConst_U8( (UChar)i )); 267 } 268 269 static IRExpr* mkU16 ( ULong i ) 270 { 271 vassert(i < 0x10000ULL); 272 return IRExpr_Const(IRConst_U16( (UShort)i )); 273 } 274 275 static IRExpr* mkU32 ( ULong i ) 276 { 277 vassert(i < 0x100000000ULL); 278 return IRExpr_Const(IRConst_U32( (UInt)i )); 279 } 280 281 static IRExpr* mkU64 ( ULong i ) 282 { 283 return IRExpr_Const(IRConst_U64(i)); 284 } 285 286 static IRExpr* mkU ( IRType ty, ULong i ) 287 { 288 switch (ty) { 289 case Ity_I8: return mkU8(i); 290 case Ity_I16: return mkU16(i); 291 case Ity_I32: return mkU32(i); 292 case Ity_I64: return mkU64(i); 293 default: vpanic("mkU(amd64)"); 294 } 295 } 296 297 static void storeLE ( IRExpr* addr, IRExpr* data ) 298 { 299 stmt( IRStmt_Store(Iend_LE, addr, data) ); 300 } 301 302 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 303 { 304 return IRExpr_Load(Iend_LE, ty, addr); 305 } 306 307 static IROp mkSizedOp ( IRType ty, IROp op8 ) 308 { 309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 310 || op8 == Iop_Mul8 311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 314 || op8 == Iop_CasCmpNE8 315 || op8 == Iop_Not8 ); 316 switch (ty) { 317 case Ity_I8: return 0 +op8; 318 case Ity_I16: return 1 +op8; 319 case Ity_I32: return 2 +op8; 320 case Ity_I64: return 3 +op8; 321 default: vpanic("mkSizedOp(amd64)"); 322 } 323 } 324 325 static 326 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) 327 { 328 if (szSmall == 1 && szBig == 4) { 329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); 330 } 331 if (szSmall == 1 && szBig == 2) { 332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); 333 } 334 if (szSmall == 2 && szBig == 4) { 335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); 336 } 337 if (szSmall == 1 && szBig == 8 && !signd) { 338 return unop(Iop_8Uto64, src); 339 } 340 if (szSmall == 1 && szBig == 8 && signd) { 341 return unop(Iop_8Sto64, src); 342 } 343 if (szSmall == 2 && szBig == 8 && !signd) { 344 return unop(Iop_16Uto64, src); 345 } 346 if (szSmall == 2 && szBig == 8 && signd) { 347 return unop(Iop_16Sto64, src); 348 } 349 vpanic("doScalarWidening(amd64)"); 350 } 351 352 353 354 /*------------------------------------------------------------*/ 355 /*--- Debugging output ---*/ 356 /*------------------------------------------------------------*/ 357 358 /* Bomb out if we can't handle something. */ 359 __attribute__ ((noreturn)) 360 static void unimplemented ( const HChar* str ) 361 { 362 vex_printf("amd64toIR: unimplemented feature\n"); 363 vpanic(str); 364 } 365 366 #define DIP(format, args...) \ 367 if (vex_traceflags & VEX_TRACE_FE) \ 368 vex_printf(format, ## args) 369 370 #define DIS(buf, format, args...) 
\ 371 if (vex_traceflags & VEX_TRACE_FE) \ 372 vex_sprintf(buf, format, ## args) 373 374 375 /*------------------------------------------------------------*/ 376 /*--- Offsets of various parts of the amd64 guest state. ---*/ 377 /*------------------------------------------------------------*/ 378 379 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX) 380 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX) 381 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX) 382 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX) 383 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP) 384 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP) 385 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI) 386 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI) 387 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8) 388 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9) 389 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10) 390 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11) 391 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12) 392 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13) 393 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14) 394 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15) 395 396 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP) 397 398 #define OFFB_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO) 399 #define OFFB_GS_0x60 offsetof(VexGuestAMD64State,guest_GS_0x60) 400 401 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP) 402 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1) 403 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2) 404 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP) 405 406 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0]) 407 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0]) 408 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) 409 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG) 410 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) 411 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) 412 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) 413 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND) 414 415 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND) 416 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0) 417 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1) 418 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2) 419 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3) 420 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4) 421 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5) 422 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6) 423 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7) 424 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8) 425 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9) 426 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10) 427 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11) 428 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12) 429 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13) 430 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14) 431 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15) 432 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16) 433 434 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE) 435 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART) 436 #define 
OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN) 437 438 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) 439 440 441 /*------------------------------------------------------------*/ 442 /*--- Helper bits and pieces for deconstructing the ---*/ 443 /*--- amd64 insn stream. ---*/ 444 /*------------------------------------------------------------*/ 445 446 /* This is the AMD64 register encoding -- integer regs. */ 447 #define R_RAX 0 448 #define R_RCX 1 449 #define R_RDX 2 450 #define R_RBX 3 451 #define R_RSP 4 452 #define R_RBP 5 453 #define R_RSI 6 454 #define R_RDI 7 455 #define R_R8 8 456 #define R_R9 9 457 #define R_R10 10 458 #define R_R11 11 459 #define R_R12 12 460 #define R_R13 13 461 #define R_R14 14 462 #define R_R15 15 463 464 /* This is the Intel register encoding -- segment regs. */ 465 #define R_ES 0 466 #define R_CS 1 467 #define R_SS 2 468 #define R_DS 3 469 #define R_FS 4 470 #define R_GS 5 471 472 473 /* Various simple conversions */ 474 475 static ULong extend_s_8to64 ( UChar x ) 476 { 477 return (ULong)((((Long)x) << 56) >> 56); 478 } 479 480 static ULong extend_s_16to64 ( UShort x ) 481 { 482 return (ULong)((((Long)x) << 48) >> 48); 483 } 484 485 static ULong extend_s_32to64 ( UInt x ) 486 { 487 return (ULong)((((Long)x) << 32) >> 32); 488 } 489 490 /* Figure out whether the mod and rm parts of a modRM byte refer to a 491 register or memory. If so, the byte will have the form 11XXXYYY, 492 where YYY is the register number. */ 493 inline 494 static Bool epartIsReg ( UChar mod_reg_rm ) 495 { 496 return toBool(0xC0 == (mod_reg_rm & 0xC0)); 497 } 498 499 /* Extract the 'g' field from a modRM byte. This only produces 3 500 bits, which is not a complete register number. You should avoid 501 this function if at all possible. */ 502 inline 503 static Int gregLO3ofRM ( UChar mod_reg_rm ) 504 { 505 return (Int)( (mod_reg_rm >> 3) & 7 ); 506 } 507 508 /* Ditto the 'e' field of a modRM byte. */ 509 inline 510 static Int eregLO3ofRM ( UChar mod_reg_rm ) 511 { 512 return (Int)(mod_reg_rm & 0x7); 513 } 514 515 /* Get a 8/16/32-bit unsigned value out of the insn stream. */ 516 517 static inline UChar getUChar ( Long delta ) 518 { 519 UChar v = guest_code[delta+0]; 520 return v; 521 } 522 523 static UInt getUDisp16 ( Long delta ) 524 { 525 UInt v = guest_code[delta+1]; v <<= 8; 526 v |= guest_code[delta+0]; 527 return v & 0xFFFF; 528 } 529 530 //.. static UInt getUDisp ( Int size, Long delta ) 531 //.. { 532 //.. switch (size) { 533 //.. case 4: return getUDisp32(delta); 534 //.. case 2: return getUDisp16(delta); 535 //.. case 1: return getUChar(delta); 536 //.. default: vpanic("getUDisp(x86)"); 537 //.. } 538 //.. return 0; /*notreached*/ 539 //.. } 540 541 542 /* Get a byte value out of the insn stream and sign-extend to 64 543 bits. */ 544 static Long getSDisp8 ( Long delta ) 545 { 546 return extend_s_8to64( guest_code[delta] ); 547 } 548 549 /* Get a 16-bit value out of the insn stream and sign-extend to 64 550 bits. */ 551 static Long getSDisp16 ( Long delta ) 552 { 553 UInt v = guest_code[delta+1]; v <<= 8; 554 v |= guest_code[delta+0]; 555 return extend_s_16to64( (UShort)v ); 556 } 557 558 /* Get a 32-bit value out of the insn stream and sign-extend to 64 559 bits. */ 560 static Long getSDisp32 ( Long delta ) 561 { 562 UInt v = guest_code[delta+3]; v <<= 8; 563 v |= guest_code[delta+2]; v <<= 8; 564 v |= guest_code[delta+1]; v <<= 8; 565 v |= guest_code[delta+0]; 566 return extend_s_32to64( v ); 567 } 568 569 /* Get a 64-bit value out of the insn stream. 
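   For example (illustrative): the eight bytes 88 77 66 55 44 33 22 11
   starting at guest_code[delta] are assembled little-endian into the
   value 0x1122334455667788; guest_code[delta+0] supplies the least
   significant byte.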
*/ 570 static Long getDisp64 ( Long delta ) 571 { 572 ULong v = 0; 573 v |= guest_code[delta+7]; v <<= 8; 574 v |= guest_code[delta+6]; v <<= 8; 575 v |= guest_code[delta+5]; v <<= 8; 576 v |= guest_code[delta+4]; v <<= 8; 577 v |= guest_code[delta+3]; v <<= 8; 578 v |= guest_code[delta+2]; v <<= 8; 579 v |= guest_code[delta+1]; v <<= 8; 580 v |= guest_code[delta+0]; 581 return v; 582 } 583 584 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error 585 if this is called with size==8. Should not happen. */ 586 static Long getSDisp ( Int size, Long delta ) 587 { 588 switch (size) { 589 case 4: return getSDisp32(delta); 590 case 2: return getSDisp16(delta); 591 case 1: return getSDisp8(delta); 592 default: vpanic("getSDisp(amd64)"); 593 } 594 } 595 596 static ULong mkSizeMask ( Int sz ) 597 { 598 switch (sz) { 599 case 1: return 0x00000000000000FFULL; 600 case 2: return 0x000000000000FFFFULL; 601 case 4: return 0x00000000FFFFFFFFULL; 602 case 8: return 0xFFFFFFFFFFFFFFFFULL; 603 default: vpanic("mkSzMask(amd64)"); 604 } 605 } 606 607 static Int imin ( Int a, Int b ) 608 { 609 return (a < b) ? a : b; 610 } 611 612 static IRType szToITy ( Int n ) 613 { 614 switch (n) { 615 case 1: return Ity_I8; 616 case 2: return Ity_I16; 617 case 4: return Ity_I32; 618 case 8: return Ity_I64; 619 default: vex_printf("\nszToITy(%d)\n", n); 620 vpanic("szToITy(amd64)"); 621 } 622 } 623 624 625 /*------------------------------------------------------------*/ 626 /*--- For dealing with prefixes. ---*/ 627 /*------------------------------------------------------------*/ 628 629 /* The idea is to pass around an int holding a bitmask summarising 630 info from the prefixes seen on the current instruction, including 631 info from the REX byte. This info is used in various places, but 632 most especially when making sense of register fields in 633 instructions. 634 635 The top 8 bits of the prefix are 0x55, just as a hacky way to 636 ensure it really is a valid prefix. 637 638 Things you can safely assume about a well-formed prefix: 639 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. 640 * if REX is not present then REXW,REXR,REXX,REXB will read 641 as zero. 642 * F2 and F3 will not both be 1. 
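   For example (illustrative): a lone REX byte 0x4D (binary 0100 1101)
   is summarised here as PFX_REX|PFX_REXW|PFX_REXR|PFX_REXB, so
   getRexW, getRexR and getRexB below each return 1 while getRexX
   returns 0.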
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


#define PFX_EMPTY 0x55000000

static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is.
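   For example (illustrative): 0F 10 is MOVUPS, but with an F3 prefix
   it becomes MOVSS and with an F2 prefix it becomes MOVSD, so a
   decoder that silently ignored a stray F2/F3 would decode a
   different instruction entirely.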
*/ 699 static Bool haveF2orF3 ( Prefix pfx ) { 700 return toBool((pfx & (PFX_F2|PFX_F3)) > 0); 701 } 702 static Bool haveF2andF3 ( Prefix pfx ) { 703 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3)); 704 } 705 static Bool haveF2 ( Prefix pfx ) { 706 return toBool((pfx & PFX_F2) > 0); 707 } 708 static Bool haveF3 ( Prefix pfx ) { 709 return toBool((pfx & PFX_F3) > 0); 710 } 711 712 static Bool have66 ( Prefix pfx ) { 713 return toBool((pfx & PFX_66) > 0); 714 } 715 static Bool haveASO ( Prefix pfx ) { 716 return toBool((pfx & PFX_ASO) > 0); 717 } 718 static Bool haveLOCK ( Prefix pfx ) { 719 return toBool((pfx & PFX_LOCK) > 0); 720 } 721 722 /* Return True iff pfx has 66 set and F2 and F3 clear */ 723 static Bool have66noF2noF3 ( Prefix pfx ) 724 { 725 return 726 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); 727 } 728 729 /* Return True iff pfx has F2 set and 66 and F3 clear */ 730 static Bool haveF2no66noF3 ( Prefix pfx ) 731 { 732 return 733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2); 734 } 735 736 /* Return True iff pfx has F3 set and 66 and F2 clear */ 737 static Bool haveF3no66noF2 ( Prefix pfx ) 738 { 739 return 740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); 741 } 742 743 /* Return True iff pfx has F3 set and F2 clear */ 744 static Bool haveF3noF2 ( Prefix pfx ) 745 { 746 return 747 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); 748 } 749 750 /* Return True iff pfx has F2 set and F3 clear */ 751 static Bool haveF2noF3 ( Prefix pfx ) 752 { 753 return 754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); 755 } 756 757 /* Return True iff pfx has 66, F2 and F3 clear */ 758 static Bool haveNo66noF2noF3 ( Prefix pfx ) 759 { 760 return 761 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); 762 } 763 764 /* Return True iff pfx has any of 66, F2 and F3 set */ 765 static Bool have66orF2orF3 ( Prefix pfx ) 766 { 767 return toBool( ! haveNo66noF2noF3(pfx) ); 768 } 769 770 /* Return True iff pfx has 66 or F3 set */ 771 static Bool have66orF3 ( Prefix pfx ) 772 { 773 return toBool((pfx & (PFX_66|PFX_F3)) > 0); 774 } 775 776 /* Clear all the segment-override bits in a prefix. */ 777 static Prefix clearSegBits ( Prefix p ) 778 { 779 return 780 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); 781 } 782 783 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */ 784 static UInt getVexNvvvv ( Prefix pfx ) { 785 UInt r = (UInt)pfx; 786 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */ 787 return r & 0xF; 788 } 789 790 static Bool haveVEX ( Prefix pfx ) { 791 return toBool(pfx & PFX_VEX); 792 } 793 794 static Int getVexL ( Prefix pfx ) { 795 return (pfx & PFX_VEXL) ? 1 : 0; 796 } 797 798 799 /*------------------------------------------------------------*/ 800 /*--- For dealing with escapes ---*/ 801 /*------------------------------------------------------------*/ 802 803 804 /* Escapes come after the prefixes, but before the primary opcode 805 byte. They escape the primary opcode byte into a bigger space. 806 The 0xF0000000 isn't significant, except so as to make it not 807 overlap valid Prefix values, for sanity checking. 808 */ 809 810 typedef 811 enum { 812 ESC_NONE=0xF0000000, // none 813 ESC_0F, // 0F 814 ESC_0F38, // 0F 38 815 ESC_0F3A // 0F 3A 816 } 817 Escape; 818 819 820 /*------------------------------------------------------------*/ 821 /*--- For dealing with integer registers ---*/ 822 /*------------------------------------------------------------*/ 823 824 /* This is somewhat complex. 
The rules are: 825 826 For 64, 32 and 16 bit register references, the e or g fields in the 827 modrm bytes supply the low 3 bits of the register number. The 828 fourth (most-significant) bit of the register number is supplied by 829 the REX byte, if it is present; else that bit is taken to be zero. 830 831 The REX.R bit supplies the high bit corresponding to the g register 832 field, and the REX.B bit supplies the high bit corresponding to the 833 e register field (when the mod part of modrm indicates that modrm's 834 e component refers to a register and not to memory). 835 836 The REX.X bit supplies a high register bit for certain registers 837 in SIB address modes, and is generally rarely used. 838 839 For 8 bit register references, the presence of the REX byte itself 840 has significance. If there is no REX present, then the 3-bit 841 number extracted from the modrm e or g field is treated as an index 842 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the 843 old x86 encoding scheme. 844 845 But if there is a REX present, the register reference is 846 interpreted in the same way as for 64/32/16-bit references: a high 847 bit is extracted from REX, giving a 4-bit number, and the denoted 848 register is the lowest 8 bits of the 16 integer registers denoted 849 by the number. In particular, values 3 through 7 of this sequence 850 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of 851 %rsp %rbp %rsi %rdi. 852 853 The REX.W bit has no bearing at all on register numbers. Instead 854 its presence indicates that the operand size is to be overridden 855 from its default value (32 bits) to 64 bits instead. This is in 856 the same fashion that an 0x66 prefix indicates the operand size is 857 to be overridden from 32 bits down to 16 bits. When both REX.W and 858 0x66 are present there is a conflict, and REX.W takes precedence. 859 860 Rather than try to handle this complexity using a single huge 861 function, several smaller ones are provided. The aim is to make it 862 as difficult as possible to screw up register decoding in a subtle 863 and hard-to-track-down way. 864 865 Because these routines fish around in the host's memory (that is, 866 in the guest state area) for sub-parts of guest registers, their 867 correctness depends on the host's endianness. So far these 868 routines only work for little-endian hosts. Those for which 869 endianness is important have assertions to ensure sanity. 870 */ 871 872 873 /* About the simplest question you can ask: where do the 64-bit 874 integer registers live (in the guest state) ? */ 875 876 static Int integerGuestReg64Offset ( UInt reg ) 877 { 878 switch (reg) { 879 case R_RAX: return OFFB_RAX; 880 case R_RCX: return OFFB_RCX; 881 case R_RDX: return OFFB_RDX; 882 case R_RBX: return OFFB_RBX; 883 case R_RSP: return OFFB_RSP; 884 case R_RBP: return OFFB_RBP; 885 case R_RSI: return OFFB_RSI; 886 case R_RDI: return OFFB_RDI; 887 case R_R8: return OFFB_R8; 888 case R_R9: return OFFB_R9; 889 case R_R10: return OFFB_R10; 890 case R_R11: return OFFB_R11; 891 case R_R12: return OFFB_R12; 892 case R_R13: return OFFB_R13; 893 case R_R14: return OFFB_R14; 894 case R_R15: return OFFB_R15; 895 default: vpanic("integerGuestReg64Offset(amd64)"); 896 } 897 } 898 899 900 /* Produce the name of an integer register, for printing purposes. 901 reg is a number in the range 0 .. 15 that has been generated from a 902 3-bit reg-field number and a REX extension bit. irregular denotes 903 the case where sz==1 and no REX byte is present. 
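   For example (illustrative): with sz == 1 and no REX present
   (irregular == True), values 4..7 name %ah, %ch, %dh, %bh; if any
   REX prefix is present the same low-3-bit values name %spl, %bpl,
   %sil, %dil, and the REX-extended values 12..15 name %r12b..%r15b.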
*/ 904 905 static 906 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) 907 { 908 static const HChar* ireg64_names[16] 909 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 910 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 911 static const HChar* ireg32_names[16] 912 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 913 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; 914 static const HChar* ireg16_names[16] 915 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", 916 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; 917 static const HChar* ireg8_names[16] 918 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", 919 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; 920 static const HChar* ireg8_irregular[8] 921 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; 922 923 vassert(reg < 16); 924 if (sz == 1) { 925 if (irregular) 926 vassert(reg < 8); 927 } else { 928 vassert(irregular == False); 929 } 930 931 switch (sz) { 932 case 8: return ireg64_names[reg]; 933 case 4: return ireg32_names[reg]; 934 case 2: return ireg16_names[reg]; 935 case 1: if (irregular) { 936 return ireg8_irregular[reg]; 937 } else { 938 return ireg8_names[reg]; 939 } 940 default: vpanic("nameIReg(amd64)"); 941 } 942 } 943 944 /* Using the same argument conventions as nameIReg, produce the 945 guest state offset of an integer register. */ 946 947 static 948 Int offsetIReg ( Int sz, UInt reg, Bool irregular ) 949 { 950 vassert(reg < 16); 951 if (sz == 1) { 952 if (irregular) 953 vassert(reg < 8); 954 } else { 955 vassert(irregular == False); 956 } 957 958 /* Deal with irregular case -- sz==1 and no REX present */ 959 if (sz == 1 && irregular) { 960 switch (reg) { 961 case R_RSP: return 1+ OFFB_RAX; 962 case R_RBP: return 1+ OFFB_RCX; 963 case R_RSI: return 1+ OFFB_RDX; 964 case R_RDI: return 1+ OFFB_RBX; 965 default: break; /* use the normal case */ 966 } 967 } 968 969 /* Normal case */ 970 return integerGuestReg64Offset(reg); 971 } 972 973 974 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */ 975 976 static IRExpr* getIRegCL ( void ) 977 { 978 vassert(!host_is_bigendian); 979 return IRExpr_Get( OFFB_RCX, Ity_I8 ); 980 } 981 982 983 /* Write to the %AH register. */ 984 985 static void putIRegAH ( IRExpr* e ) 986 { 987 vassert(!host_is_bigendian); 988 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8); 989 stmt( IRStmt_Put( OFFB_RAX+1, e ) ); 990 } 991 992 993 /* Read/write various widths of %RAX, as it has various 994 special-purpose uses. 
*/ 995 996 static const HChar* nameIRegRAX ( Int sz ) 997 { 998 switch (sz) { 999 case 1: return "%al"; 1000 case 2: return "%ax"; 1001 case 4: return "%eax"; 1002 case 8: return "%rax"; 1003 default: vpanic("nameIRegRAX(amd64)"); 1004 } 1005 } 1006 1007 static IRExpr* getIRegRAX ( Int sz ) 1008 { 1009 vassert(!host_is_bigendian); 1010 switch (sz) { 1011 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 ); 1012 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 ); 1013 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 )); 1014 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 ); 1015 default: vpanic("getIRegRAX(amd64)"); 1016 } 1017 } 1018 1019 static void putIRegRAX ( Int sz, IRExpr* e ) 1020 { 1021 IRType ty = typeOfIRExpr(irsb->tyenv, e); 1022 vassert(!host_is_bigendian); 1023 switch (sz) { 1024 case 8: vassert(ty == Ity_I64); 1025 stmt( IRStmt_Put( OFFB_RAX, e )); 1026 break; 1027 case 4: vassert(ty == Ity_I32); 1028 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) )); 1029 break; 1030 case 2: vassert(ty == Ity_I16); 1031 stmt( IRStmt_Put( OFFB_RAX, e )); 1032 break; 1033 case 1: vassert(ty == Ity_I8); 1034 stmt( IRStmt_Put( OFFB_RAX, e )); 1035 break; 1036 default: vpanic("putIRegRAX(amd64)"); 1037 } 1038 } 1039 1040 1041 /* Read/write various widths of %RDX, as it has various 1042 special-purpose uses. */ 1043 1044 static const HChar* nameIRegRDX ( Int sz ) 1045 { 1046 switch (sz) { 1047 case 1: return "%dl"; 1048 case 2: return "%dx"; 1049 case 4: return "%edx"; 1050 case 8: return "%rdx"; 1051 default: vpanic("nameIRegRDX(amd64)"); 1052 } 1053 } 1054 1055 static IRExpr* getIRegRDX ( Int sz ) 1056 { 1057 vassert(!host_is_bigendian); 1058 switch (sz) { 1059 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 ); 1060 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 ); 1061 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 )); 1062 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 ); 1063 default: vpanic("getIRegRDX(amd64)"); 1064 } 1065 } 1066 1067 static void putIRegRDX ( Int sz, IRExpr* e ) 1068 { 1069 vassert(!host_is_bigendian); 1070 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1071 switch (sz) { 1072 case 8: stmt( IRStmt_Put( OFFB_RDX, e )); 1073 break; 1074 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) )); 1075 break; 1076 case 2: stmt( IRStmt_Put( OFFB_RDX, e )); 1077 break; 1078 case 1: stmt( IRStmt_Put( OFFB_RDX, e )); 1079 break; 1080 default: vpanic("putIRegRDX(amd64)"); 1081 } 1082 } 1083 1084 1085 /* Simplistic functions to deal with the integer registers as a 1086 straightforward bank of 16 64-bit regs. */ 1087 1088 static IRExpr* getIReg64 ( UInt regno ) 1089 { 1090 return IRExpr_Get( integerGuestReg64Offset(regno), 1091 Ity_I64 ); 1092 } 1093 1094 static void putIReg64 ( UInt regno, IRExpr* e ) 1095 { 1096 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1097 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) ); 1098 } 1099 1100 static const HChar* nameIReg64 ( UInt regno ) 1101 { 1102 return nameIReg( 8, regno, False ); 1103 } 1104 1105 1106 /* Simplistic functions to deal with the lower halves of integer 1107 registers as a straightforward bank of 16 32-bit regs. 
*/ 1108 1109 static IRExpr* getIReg32 ( UInt regno ) 1110 { 1111 vassert(!host_is_bigendian); 1112 return unop(Iop_64to32, 1113 IRExpr_Get( integerGuestReg64Offset(regno), 1114 Ity_I64 )); 1115 } 1116 1117 static void putIReg32 ( UInt regno, IRExpr* e ) 1118 { 1119 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1120 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1121 unop(Iop_32Uto64,e) ) ); 1122 } 1123 1124 static const HChar* nameIReg32 ( UInt regno ) 1125 { 1126 return nameIReg( 4, regno, False ); 1127 } 1128 1129 1130 /* Simplistic functions to deal with the lower quarters of integer 1131 registers as a straightforward bank of 16 16-bit regs. */ 1132 1133 static IRExpr* getIReg16 ( UInt regno ) 1134 { 1135 vassert(!host_is_bigendian); 1136 return IRExpr_Get( integerGuestReg64Offset(regno), 1137 Ity_I16 ); 1138 } 1139 1140 static void putIReg16 ( UInt regno, IRExpr* e ) 1141 { 1142 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1143 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1144 unop(Iop_16Uto64,e) ) ); 1145 } 1146 1147 static const HChar* nameIReg16 ( UInt regno ) 1148 { 1149 return nameIReg( 2, regno, False ); 1150 } 1151 1152 1153 /* Sometimes what we know is a 3-bit register number, a REX byte, and 1154 which field of the REX byte is to be used to extend to a 4-bit 1155 number. These functions cater for that situation. 1156 */ 1157 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits ) 1158 { 1159 vassert(lo3bits < 8); 1160 vassert(IS_VALID_PFX(pfx)); 1161 return getIReg64( lo3bits | (getRexX(pfx) << 3) ); 1162 } 1163 1164 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits ) 1165 { 1166 vassert(lo3bits < 8); 1167 vassert(IS_VALID_PFX(pfx)); 1168 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False ); 1169 } 1170 1171 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1172 { 1173 vassert(lo3bits < 8); 1174 vassert(IS_VALID_PFX(pfx)); 1175 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1176 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3), 1177 toBool(sz==1 && !haveREX(pfx)) ); 1178 } 1179 1180 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits ) 1181 { 1182 vassert(lo3bits < 8); 1183 vassert(IS_VALID_PFX(pfx)); 1184 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1185 if (sz == 4) { 1186 sz = 8; 1187 return unop(Iop_64to32, 1188 IRExpr_Get( 1189 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1190 False/*!irregular*/ ), 1191 szToITy(sz) 1192 ) 1193 ); 1194 } else { 1195 return IRExpr_Get( 1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1197 toBool(sz==1 && !haveREX(pfx)) ), 1198 szToITy(sz) 1199 ); 1200 } 1201 } 1202 1203 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e ) 1204 { 1205 vassert(lo3bits < 8); 1206 vassert(IS_VALID_PFX(pfx)); 1207 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1208 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz)); 1209 stmt( IRStmt_Put( 1210 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), 1211 toBool(sz==1 && !haveREX(pfx)) ), 1212 sz==4 ? unop(Iop_32Uto64,e) : e 1213 )); 1214 } 1215 1216 1217 /* Functions for getting register numbers from modrm bytes and REX 1218 when we don't have to consider the complexities of integer subreg 1219 accesses. 1220 */ 1221 /* Extract the g reg field from a modRM byte, and augment it using the 1222 REX.R bit from the supplied REX byte. The R bit usually is 1223 associated with the g register field. 
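   For example (illustrative): modrm byte 0xD8 (binary 11 011 000) has
   g field 3; with REX.R set the result is register 11, i.e. %r11 (or
   %r11d/%r11w/%r11b at smaller operand sizes).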
1224 */ 1225 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1226 { 1227 Int reg = (Int)( (mod_reg_rm >> 3) & 7 ); 1228 reg += (pfx & PFX_REXR) ? 8 : 0; 1229 return reg; 1230 } 1231 1232 /* Extract the e reg field from a modRM byte, and augment it using the 1233 REX.B bit from the supplied REX byte. The B bit usually is 1234 associated with the e register field (when modrm indicates e is a 1235 register, that is). 1236 */ 1237 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm ) 1238 { 1239 Int rm; 1240 vassert(epartIsReg(mod_reg_rm)); 1241 rm = (Int)(mod_reg_rm & 0x7); 1242 rm += (pfx & PFX_REXB) ? 8 : 0; 1243 return rm; 1244 } 1245 1246 1247 /* General functions for dealing with integer register access. */ 1248 1249 /* Produce the guest state offset for a reference to the 'g' register 1250 field in a modrm byte, taking into account REX (or its absence), 1251 and the size of the access. 1252 */ 1253 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1254 { 1255 UInt reg; 1256 vassert(!host_is_bigendian); 1257 vassert(IS_VALID_PFX(pfx)); 1258 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1259 reg = gregOfRexRM( pfx, mod_reg_rm ); 1260 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1261 } 1262 1263 static 1264 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1265 { 1266 if (sz == 4) { 1267 sz = 8; 1268 return unop(Iop_64to32, 1269 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1270 szToITy(sz) )); 1271 } else { 1272 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ), 1273 szToITy(sz) ); 1274 } 1275 } 1276 1277 static 1278 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1279 { 1280 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1281 if (sz == 4) { 1282 e = unop(Iop_32Uto64,e); 1283 } 1284 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) ); 1285 } 1286 1287 static 1288 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1289 { 1290 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), 1291 toBool(sz==1 && !haveREX(pfx)) ); 1292 } 1293 1294 1295 static 1296 IRExpr* getIRegV ( Int sz, Prefix pfx ) 1297 { 1298 if (sz == 4) { 1299 sz = 8; 1300 return unop(Iop_64to32, 1301 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1302 szToITy(sz) )); 1303 } else { 1304 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ), 1305 szToITy(sz) ); 1306 } 1307 } 1308 1309 static 1310 void putIRegV ( Int sz, Prefix pfx, IRExpr* e ) 1311 { 1312 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1313 if (sz == 4) { 1314 e = unop(Iop_32Uto64,e); 1315 } 1316 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) ); 1317 } 1318 1319 static 1320 const HChar* nameIRegV ( Int sz, Prefix pfx ) 1321 { 1322 return nameIReg( sz, getVexNvvvv(pfx), False ); 1323 } 1324 1325 1326 1327 /* Produce the guest state offset for a reference to the 'e' register 1328 field in a modrm byte, taking into account REX (or its absence), 1329 and the size of the access. eregOfRexRM will assert if mod_reg_rm 1330 denotes a memory access rather than a register access. 
1331 */ 1332 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1333 { 1334 UInt reg; 1335 vassert(!host_is_bigendian); 1336 vassert(IS_VALID_PFX(pfx)); 1337 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1); 1338 reg = eregOfRexRM( pfx, mod_reg_rm ); 1339 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) ); 1340 } 1341 1342 static 1343 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1344 { 1345 if (sz == 4) { 1346 sz = 8; 1347 return unop(Iop_64to32, 1348 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1349 szToITy(sz) )); 1350 } else { 1351 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ), 1352 szToITy(sz) ); 1353 } 1354 } 1355 1356 static 1357 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e ) 1358 { 1359 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz)); 1360 if (sz == 4) { 1361 e = unop(Iop_32Uto64,e); 1362 } 1363 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) ); 1364 } 1365 1366 static 1367 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm ) 1368 { 1369 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), 1370 toBool(sz==1 && !haveREX(pfx)) ); 1371 } 1372 1373 1374 /*------------------------------------------------------------*/ 1375 /*--- For dealing with XMM registers ---*/ 1376 /*------------------------------------------------------------*/ 1377 1378 static Int ymmGuestRegOffset ( UInt ymmreg ) 1379 { 1380 switch (ymmreg) { 1381 case 0: return OFFB_YMM0; 1382 case 1: return OFFB_YMM1; 1383 case 2: return OFFB_YMM2; 1384 case 3: return OFFB_YMM3; 1385 case 4: return OFFB_YMM4; 1386 case 5: return OFFB_YMM5; 1387 case 6: return OFFB_YMM6; 1388 case 7: return OFFB_YMM7; 1389 case 8: return OFFB_YMM8; 1390 case 9: return OFFB_YMM9; 1391 case 10: return OFFB_YMM10; 1392 case 11: return OFFB_YMM11; 1393 case 12: return OFFB_YMM12; 1394 case 13: return OFFB_YMM13; 1395 case 14: return OFFB_YMM14; 1396 case 15: return OFFB_YMM15; 1397 default: vpanic("ymmGuestRegOffset(amd64)"); 1398 } 1399 } 1400 1401 static Int xmmGuestRegOffset ( UInt xmmreg ) 1402 { 1403 /* Correct for little-endian host only. */ 1404 vassert(!host_is_bigendian); 1405 return ymmGuestRegOffset( xmmreg ); 1406 } 1407 1408 /* Lanes of vector registers are always numbered from zero being the 1409 least significant lane (rightmost in the register). */ 1410 1411 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno ) 1412 { 1413 /* Correct for little-endian host only. */ 1414 vassert(!host_is_bigendian); 1415 vassert(laneno >= 0 && laneno < 8); 1416 return xmmGuestRegOffset( xmmreg ) + 2 * laneno; 1417 } 1418 1419 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno ) 1420 { 1421 /* Correct for little-endian host only. */ 1422 vassert(!host_is_bigendian); 1423 vassert(laneno >= 0 && laneno < 4); 1424 return xmmGuestRegOffset( xmmreg ) + 4 * laneno; 1425 } 1426 1427 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno ) 1428 { 1429 /* Correct for little-endian host only. */ 1430 vassert(!host_is_bigendian); 1431 vassert(laneno >= 0 && laneno < 2); 1432 return xmmGuestRegOffset( xmmreg ) + 8 * laneno; 1433 } 1434 1435 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno ) 1436 { 1437 /* Correct for little-endian host only. */ 1438 vassert(!host_is_bigendian); 1439 vassert(laneno >= 0 && laneno < 2); 1440 return ymmGuestRegOffset( ymmreg ) + 16 * laneno; 1441 } 1442 1443 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno ) 1444 { 1445 /* Correct for little-endian host only. 
*/ 1446 vassert(!host_is_bigendian); 1447 vassert(laneno >= 0 && laneno < 4); 1448 return ymmGuestRegOffset( ymmreg ) + 8 * laneno; 1449 } 1450 1451 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno ) 1452 { 1453 /* Correct for little-endian host only. */ 1454 vassert(!host_is_bigendian); 1455 vassert(laneno >= 0 && laneno < 8); 1456 return ymmGuestRegOffset( ymmreg ) + 4 * laneno; 1457 } 1458 1459 static IRExpr* getXMMReg ( UInt xmmreg ) 1460 { 1461 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 ); 1462 } 1463 1464 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno ) 1465 { 1466 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 ); 1467 } 1468 1469 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno ) 1470 { 1471 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 ); 1472 } 1473 1474 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno ) 1475 { 1476 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 ); 1477 } 1478 1479 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno ) 1480 { 1481 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 ); 1482 } 1483 1484 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno ) 1485 { 1486 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 ); 1487 } 1488 1489 static void putXMMReg ( UInt xmmreg, IRExpr* e ) 1490 { 1491 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1492 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) ); 1493 } 1494 1495 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e ) 1496 { 1497 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1498 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1499 } 1500 1501 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e ) 1502 { 1503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1504 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) ); 1505 } 1506 1507 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e ) 1508 { 1509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1510 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1511 } 1512 1513 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e ) 1514 { 1515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1516 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) ); 1517 } 1518 1519 static IRExpr* getYMMReg ( UInt xmmreg ) 1520 { 1521 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 ); 1522 } 1523 1524 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno ) 1525 { 1526 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 ); 1527 } 1528 1529 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno ) 1530 { 1531 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 ); 1532 } 1533 1534 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno ) 1535 { 1536 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 ); 1537 } 1538 1539 static void putYMMReg ( UInt ymmreg, IRExpr* e ) 1540 { 1541 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256); 1542 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) ); 1543 } 1544 1545 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e ) 1546 { 1547 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128); 1548 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) ); 1549 } 1550 1551 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e ) 1552 { 1553 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64); 1554 stmt( IRStmt_Put( 
ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1555 } 1556 1557 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e ) 1558 { 1559 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 1560 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) ); 1561 } 1562 1563 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e ) 1564 { 1565 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32); 1566 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1567 } 1568 1569 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e ) 1570 { 1571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1572 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) ); 1573 } 1574 1575 static IRExpr* mkV128 ( UShort mask ) 1576 { 1577 return IRExpr_Const(IRConst_V128(mask)); 1578 } 1579 1580 /* Write the low half of a YMM reg and zero out the upper half. */ 1581 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e ) 1582 { 1583 putYMMRegLane128( ymmreg, 0, e ); 1584 putYMMRegLane128( ymmreg, 1, mkV128(0) ); 1585 } 1586 1587 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y ) 1588 { 1589 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1); 1590 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1); 1591 return unop(Iop_64to1, 1592 binop(Iop_And64, 1593 unop(Iop_1Uto64,x), 1594 unop(Iop_1Uto64,y))); 1595 } 1596 1597 /* Generate a compare-and-swap operation, operating on memory at 1598 'addr'. The expected value is 'expVal' and the new value is 1599 'newVal'. If the operation fails, then transfer control (with a 1600 no-redir jump (XXX no -- see comment at top of this file)) to 1601 'restart_point', which is presumably the address of the guest 1602 instruction again -- retrying, essentially. */ 1603 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal, 1604 Addr64 restart_point ) 1605 { 1606 IRCAS* cas; 1607 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal); 1608 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal); 1609 IRTemp oldTmp = newTemp(tyE); 1610 IRTemp expTmp = newTemp(tyE); 1611 vassert(tyE == tyN); 1612 vassert(tyE == Ity_I64 || tyE == Ity_I32 1613 || tyE == Ity_I16 || tyE == Ity_I8); 1614 assign(expTmp, expVal); 1615 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 1616 NULL, mkexpr(expTmp), NULL, newVal ); 1617 stmt( IRStmt_CAS(cas) ); 1618 stmt( IRStmt_Exit( 1619 binop( mkSizedOp(tyE,Iop_CasCmpNE8), 1620 mkexpr(oldTmp), mkexpr(expTmp) ), 1621 Ijk_Boring, /*Ijk_NoRedir*/ 1622 IRConst_U64( restart_point ), 1623 OFFB_RIP 1624 )); 1625 } 1626 1627 1628 /*------------------------------------------------------------*/ 1629 /*--- Helpers for %rflags. ---*/ 1630 /*------------------------------------------------------------*/ 1631 1632 /* -------------- Evaluating the flags-thunk. -------------- */ 1633 1634 /* Build IR to calculate all the eflags from stored 1635 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: 1636 Ity_I64. */ 1637 static IRExpr* mk_amd64g_calculate_rflags_all ( void ) 1638 { 1639 IRExpr** args 1640 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64), 1641 IRExpr_Get(OFFB_CC_DEP1, Ity_I64), 1642 IRExpr_Get(OFFB_CC_DEP2, Ity_I64), 1643 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) ); 1644 IRExpr* call 1645 = mkIRExprCCall( 1646 Ity_I64, 1647 0/*regparm*/, 1648 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all, 1649 args 1650 ); 1651 /* Exclude OP and NDEP from definedness checking. We're only 1652 interested in DEP1 and DEP2. 
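   (Bit n of mcx_mask set means: do not definedness-check argument n
   of the call; here bits 0 and 3 cover CC_OP and CC_NDEP.)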
   */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 1/8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense.
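   For example (illustrative): narrowTo(Ity_I8, e) on an Ity_I64-typed
   e becomes Iop_64to8, while a request to "narrow" to a wider type
   panics.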
*/ 1747 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e ) 1748 { 1749 IRType src_ty = typeOfIRExpr(irsb->tyenv,e); 1750 if (src_ty == dst_ty) 1751 return e; 1752 if (src_ty == Ity_I32 && dst_ty == Ity_I16) 1753 return unop(Iop_32to16, e); 1754 if (src_ty == Ity_I32 && dst_ty == Ity_I8) 1755 return unop(Iop_32to8, e); 1756 if (src_ty == Ity_I64 && dst_ty == Ity_I32) 1757 return unop(Iop_64to32, e); 1758 if (src_ty == Ity_I64 && dst_ty == Ity_I16) 1759 return unop(Iop_64to16, e); 1760 if (src_ty == Ity_I64 && dst_ty == Ity_I8) 1761 return unop(Iop_64to8, e); 1762 1763 vex_printf("\nsrc, dst tys are: "); 1764 ppIRType(src_ty); 1765 vex_printf(", "); 1766 ppIRType(dst_ty); 1767 vex_printf("\n"); 1768 vpanic("narrowTo(amd64)"); 1769 } 1770 1771 1772 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is 1773 auto-sized up to the real op. */ 1774 1775 static 1776 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty ) 1777 { 1778 Int ccOp = 0; 1779 switch (ty) { 1780 case Ity_I8: ccOp = 0; break; 1781 case Ity_I16: ccOp = 1; break; 1782 case Ity_I32: ccOp = 2; break; 1783 case Ity_I64: ccOp = 3; break; 1784 default: vassert(0); 1785 } 1786 switch (op8) { 1787 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break; 1788 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break; 1789 default: ppIROp(op8); 1790 vpanic("setFlags_DEP1_DEP2(amd64)"); 1791 } 1792 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1793 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1794 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) ); 1795 } 1796 1797 1798 /* Set the OP and DEP1 fields only, and write zero to DEP2. */ 1799 1800 static 1801 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty ) 1802 { 1803 Int ccOp = 0; 1804 switch (ty) { 1805 case Ity_I8: ccOp = 0; break; 1806 case Ity_I16: ccOp = 1; break; 1807 case Ity_I32: ccOp = 2; break; 1808 case Ity_I64: ccOp = 3; break; 1809 default: vassert(0); 1810 } 1811 switch (op8) { 1812 case Iop_Or8: 1813 case Iop_And8: 1814 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break; 1815 default: ppIROp(op8); 1816 vpanic("setFlags_DEP1(amd64)"); 1817 } 1818 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1819 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) ); 1820 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1821 } 1822 1823 1824 /* For shift operations, we put in the result and the undershifted 1825 result. Except if the shift amount is zero, the thunk is left 1826 unchanged. */ 1827 1828 static void setFlags_DEP1_DEP2_shift ( IROp op64, 1829 IRTemp res, 1830 IRTemp resUS, 1831 IRType ty, 1832 IRTemp guard ) 1833 { 1834 Int ccOp = 0; 1835 switch (ty) { 1836 case Ity_I8: ccOp = 0; break; 1837 case Ity_I16: ccOp = 1; break; 1838 case Ity_I32: ccOp = 2; break; 1839 case Ity_I64: ccOp = 3; break; 1840 default: vassert(0); 1841 } 1842 1843 vassert(guard); 1844 1845 /* Both kinds of right shifts are handled by the same thunk 1846 operation. */ 1847 switch (op64) { 1848 case Iop_Shr64: 1849 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break; 1850 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break; 1851 default: ppIROp(op64); 1852 vpanic("setFlags_DEP1_DEP2_shift(amd64)"); 1853 } 1854 1855 /* guard :: Ity_I8. We need to convert it to I1. */ 1856 IRTemp guardB = newTemp(Ity_I1); 1857 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) ); 1858 1859 /* DEP1 contains the result, DEP2 contains the undershifted value. 
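   For example (illustrative): for a 32-bit shll by a non-zero count
   n, the thunk is written (under the guard) with CC_OP =
   AMD64G_CC_OP_SHLL, DEP1 = the final result (x << n) and DEP2 = the
   undershifted value (x << (n-1)), from which the carry-out can later
   be recovered.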
*/ 1860 stmt( IRStmt_Put( OFFB_CC_OP, 1861 IRExpr_ITE( mkexpr(guardB), 1862 mkU64(ccOp), 1863 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 1864 stmt( IRStmt_Put( OFFB_CC_DEP1, 1865 IRExpr_ITE( mkexpr(guardB), 1866 widenUto64(mkexpr(res)), 1867 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 1868 stmt( IRStmt_Put( OFFB_CC_DEP2, 1869 IRExpr_ITE( mkexpr(guardB), 1870 widenUto64(mkexpr(resUS)), 1871 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 1872 } 1873 1874 1875 /* For the inc/dec case, we store in DEP1 the result value and in NDEP 1876 the former value of the carry flag, which unfortunately we have to 1877 compute. */ 1878 1879 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty ) 1880 { 1881 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB; 1882 1883 switch (ty) { 1884 case Ity_I8: ccOp += 0; break; 1885 case Ity_I16: ccOp += 1; break; 1886 case Ity_I32: ccOp += 2; break; 1887 case Ity_I64: ccOp += 3; break; 1888 default: vassert(0); 1889 } 1890 1891 /* This has to come first, because calculating the C flag 1892 may require reading all four thunk fields. */ 1893 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) ); 1894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) ); 1895 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) ); 1896 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 1897 } 1898 1899 1900 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the 1901 two arguments. */ 1902 1903 static 1904 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op ) 1905 { 1906 switch (ty) { 1907 case Ity_I8: 1908 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) ); 1909 break; 1910 case Ity_I16: 1911 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) ); 1912 break; 1913 case Ity_I32: 1914 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) ); 1915 break; 1916 case Ity_I64: 1917 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) ); 1918 break; 1919 default: 1920 vpanic("setFlags_MUL(amd64)"); 1921 } 1922 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) )); 1923 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) )); 1924 } 1925 1926 1927 /* -------------- Condition codes. -------------- */ 1928 1929 /* Condition codes, using the AMD encoding. */ 1930 1931 static const HChar* name_AMD64Condcode ( AMD64Condcode cond ) 1932 { 1933 switch (cond) { 1934 case AMD64CondO: return "o"; 1935 case AMD64CondNO: return "no"; 1936 case AMD64CondB: return "b"; 1937 case AMD64CondNB: return "ae"; /*"nb";*/ 1938 case AMD64CondZ: return "e"; /*"z";*/ 1939 case AMD64CondNZ: return "ne"; /*"nz";*/ 1940 case AMD64CondBE: return "be"; 1941 case AMD64CondNBE: return "a"; /*"nbe";*/ 1942 case AMD64CondS: return "s"; 1943 case AMD64CondNS: return "ns"; 1944 case AMD64CondP: return "p"; 1945 case AMD64CondNP: return "np"; 1946 case AMD64CondL: return "l"; 1947 case AMD64CondNL: return "ge"; /*"nl";*/ 1948 case AMD64CondLE: return "le"; 1949 case AMD64CondNLE: return "g"; /*"nle";*/ 1950 case AMD64CondAlways: return "ALWAYS"; 1951 default: vpanic("name_AMD64Condcode"); 1952 } 1953 } 1954 1955 static 1956 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond, 1957 /*OUT*/Bool* needInvert ) 1958 { 1959 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE); 1960 if (cond & 1) { 1961 *needInvert = True; 1962 return cond-1; 1963 } else { 1964 *needInvert = False; 1965 return cond; 1966 } 1967 } 1968 1969 1970 /* -------------- Helpers for ADD/SUB with carry. -------------- */ 1971 1972 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags 1973 appropriately. 
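   Note that DEP2 is set below to (ta2 XOR the old carry) rather than to
   ta2 itself, with the old carry parked in NDEP; the flags helper undoes
   the XOR to recover the real second operand.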
1974 1975 Optionally, generate a store for the 'tres' value. This can either 1976 be a normal store, or it can be a cas-with-possible-failure style 1977 store: 1978 1979 if taddr is IRTemp_INVALID, then no store is generated. 1980 1981 if taddr is not IRTemp_INVALID, then a store (using taddr as 1982 the address) is generated: 1983 1984 if texpVal is IRTemp_INVALID then a normal store is 1985 generated, and restart_point must be zero (it is irrelevant). 1986 1987 if texpVal is not IRTemp_INVALID then a cas-style store is 1988 generated. texpVal is the expected value, restart_point 1989 is the restart point if the store fails, and texpVal must 1990 have the same type as tres. 1991 1992 */ 1993 static void helper_ADC ( Int sz, 1994 IRTemp tres, IRTemp ta1, IRTemp ta2, 1995 /* info about optional store: */ 1996 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 1997 { 1998 UInt thunkOp; 1999 IRType ty = szToITy(sz); 2000 IRTemp oldc = newTemp(Ity_I64); 2001 IRTemp oldcn = newTemp(ty); 2002 IROp plus = mkSizedOp(ty, Iop_Add8); 2003 IROp xor = mkSizedOp(ty, Iop_Xor8); 2004 2005 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 2006 2007 switch (sz) { 2008 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break; 2009 case 4: thunkOp = AMD64G_CC_OP_ADCL; break; 2010 case 2: thunkOp = AMD64G_CC_OP_ADCW; break; 2011 case 1: thunkOp = AMD64G_CC_OP_ADCB; break; 2012 default: vassert(0); 2013 } 2014 2015 /* oldc = old carry flag, 0 or 1 */ 2016 assign( oldc, binop(Iop_And64, 2017 mk_amd64g_calculate_rflags_c(), 2018 mkU64(1)) ); 2019 2020 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 2021 2022 assign( tres, binop(plus, 2023 binop(plus,mkexpr(ta1),mkexpr(ta2)), 2024 mkexpr(oldcn)) ); 2025 2026 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 2027 start of this function. */ 2028 if (taddr != IRTemp_INVALID) { 2029 if (texpVal == IRTemp_INVALID) { 2030 vassert(restart_point == 0); 2031 storeLE( mkexpr(taddr), mkexpr(tres) ); 2032 } else { 2033 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 2034 /* .. and hence 'texpVal' has the same type as 'tres'. */ 2035 casLE( mkexpr(taddr), 2036 mkexpr(texpVal), mkexpr(tres), restart_point ); 2037 } 2038 } 2039 2040 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2041 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) )); 2042 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2043 mkexpr(oldcn)) )) ); 2044 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2045 } 2046 2047 2048 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags 2049 appropriately. As with helper_ADC, possibly generate a store of 2050 the result -- see comments on helper_ADC for details. 
2051 */ 2052 static void helper_SBB ( Int sz, 2053 IRTemp tres, IRTemp ta1, IRTemp ta2, 2054 /* info about optional store: */ 2055 IRTemp taddr, IRTemp texpVal, Addr32 restart_point ) 2056 { 2057 UInt thunkOp; 2058 IRType ty = szToITy(sz); 2059 IRTemp oldc = newTemp(Ity_I64); 2060 IRTemp oldcn = newTemp(ty); 2061 IROp minus = mkSizedOp(ty, Iop_Sub8); 2062 IROp xor = mkSizedOp(ty, Iop_Xor8); 2063 2064 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty); 2065 2066 switch (sz) { 2067 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break; 2068 case 4: thunkOp = AMD64G_CC_OP_SBBL; break; 2069 case 2: thunkOp = AMD64G_CC_OP_SBBW; break; 2070 case 1: thunkOp = AMD64G_CC_OP_SBBB; break; 2071 default: vassert(0); 2072 } 2073 2074 /* oldc = old carry flag, 0 or 1 */ 2075 assign( oldc, binop(Iop_And64, 2076 mk_amd64g_calculate_rflags_c(), 2077 mkU64(1)) ); 2078 2079 assign( oldcn, narrowTo(ty, mkexpr(oldc)) ); 2080 2081 assign( tres, binop(minus, 2082 binop(minus,mkexpr(ta1),mkexpr(ta2)), 2083 mkexpr(oldcn)) ); 2084 2085 /* Possibly generate a store of 'tres' to 'taddr'. See comment at 2086 start of this function. */ 2087 if (taddr != IRTemp_INVALID) { 2088 if (texpVal == IRTemp_INVALID) { 2089 vassert(restart_point == 0); 2090 storeLE( mkexpr(taddr), mkexpr(tres) ); 2091 } else { 2092 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty); 2093 /* .. and hence 'texpVal' has the same type as 'tres'. */ 2094 casLE( mkexpr(taddr), 2095 mkexpr(texpVal), mkexpr(tres), restart_point ); 2096 } 2097 } 2098 2099 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) ); 2100 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) ); 2101 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2), 2102 mkexpr(oldcn)) )) ); 2103 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) ); 2104 } 2105 2106 2107 /* -------------- Helpers for disassembly printing. -------------- */ 2108 2109 static const HChar* nameGrp1 ( Int opc_aux ) 2110 { 2111 static const HChar* grp1_names[8] 2112 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; 2113 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)"); 2114 return grp1_names[opc_aux]; 2115 } 2116 2117 static const HChar* nameGrp2 ( Int opc_aux ) 2118 { 2119 static const HChar* grp2_names[8] 2120 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; 2121 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)"); 2122 return grp2_names[opc_aux]; 2123 } 2124 2125 static const HChar* nameGrp4 ( Int opc_aux ) 2126 { 2127 static const HChar* grp4_names[8] 2128 = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; 2129 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)"); 2130 return grp4_names[opc_aux]; 2131 } 2132 2133 static const HChar* nameGrp5 ( Int opc_aux ) 2134 { 2135 static const HChar* grp5_names[8] 2136 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; 2137 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)"); 2138 return grp5_names[opc_aux]; 2139 } 2140 2141 static const HChar* nameGrp8 ( Int opc_aux ) 2142 { 2143 static const HChar* grp8_names[8] 2144 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; 2145 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)"); 2146 return grp8_names[opc_aux]; 2147 } 2148 2149 //.. static const HChar* nameSReg ( UInt sreg ) 2150 //.. { 2151 //.. switch (sreg) { 2152 //.. case R_ES: return "%es"; 2153 //.. case R_CS: return "%cs"; 2154 //.. case R_SS: return "%ss"; 2155 //.. case R_DS: return "%ds"; 2156 //.. case R_FS: return "%fs"; 2157 //.. case R_GS: return "%gs"; 2158 //.. 
default: vpanic("nameSReg(x86)"); 2159 //.. } 2160 //.. } 2161 2162 static const HChar* nameMMXReg ( Int mmxreg ) 2163 { 2164 static const HChar* mmx_names[8] 2165 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; 2166 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)"); 2167 return mmx_names[mmxreg]; 2168 } 2169 2170 static const HChar* nameXMMReg ( Int xmmreg ) 2171 { 2172 static const HChar* xmm_names[16] 2173 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", 2174 "%xmm4", "%xmm5", "%xmm6", "%xmm7", 2175 "%xmm8", "%xmm9", "%xmm10", "%xmm11", 2176 "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; 2177 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)"); 2178 return xmm_names[xmmreg]; 2179 } 2180 2181 static const HChar* nameMMXGran ( Int gran ) 2182 { 2183 switch (gran) { 2184 case 0: return "b"; 2185 case 1: return "w"; 2186 case 2: return "d"; 2187 case 3: return "q"; 2188 default: vpanic("nameMMXGran(amd64,guest)"); 2189 } 2190 } 2191 2192 static HChar nameISize ( Int size ) 2193 { 2194 switch (size) { 2195 case 8: return 'q'; 2196 case 4: return 'l'; 2197 case 2: return 'w'; 2198 case 1: return 'b'; 2199 default: vpanic("nameISize(amd64)"); 2200 } 2201 } 2202 2203 static const HChar* nameYMMReg ( Int ymmreg ) 2204 { 2205 static const HChar* ymm_names[16] 2206 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3", 2207 "%ymm4", "%ymm5", "%ymm6", "%ymm7", 2208 "%ymm8", "%ymm9", "%ymm10", "%ymm11", 2209 "%ymm12", "%ymm13", "%ymm14", "%ymm15" }; 2210 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)"); 2211 return ymm_names[ymmreg]; 2212 } 2213 2214 2215 /*------------------------------------------------------------*/ 2216 /*--- JMP helpers ---*/ 2217 /*------------------------------------------------------------*/ 2218 2219 static void jmp_lit( /*MOD*/DisResult* dres, 2220 IRJumpKind kind, Addr64 d64 ) 2221 { 2222 vassert(dres->whatNext == Dis_Continue); 2223 vassert(dres->len == 0); 2224 vassert(dres->continueAt == 0); 2225 vassert(dres->jk_StopHere == Ijk_INVALID); 2226 dres->whatNext = Dis_StopHere; 2227 dres->jk_StopHere = kind; 2228 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) ); 2229 } 2230 2231 static void jmp_treg( /*MOD*/DisResult* dres, 2232 IRJumpKind kind, IRTemp t ) 2233 { 2234 vassert(dres->whatNext == Dis_Continue); 2235 vassert(dres->len == 0); 2236 vassert(dres->continueAt == 0); 2237 vassert(dres->jk_StopHere == Ijk_INVALID); 2238 dres->whatNext = Dis_StopHere; 2239 dres->jk_StopHere = kind; 2240 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) ); 2241 } 2242 2243 static 2244 void jcc_01 ( /*MOD*/DisResult* dres, 2245 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true ) 2246 { 2247 Bool invert; 2248 AMD64Condcode condPos; 2249 vassert(dres->whatNext == Dis_Continue); 2250 vassert(dres->len == 0); 2251 vassert(dres->continueAt == 0); 2252 vassert(dres->jk_StopHere == Ijk_INVALID); 2253 dres->whatNext = Dis_StopHere; 2254 dres->jk_StopHere = Ijk_Boring; 2255 condPos = positiveIse_AMD64Condcode ( cond, &invert ); 2256 if (invert) { 2257 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2258 Ijk_Boring, 2259 IRConst_U64(d64_false), 2260 OFFB_RIP ) ); 2261 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) ); 2262 } else { 2263 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos), 2264 Ijk_Boring, 2265 IRConst_U64(d64_true), 2266 OFFB_RIP ) ); 2267 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) ); 2268 } 2269 } 2270 2271 /* Let new_rsp be the %rsp value after a call/return. Let nia be the 2272 guest address of the next instruction to be executed. 
2273 2274 This function generates an AbiHint to say that -128(%rsp) 2275 .. -1(%rsp) should now be regarded as uninitialised. 2276 */ 2277 static 2278 void make_redzone_AbiHint ( VexAbiInfo* vbi, 2279 IRTemp new_rsp, IRTemp nia, const HChar* who ) 2280 { 2281 Int szB = vbi->guest_stack_redzone_size; 2282 vassert(szB >= 0); 2283 2284 /* A bit of a kludge. Currently the only AbI we've guested AMD64 2285 for is ELF. So just check it's the expected 128 value 2286 (paranoia). */ 2287 vassert(szB == 128); 2288 2289 if (0) vex_printf("AbiHint: %s\n", who); 2290 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64); 2291 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64); 2292 if (szB > 0) 2293 stmt( IRStmt_AbiHint( 2294 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 2295 szB, 2296 mkexpr(nia) 2297 )); 2298 } 2299 2300 2301 /*------------------------------------------------------------*/ 2302 /*--- Disassembling addressing modes ---*/ 2303 /*------------------------------------------------------------*/ 2304 2305 static 2306 const HChar* segRegTxt ( Prefix pfx ) 2307 { 2308 if (pfx & PFX_CS) return "%cs:"; 2309 if (pfx & PFX_DS) return "%ds:"; 2310 if (pfx & PFX_ES) return "%es:"; 2311 if (pfx & PFX_FS) return "%fs:"; 2312 if (pfx & PFX_GS) return "%gs:"; 2313 if (pfx & PFX_SS) return "%ss:"; 2314 return ""; /* no override */ 2315 } 2316 2317 2318 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a 2319 linear address by adding any required segment override as indicated 2320 by sorb, and also dealing with any address size override 2321 present. */ 2322 static 2323 IRExpr* handleAddrOverrides ( VexAbiInfo* vbi, 2324 Prefix pfx, IRExpr* virtual ) 2325 { 2326 /* --- segment overrides --- */ 2327 if (pfx & PFX_FS) { 2328 if (vbi->guest_amd64_assume_fs_is_zero) { 2329 /* Note that this is a linux-kernel specific hack that relies 2330 on the assumption that %fs is always zero. */ 2331 /* return virtual + guest_FS_ZERO. */ 2332 virtual = binop(Iop_Add64, virtual, 2333 IRExpr_Get(OFFB_FS_ZERO, Ity_I64)); 2334 } else { 2335 unimplemented("amd64 %fs segment override"); 2336 } 2337 } 2338 2339 if (pfx & PFX_GS) { 2340 if (vbi->guest_amd64_assume_gs_is_0x60) { 2341 /* Note that this is a darwin-kernel specific hack that relies 2342 on the assumption that %gs is always 0x60. */ 2343 /* return virtual + guest_GS_0x60. */ 2344 virtual = binop(Iop_Add64, virtual, 2345 IRExpr_Get(OFFB_GS_0x60, Ity_I64)); 2346 } else { 2347 unimplemented("amd64 %gs segment override"); 2348 } 2349 } 2350 2351 /* cs, ds, es and ss are simply ignored in 64-bit mode. */ 2352 2353 /* --- address size override --- */ 2354 if (haveASO(pfx)) 2355 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual)); 2356 2357 return virtual; 2358 } 2359 2360 //.. { 2361 //.. Int sreg; 2362 //.. IRType hWordTy; 2363 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64; 2364 //.. 2365 //.. if (sorb == 0) 2366 //.. /* the common case - no override */ 2367 //.. return virtual; 2368 //.. 2369 //.. switch (sorb) { 2370 //.. case 0x3E: sreg = R_DS; break; 2371 //.. case 0x26: sreg = R_ES; break; 2372 //.. case 0x64: sreg = R_FS; break; 2373 //.. case 0x65: sreg = R_GS; break; 2374 //.. default: vpanic("handleAddrOverrides(x86,guest)"); 2375 //.. } 2376 //.. 2377 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64; 2378 //.. 2379 //.. seg_selector = newTemp(Ity_I32); 2380 //.. ldt_ptr = newTemp(hWordTy); 2381 //.. gdt_ptr = newTemp(hWordTy); 2382 //.. r64 = newTemp(Ity_I64); 2383 //.. 2384 //.. 
assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) ); 2385 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy )); 2386 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy )); 2387 //.. 2388 //.. /* 2389 //.. Call this to do the translation and limit checks: 2390 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt, 2391 //.. UInt seg_selector, UInt virtual_addr ) 2392 //.. */ 2393 //.. assign( 2394 //.. r64, 2395 //.. mkIRExprCCall( 2396 //.. Ity_I64, 2397 //.. 0/*regparms*/, 2398 //.. "x86g_use_seg_selector", 2399 //.. &x86g_use_seg_selector, 2400 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr), 2401 //.. mkexpr(seg_selector), virtual) 2402 //.. ) 2403 //.. ); 2404 //.. 2405 //.. /* If the high 32 of the result are non-zero, there was a 2406 //.. failure in address translation. In which case, make a 2407 //.. quick exit. 2408 //.. */ 2409 //.. stmt( 2410 //.. IRStmt_Exit( 2411 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)), 2412 //.. Ijk_MapFail, 2413 //.. IRConst_U32( guest_eip_curr_instr ) 2414 //.. ) 2415 //.. ); 2416 //.. 2417 //.. /* otherwise, here's the translated result. */ 2418 //.. return unop(Iop_64to32, mkexpr(r64)); 2419 //.. } 2420 2421 2422 /* Generate IR to calculate an address indicated by a ModRM and 2423 following SIB bytes. The expression, and the number of bytes in 2424 the address mode, are returned (the latter in *len). Note that 2425 this fn should not be called if the R/M part of the address denotes 2426 a register instead of memory. If print_codegen is true, text of 2427 the addressing mode is placed in buf. 2428 2429 The computed address is stored in a new tempreg, and the 2430 identity of the tempreg is returned. 2431 2432 extra_bytes holds the number of bytes after the amode, as supplied 2433 by the caller. This is needed to make sense of %rip-relative 2434 addresses. Note that the value that *len is set to is only the 2435 length of the amode itself and does not include the value supplied 2436 in extra_bytes. 2437 */ 2438 2439 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 ) 2440 { 2441 IRTemp tmp = newTemp(Ity_I64); 2442 assign( tmp, addr64 ); 2443 return tmp; 2444 } 2445 2446 static 2447 IRTemp disAMode ( /*OUT*/Int* len, 2448 VexAbiInfo* vbi, Prefix pfx, Long delta, 2449 /*OUT*/HChar* buf, Int extra_bytes ) 2450 { 2451 UChar mod_reg_rm = getUChar(delta); 2452 delta++; 2453 2454 buf[0] = (UChar)0; 2455 vassert(extra_bytes >= 0 && extra_bytes < 10); 2456 2457 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2458 jump table seems a bit excessive. 2459 */ 2460 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2461 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2462 /* is now XX0XXYYY */ 2463 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2464 switch (mod_reg_rm) { 2465 2466 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2467 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2468 */ 2469 case 0x00: case 0x01: case 0x02: case 0x03: 2470 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2471 { UChar rm = toUChar(mod_reg_rm & 7); 2472 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2473 *len = 1; 2474 return disAMode_copy2tmp( 2475 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm))); 2476 } 2477 2478 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2479 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2480 */ 2481 case 0x08: case 0x09: case 0x0A: case 0x0B: 2482 /* ! 
0C */ case 0x0D: case 0x0E: case 0x0F: 2483 { UChar rm = toUChar(mod_reg_rm & 7); 2484 Long d = getSDisp8(delta); 2485 if (d == 0) { 2486 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm)); 2487 } else { 2488 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2489 } 2490 *len = 2; 2491 return disAMode_copy2tmp( 2492 handleAddrOverrides(vbi, pfx, 2493 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2494 } 2495 2496 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2497 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2498 */ 2499 case 0x10: case 0x11: case 0x12: case 0x13: 2500 /* ! 14 */ case 0x15: case 0x16: case 0x17: 2501 { UChar rm = toUChar(mod_reg_rm & 7); 2502 Long d = getSDisp32(delta); 2503 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm)); 2504 *len = 5; 2505 return disAMode_copy2tmp( 2506 handleAddrOverrides(vbi, pfx, 2507 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d)))); 2508 } 2509 2510 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2511 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ 2512 case 0x18: case 0x19: case 0x1A: case 0x1B: 2513 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2514 vpanic("disAMode(amd64): not an addr!"); 2515 2516 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set 2517 correctly at the start of handling each instruction. */ 2518 case 0x05: 2519 { Long d = getSDisp32(delta); 2520 *len = 5; 2521 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d); 2522 /* We need to know the next instruction's start address. 2523 Try and figure out what it is, record the guess, and ask 2524 the top-level driver logic (bbToIR_AMD64) to check we 2525 guessed right, after the instruction is completely 2526 decoded. */ 2527 guest_RIP_next_mustcheck = True; 2528 guest_RIP_next_assumed = guest_RIP_bbstart 2529 + delta+4 + extra_bytes; 2530 return disAMode_copy2tmp( 2531 handleAddrOverrides(vbi, pfx, 2532 binop(Iop_Add64, mkU64(guest_RIP_next_assumed), 2533 mkU64(d)))); 2534 } 2535 2536 case 0x04: { 2537 /* SIB, with no displacement. Special cases: 2538 -- %rsp cannot act as an index value. 2539 If index_r indicates %rsp, zero is used for the index. 2540 -- when mod is zero and base indicates RBP or R13, base is 2541 instead a 32-bit sign-extended literal. 2542 It's all madness, I tell you. Extract %index, %base and 2543 scale from the SIB byte. 
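         Worked example: SIB byte 0xCB gives scale=3, index=1 (%rcx),
         base=3 (%rbx), so with REX.X = REX.B = 0 it denotes (%rbx,%rcx,8).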
The value denoted is then: 2544 | %index == %RSP && (%base == %RBP || %base == %R13) 2545 = d32 following SIB byte 2546 | %index == %RSP && !(%base == %RBP || %base == %R13) 2547 = %base 2548 | %index != %RSP && (%base == %RBP || %base == %R13) 2549 = d32 following SIB byte + (%index << scale) 2550 | %index != %RSP && !(%base == %RBP || %base == %R13) 2551 = %base + (%index << scale) 2552 */ 2553 UChar sib = getUChar(delta); 2554 UChar scale = toUChar((sib >> 6) & 3); 2555 UChar index_r = toUChar((sib >> 3) & 7); 2556 UChar base_r = toUChar(sib & 7); 2557 /* correct since #(R13) == 8 + #(RBP) */ 2558 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2559 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx)); 2560 delta++; 2561 2562 if ((!index_is_SP) && (!base_is_BPor13)) { 2563 if (scale == 0) { 2564 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2565 nameIRegRexB(8,pfx,base_r), 2566 nameIReg64rexX(pfx,index_r)); 2567 } else { 2568 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2569 nameIRegRexB(8,pfx,base_r), 2570 nameIReg64rexX(pfx,index_r), 1<<scale); 2571 } 2572 *len = 2; 2573 return 2574 disAMode_copy2tmp( 2575 handleAddrOverrides(vbi, pfx, 2576 binop(Iop_Add64, 2577 getIRegRexB(8,pfx,base_r), 2578 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2579 mkU8(scale))))); 2580 } 2581 2582 if ((!index_is_SP) && base_is_BPor13) { 2583 Long d = getSDisp32(delta); 2584 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, 2585 nameIReg64rexX(pfx,index_r), 1<<scale); 2586 *len = 6; 2587 return 2588 disAMode_copy2tmp( 2589 handleAddrOverrides(vbi, pfx, 2590 binop(Iop_Add64, 2591 binop(Iop_Shl64, getIReg64rexX(pfx,index_r), 2592 mkU8(scale)), 2593 mkU64(d)))); 2594 } 2595 2596 if (index_is_SP && (!base_is_BPor13)) { 2597 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r)); 2598 *len = 2; 2599 return disAMode_copy2tmp( 2600 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r))); 2601 } 2602 2603 if (index_is_SP && base_is_BPor13) { 2604 Long d = getSDisp32(delta); 2605 DIS(buf, "%s%lld", segRegTxt(pfx), d); 2606 *len = 6; 2607 return disAMode_copy2tmp( 2608 handleAddrOverrides(vbi, pfx, mkU64(d))); 2609 } 2610 2611 vassert(0); 2612 } 2613 2614 /* SIB, with 8-bit displacement. Special cases: 2615 -- %esp cannot act as an index value. 2616 If index_r indicates %esp, zero is used for the index. 
2617 Denoted value is: 2618 | %index == %ESP 2619 = d8 + %base 2620 | %index != %ESP 2621 = d8 + %base + (%index << scale) 2622 */ 2623 case 0x0C: { 2624 UChar sib = getUChar(delta); 2625 UChar scale = toUChar((sib >> 6) & 3); 2626 UChar index_r = toUChar((sib >> 3) & 7); 2627 UChar base_r = toUChar(sib & 7); 2628 Long d = getSDisp8(delta+1); 2629 2630 if (index_r == R_RSP && 0==getRexX(pfx)) { 2631 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2632 d, nameIRegRexB(8,pfx,base_r)); 2633 *len = 3; 2634 return disAMode_copy2tmp( 2635 handleAddrOverrides(vbi, pfx, 2636 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2637 } else { 2638 if (scale == 0) { 2639 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2640 nameIRegRexB(8,pfx,base_r), 2641 nameIReg64rexX(pfx,index_r)); 2642 } else { 2643 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2644 nameIRegRexB(8,pfx,base_r), 2645 nameIReg64rexX(pfx,index_r), 1<<scale); 2646 } 2647 *len = 3; 2648 return 2649 disAMode_copy2tmp( 2650 handleAddrOverrides(vbi, pfx, 2651 binop(Iop_Add64, 2652 binop(Iop_Add64, 2653 getIRegRexB(8,pfx,base_r), 2654 binop(Iop_Shl64, 2655 getIReg64rexX(pfx,index_r), mkU8(scale))), 2656 mkU64(d)))); 2657 } 2658 vassert(0); /*NOTREACHED*/ 2659 } 2660 2661 /* SIB, with 32-bit displacement. Special cases: 2662 -- %rsp cannot act as an index value. 2663 If index_r indicates %rsp, zero is used for the index. 2664 Denoted value is: 2665 | %index == %RSP 2666 = d32 + %base 2667 | %index != %RSP 2668 = d32 + %base + (%index << scale) 2669 */ 2670 case 0x14: { 2671 UChar sib = getUChar(delta); 2672 UChar scale = toUChar((sib >> 6) & 3); 2673 UChar index_r = toUChar((sib >> 3) & 7); 2674 UChar base_r = toUChar(sib & 7); 2675 Long d = getSDisp32(delta+1); 2676 2677 if (index_r == R_RSP && 0==getRexX(pfx)) { 2678 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), 2679 d, nameIRegRexB(8,pfx,base_r)); 2680 *len = 6; 2681 return disAMode_copy2tmp( 2682 handleAddrOverrides(vbi, pfx, 2683 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) )); 2684 } else { 2685 if (scale == 0) { 2686 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2687 nameIRegRexB(8,pfx,base_r), 2688 nameIReg64rexX(pfx,index_r)); 2689 } else { 2690 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2691 nameIRegRexB(8,pfx,base_r), 2692 nameIReg64rexX(pfx,index_r), 1<<scale); 2693 } 2694 *len = 6; 2695 return 2696 disAMode_copy2tmp( 2697 handleAddrOverrides(vbi, pfx, 2698 binop(Iop_Add64, 2699 binop(Iop_Add64, 2700 getIRegRexB(8,pfx,base_r), 2701 binop(Iop_Shl64, 2702 getIReg64rexX(pfx,index_r), mkU8(scale))), 2703 mkU64(d)))); 2704 } 2705 vassert(0); /*NOTREACHED*/ 2706 } 2707 2708 default: 2709 vpanic("disAMode(amd64)"); 2710 return 0; /*notreached*/ 2711 } 2712 } 2713 2714 2715 /* Similarly for VSIB addressing. This returns just the addend, 2716 and fills in *rI and *vscale with the register number of the vector 2717 index and its multiplicand. 
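   For example, an amode written as 4(%rbx,%xmm2,8) comes back as a temp
   holding %rbx+4, with *rI set to 2 and *vscale set to 8 (assuming REX.X
   and REX.B are both zero).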
*/ 2718 static 2719 IRTemp disAVSIBMode ( /*OUT*/Int* len, 2720 VexAbiInfo* vbi, Prefix pfx, Long delta, 2721 /*OUT*/HChar* buf, /*OUT*/UInt* rI, 2722 IRType ty, /*OUT*/Int* vscale ) 2723 { 2724 UChar mod_reg_rm = getUChar(delta); 2725 const HChar *vindex; 2726 2727 *len = 0; 2728 *rI = 0; 2729 *vscale = 0; 2730 buf[0] = (UChar)0; 2731 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm)) 2732 return IRTemp_INVALID; 2733 2734 UChar sib = getUChar(delta+1); 2735 UChar scale = toUChar((sib >> 6) & 3); 2736 UChar index_r = toUChar((sib >> 3) & 7); 2737 UChar base_r = toUChar(sib & 7); 2738 Long d = 0; 2739 /* correct since #(R13) == 8 + #(RBP) */ 2740 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2741 delta += 2; 2742 *len = 2; 2743 2744 *rI = index_r | (getRexX(pfx) << 3); 2745 if (ty == Ity_V128) 2746 vindex = nameXMMReg(*rI); 2747 else 2748 vindex = nameYMMReg(*rI); 2749 *vscale = 1<<scale; 2750 2751 switch (mod_reg_rm >> 6) { 2752 case 0: 2753 if (base_is_BPor13) { 2754 d = getSDisp32(delta); 2755 *len += 4; 2756 if (scale == 0) { 2757 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex); 2758 } else { 2759 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale); 2760 } 2761 return disAMode_copy2tmp( mkU64(d) ); 2762 } else { 2763 if (scale == 0) { 2764 DIS(buf, "%s(%s,%s)", segRegTxt(pfx), 2765 nameIRegRexB(8,pfx,base_r), vindex); 2766 } else { 2767 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx), 2768 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale); 2769 } 2770 } 2771 break; 2772 case 1: 2773 d = getSDisp8(delta); 2774 *len += 1; 2775 goto have_disp; 2776 case 2: 2777 d = getSDisp32(delta); 2778 *len += 4; 2779 have_disp: 2780 if (scale == 0) { 2781 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d, 2782 nameIRegRexB(8,pfx,base_r), vindex); 2783 } else { 2784 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d, 2785 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale); 2786 } 2787 break; 2788 } 2789 2790 if (!d) 2791 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) ); 2792 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r), 2793 mkU64(d)) ); 2794 } 2795 2796 2797 /* Figure out the number of (insn-stream) bytes constituting the amode 2798 beginning at delta. Is useful for getting hold of literals beyond 2799 the end of the amode before it has been disassembled. */ 2800 2801 static UInt lengthAMode ( Prefix pfx, Long delta ) 2802 { 2803 UChar mod_reg_rm = getUChar(delta); 2804 delta++; 2805 2806 /* squeeze out the reg field from mod_reg_rm, since a 256-entry 2807 jump table seems a bit excessive. 2808 */ 2809 mod_reg_rm &= 0xC7; /* is now XX000YYY */ 2810 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3)); 2811 /* is now XX0XXYYY */ 2812 mod_reg_rm &= 0x1F; /* is now 000XXYYY */ 2813 switch (mod_reg_rm) { 2814 2815 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp). 2816 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13). 2817 */ 2818 case 0x00: case 0x01: case 0x02: case 0x03: 2819 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: 2820 return 1; 2821 2822 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp) 2823 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12) 2824 */ 2825 case 0x08: case 0x09: case 0x0A: case 0x0B: 2826 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: 2827 return 2; 2828 2829 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp) 2830 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12) 2831 */ 2832 case 0x10: case 0x11: case 0x12: case 0x13: 2833 /* ! 
14 */ case 0x15: case 0x16: case 0x17: 2834 return 5; 2835 2836 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */ 2837 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */ 2838 /* Not an address, but still handled. */ 2839 case 0x18: case 0x19: case 0x1A: case 0x1B: 2840 case 0x1C: case 0x1D: case 0x1E: case 0x1F: 2841 return 1; 2842 2843 /* RIP + disp32. */ 2844 case 0x05: 2845 return 5; 2846 2847 case 0x04: { 2848 /* SIB, with no displacement. */ 2849 UChar sib = getUChar(delta); 2850 UChar base_r = toUChar(sib & 7); 2851 /* correct since #(R13) == 8 + #(RBP) */ 2852 Bool base_is_BPor13 = toBool(base_r == R_RBP); 2853 2854 if (base_is_BPor13) { 2855 return 6; 2856 } else { 2857 return 2; 2858 } 2859 } 2860 2861 /* SIB, with 8-bit displacement. */ 2862 case 0x0C: 2863 return 3; 2864 2865 /* SIB, with 32-bit displacement. */ 2866 case 0x14: 2867 return 6; 2868 2869 default: 2870 vpanic("lengthAMode(amd64)"); 2871 return 0; /*notreached*/ 2872 } 2873 } 2874 2875 2876 /*------------------------------------------------------------*/ 2877 /*--- Disassembling common idioms ---*/ 2878 /*------------------------------------------------------------*/ 2879 2880 /* Handle binary integer instructions of the form 2881 op E, G meaning 2882 op reg-or-mem, reg 2883 Is passed the a ptr to the modRM byte, the actual operation, and the 2884 data size. Returns the address advanced completely over this 2885 instruction. 2886 2887 E(src) is reg-or-mem 2888 G(dst) is reg. 2889 2890 If E is reg, --> GET %G, tmp 2891 OP %E, tmp 2892 PUT tmp, %G 2893 2894 If E is mem and OP is not reversible, 2895 --> (getAddr E) -> tmpa 2896 LD (tmpa), tmpa 2897 GET %G, tmp2 2898 OP tmpa, tmp2 2899 PUT tmp2, %G 2900 2901 If E is mem and OP is reversible 2902 --> (getAddr E) -> tmpa 2903 LD (tmpa), tmpa 2904 OP %G, tmpa 2905 PUT tmpa, %G 2906 */ 2907 static 2908 ULong dis_op2_E_G ( VexAbiInfo* vbi, 2909 Prefix pfx, 2910 Bool addSubCarry, 2911 IROp op8, 2912 Bool keep, 2913 Int size, 2914 Long delta0, 2915 const HChar* t_amd64opc ) 2916 { 2917 HChar dis_buf[50]; 2918 Int len; 2919 IRType ty = szToITy(size); 2920 IRTemp dst1 = newTemp(ty); 2921 IRTemp src = newTemp(ty); 2922 IRTemp dst0 = newTemp(ty); 2923 UChar rm = getUChar(delta0); 2924 IRTemp addr = IRTemp_INVALID; 2925 2926 /* addSubCarry == True indicates the intended operation is 2927 add-with-carry or subtract-with-borrow. */ 2928 if (addSubCarry) { 2929 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 2930 vassert(keep); 2931 } 2932 2933 if (epartIsReg(rm)) { 2934 /* Specially handle XOR reg,reg, because that doesn't really 2935 depend on reg, and doing the obvious thing potentially 2936 generates a spurious value check failure due to the bogus 2937 dependency. 
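        E.g. xorq %rax,%rax always produces zero, so it must not be flagged
        as depending on a possibly-undefined %rax; likewise sbbq %rax,%rax,
        which depends only on the carry flag.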
*/ 2938 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 2939 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 2940 if (False && op8 == Iop_Sub8) 2941 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n"); 2942 putIRegG(size,pfx,rm, mkU(ty,0)); 2943 } 2944 2945 assign( dst0, getIRegG(size,pfx,rm) ); 2946 assign( src, getIRegE(size,pfx,rm) ); 2947 2948 if (addSubCarry && op8 == Iop_Add8) { 2949 helper_ADC( size, dst1, dst0, src, 2950 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2951 putIRegG(size, pfx, rm, mkexpr(dst1)); 2952 } else 2953 if (addSubCarry && op8 == Iop_Sub8) { 2954 helper_SBB( size, dst1, dst0, src, 2955 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2956 putIRegG(size, pfx, rm, mkexpr(dst1)); 2957 } else { 2958 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2959 if (isAddSub(op8)) 2960 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2961 else 2962 setFlags_DEP1(op8, dst1, ty); 2963 if (keep) 2964 putIRegG(size, pfx, rm, mkexpr(dst1)); 2965 } 2966 2967 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2968 nameIRegE(size,pfx,rm), 2969 nameIRegG(size,pfx,rm)); 2970 return 1+delta0; 2971 } else { 2972 /* E refers to memory */ 2973 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 2974 assign( dst0, getIRegG(size,pfx,rm) ); 2975 assign( src, loadLE(szToITy(size), mkexpr(addr)) ); 2976 2977 if (addSubCarry && op8 == Iop_Add8) { 2978 helper_ADC( size, dst1, dst0, src, 2979 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2980 putIRegG(size, pfx, rm, mkexpr(dst1)); 2981 } else 2982 if (addSubCarry && op8 == Iop_Sub8) { 2983 helper_SBB( size, dst1, dst0, src, 2984 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 2985 putIRegG(size, pfx, rm, mkexpr(dst1)); 2986 } else { 2987 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 2988 if (isAddSub(op8)) 2989 setFlags_DEP1_DEP2(op8, dst0, src, ty); 2990 else 2991 setFlags_DEP1(op8, dst1, ty); 2992 if (keep) 2993 putIRegG(size, pfx, rm, mkexpr(dst1)); 2994 } 2995 2996 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 2997 dis_buf, nameIRegG(size, pfx, rm)); 2998 return len+delta0; 2999 } 3000 } 3001 3002 3003 3004 /* Handle binary integer instructions of the form 3005 op G, E meaning 3006 op reg, reg-or-mem 3007 Is passed the a ptr to the modRM byte, the actual operation, and the 3008 data size. Returns the address advanced completely over this 3009 instruction. 3010 3011 G(src) is reg. 3012 E(dst) is reg-or-mem 3013 3014 If E is reg, --> GET %E, tmp 3015 OP %G, tmp 3016 PUT tmp, %E 3017 3018 If E is mem, --> (getAddr E) -> tmpa 3019 LD (tmpa), tmpv 3020 OP %G, tmpv 3021 ST tmpv, (tmpa) 3022 */ 3023 static 3024 ULong dis_op2_G_E ( VexAbiInfo* vbi, 3025 Prefix pfx, 3026 Bool addSubCarry, 3027 IROp op8, 3028 Bool keep, 3029 Int size, 3030 Long delta0, 3031 const HChar* t_amd64opc ) 3032 { 3033 HChar dis_buf[50]; 3034 Int len; 3035 IRType ty = szToITy(size); 3036 IRTemp dst1 = newTemp(ty); 3037 IRTemp src = newTemp(ty); 3038 IRTemp dst0 = newTemp(ty); 3039 UChar rm = getUChar(delta0); 3040 IRTemp addr = IRTemp_INVALID; 3041 3042 /* addSubCarry == True indicates the intended operation is 3043 add-with-carry or subtract-with-borrow. */ 3044 if (addSubCarry) { 3045 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8); 3046 vassert(keep); 3047 } 3048 3049 if (epartIsReg(rm)) { 3050 /* Specially handle XOR reg,reg, because that doesn't really 3051 depend on reg, and doing the obvious thing potentially 3052 generates a spurious value check failure due to the bogus 3053 dependency. 
Ditto SBB reg,reg. */ 3054 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 3055 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 3056 putIRegE(size,pfx,rm, mkU(ty,0)); 3057 } 3058 3059 assign(dst0, getIRegE(size,pfx,rm)); 3060 assign(src, getIRegG(size,pfx,rm)); 3061 3062 if (addSubCarry && op8 == Iop_Add8) { 3063 helper_ADC( size, dst1, dst0, src, 3064 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3065 putIRegE(size, pfx, rm, mkexpr(dst1)); 3066 } else 3067 if (addSubCarry && op8 == Iop_Sub8) { 3068 helper_SBB( size, dst1, dst0, src, 3069 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3070 putIRegE(size, pfx, rm, mkexpr(dst1)); 3071 } else { 3072 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3073 if (isAddSub(op8)) 3074 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3075 else 3076 setFlags_DEP1(op8, dst1, ty); 3077 if (keep) 3078 putIRegE(size, pfx, rm, mkexpr(dst1)); 3079 } 3080 3081 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3082 nameIRegG(size,pfx,rm), 3083 nameIRegE(size,pfx,rm)); 3084 return 1+delta0; 3085 } 3086 3087 /* E refers to memory */ 3088 { 3089 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3090 assign(dst0, loadLE(ty,mkexpr(addr))); 3091 assign(src, getIRegG(size,pfx,rm)); 3092 3093 if (addSubCarry && op8 == Iop_Add8) { 3094 if (haveLOCK(pfx)) { 3095 /* cas-style store */ 3096 helper_ADC( size, dst1, dst0, src, 3097 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3098 } else { 3099 /* normal store */ 3100 helper_ADC( size, dst1, dst0, src, 3101 /*store*/addr, IRTemp_INVALID, 0 ); 3102 } 3103 } else 3104 if (addSubCarry && op8 == Iop_Sub8) { 3105 if (haveLOCK(pfx)) { 3106 /* cas-style store */ 3107 helper_SBB( size, dst1, dst0, src, 3108 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3109 } else { 3110 /* normal store */ 3111 helper_SBB( size, dst1, dst0, src, 3112 /*store*/addr, IRTemp_INVALID, 0 ); 3113 } 3114 } else { 3115 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3116 if (keep) { 3117 if (haveLOCK(pfx)) { 3118 if (0) vex_printf("locked case\n" ); 3119 casLE( mkexpr(addr), 3120 mkexpr(dst0)/*expval*/, 3121 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 3122 } else { 3123 if (0) vex_printf("nonlocked case\n"); 3124 storeLE(mkexpr(addr), mkexpr(dst1)); 3125 } 3126 } 3127 if (isAddSub(op8)) 3128 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3129 else 3130 setFlags_DEP1(op8, dst1, ty); 3131 } 3132 3133 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3134 nameIRegG(size,pfx,rm), dis_buf); 3135 return len+delta0; 3136 } 3137 } 3138 3139 3140 /* Handle move instructions of the form 3141 mov E, G meaning 3142 mov reg-or-mem, reg 3143 Is passed the a ptr to the modRM byte, and the data size. Returns 3144 the address advanced completely over this instruction. 3145 3146 E(src) is reg-or-mem 3147 G(dst) is reg. 
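   (E.g. for movq (%rsi),%rax, E is the memory operand (%rsi) and G is
   %rax.)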
3148 3149 If E is reg, --> GET %E, tmpv 3150 PUT tmpv, %G 3151 3152 If E is mem --> (getAddr E) -> tmpa 3153 LD (tmpa), tmpb 3154 PUT tmpb, %G 3155 */ 3156 static 3157 ULong dis_mov_E_G ( VexAbiInfo* vbi, 3158 Prefix pfx, 3159 Int size, 3160 Long delta0 ) 3161 { 3162 Int len; 3163 UChar rm = getUChar(delta0); 3164 HChar dis_buf[50]; 3165 3166 if (epartIsReg(rm)) { 3167 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 3168 DIP("mov%c %s,%s\n", nameISize(size), 3169 nameIRegE(size,pfx,rm), 3170 nameIRegG(size,pfx,rm)); 3171 return 1+delta0; 3172 } 3173 3174 /* E refers to memory */ 3175 { 3176 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3177 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 3178 DIP("mov%c %s,%s\n", nameISize(size), 3179 dis_buf, 3180 nameIRegG(size,pfx,rm)); 3181 return delta0+len; 3182 } 3183 } 3184 3185 3186 /* Handle move instructions of the form 3187 mov G, E meaning 3188 mov reg, reg-or-mem 3189 Is passed the a ptr to the modRM byte, and the data size. Returns 3190 the address advanced completely over this instruction. 3191 We have to decide here whether F2 or F3 are acceptable. F2 never is. 3192 3193 G(src) is reg. 3194 E(dst) is reg-or-mem 3195 3196 If E is reg, --> GET %G, tmp 3197 PUT tmp, %E 3198 3199 If E is mem, --> (getAddr E) -> tmpa 3200 GET %G, tmpv 3201 ST tmpv, (tmpa) 3202 */ 3203 static 3204 ULong dis_mov_G_E ( VexAbiInfo* vbi, 3205 Prefix pfx, 3206 Int size, 3207 Long delta0, 3208 /*OUT*/Bool* ok ) 3209 { 3210 Int len; 3211 UChar rm = getUChar(delta0); 3212 HChar dis_buf[50]; 3213 3214 *ok = True; 3215 3216 if (epartIsReg(rm)) { 3217 if (haveF2orF3(pfx)) { *ok = False; return delta0; } 3218 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm)); 3219 DIP("mov%c %s,%s\n", nameISize(size), 3220 nameIRegG(size,pfx,rm), 3221 nameIRegE(size,pfx,rm)); 3222 return 1+delta0; 3223 } 3224 3225 /* E refers to memory */ 3226 { 3227 if (haveF2(pfx)) { *ok = False; return delta0; } 3228 /* F3(XRELEASE) is acceptable, though. */ 3229 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3230 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) ); 3231 DIP("mov%c %s,%s\n", nameISize(size), 3232 nameIRegG(size,pfx,rm), 3233 dis_buf); 3234 return len+delta0; 3235 } 3236 } 3237 3238 3239 /* op $immediate, AL/AX/EAX/RAX. 
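   E.g. addl $0x40, %eax or adcq $-1, %rax. The immediate is read as at
   most 4 bytes and sign-extended (see getSDisp below), so the 64-bit
   forms take a 32-bit immediate.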
*/ 3240 static 3241 ULong dis_op_imm_A ( Int size, 3242 Bool carrying, 3243 IROp op8, 3244 Bool keep, 3245 Long delta, 3246 const HChar* t_amd64opc ) 3247 { 3248 Int size4 = imin(size,4); 3249 IRType ty = szToITy(size); 3250 IRTemp dst0 = newTemp(ty); 3251 IRTemp src = newTemp(ty); 3252 IRTemp dst1 = newTemp(ty); 3253 Long lit = getSDisp(size4,delta); 3254 assign(dst0, getIRegRAX(size)); 3255 assign(src, mkU(ty,lit & mkSizeMask(size))); 3256 3257 if (isAddSub(op8) && !carrying) { 3258 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3259 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3260 } 3261 else 3262 if (isLogic(op8)) { 3263 vassert(!carrying); 3264 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) ); 3265 setFlags_DEP1(op8, dst1, ty); 3266 } 3267 else 3268 if (op8 == Iop_Add8 && carrying) { 3269 helper_ADC( size, dst1, dst0, src, 3270 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3271 } 3272 else 3273 if (op8 == Iop_Sub8 && carrying) { 3274 helper_SBB( size, dst1, dst0, src, 3275 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3276 } 3277 else 3278 vpanic("dis_op_imm_A(amd64,guest)"); 3279 3280 if (keep) 3281 putIRegRAX(size, mkexpr(dst1)); 3282 3283 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size), 3284 lit, nameIRegRAX(size)); 3285 return delta+size4; 3286 } 3287 3288 3289 /* Sign- and Zero-extending moves. */ 3290 static 3291 ULong dis_movx_E_G ( VexAbiInfo* vbi, 3292 Prefix pfx, 3293 Long delta, Int szs, Int szd, Bool sign_extend ) 3294 { 3295 UChar rm = getUChar(delta); 3296 if (epartIsReg(rm)) { 3297 putIRegG(szd, pfx, rm, 3298 doScalarWidening( 3299 szs,szd,sign_extend, 3300 getIRegE(szs,pfx,rm))); 3301 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3302 nameISize(szs), 3303 nameISize(szd), 3304 nameIRegE(szs,pfx,rm), 3305 nameIRegG(szd,pfx,rm)); 3306 return 1+delta; 3307 } 3308 3309 /* E refers to memory */ 3310 { 3311 Int len; 3312 HChar dis_buf[50]; 3313 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 3314 putIRegG(szd, pfx, rm, 3315 doScalarWidening( 3316 szs,szd,sign_extend, 3317 loadLE(szToITy(szs),mkexpr(addr)))); 3318 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z', 3319 nameISize(szs), 3320 nameISize(szd), 3321 dis_buf, 3322 nameIRegG(szd,pfx,rm)); 3323 return len+delta; 3324 } 3325 } 3326 3327 3328 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by 3329 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */ 3330 static 3331 void codegen_div ( Int sz, IRTemp t, Bool signed_divide ) 3332 { 3333 /* special-case the 64-bit case */ 3334 if (sz == 8) { 3335 IROp op = signed_divide ? Iop_DivModS128to64 3336 : Iop_DivModU128to64; 3337 IRTemp src128 = newTemp(Ity_I128); 3338 IRTemp dst128 = newTemp(Ity_I128); 3339 assign( src128, binop(Iop_64HLto128, 3340 getIReg64(R_RDX), 3341 getIReg64(R_RAX)) ); 3342 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) ); 3343 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) ); 3344 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) ); 3345 } else { 3346 IROp op = signed_divide ? Iop_DivModS64to32 3347 : Iop_DivModU64to32; 3348 IRTemp src64 = newTemp(Ity_I64); 3349 IRTemp dst64 = newTemp(Ity_I64); 3350 switch (sz) { 3351 case 4: 3352 assign( src64, 3353 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) ); 3354 assign( dst64, 3355 binop(op, mkexpr(src64), mkexpr(t)) ); 3356 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) ); 3357 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) ); 3358 break; 3359 case 2: { 3360 IROp widen3264 = signed_divide ? 
Iop_32Sto64 : Iop_32Uto64; 3361 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3362 assign( src64, unop(widen3264, 3363 binop(Iop_16HLto32, 3364 getIRegRDX(2), 3365 getIRegRAX(2))) ); 3366 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) ); 3367 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) ); 3368 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) ); 3369 break; 3370 } 3371 case 1: { 3372 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64; 3373 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32; 3374 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16; 3375 assign( src64, unop(widen3264, 3376 unop(widen1632, getIRegRAX(2))) ); 3377 assign( dst64, 3378 binop(op, mkexpr(src64), 3379 unop(widen1632, unop(widen816, mkexpr(t)))) ); 3380 putIRegRAX( 1, unop(Iop_16to8, 3381 unop(Iop_32to16, 3382 unop(Iop_64to32,mkexpr(dst64)))) ); 3383 putIRegAH( unop(Iop_16to8, 3384 unop(Iop_32to16, 3385 unop(Iop_64HIto32,mkexpr(dst64)))) ); 3386 break; 3387 } 3388 default: 3389 vpanic("codegen_div(amd64)"); 3390 } 3391 } 3392 } 3393 3394 static 3395 ULong dis_Grp1 ( VexAbiInfo* vbi, 3396 Prefix pfx, 3397 Long delta, UChar modrm, 3398 Int am_sz, Int d_sz, Int sz, Long d64 ) 3399 { 3400 Int len; 3401 HChar dis_buf[50]; 3402 IRType ty = szToITy(sz); 3403 IRTemp dst1 = newTemp(ty); 3404 IRTemp src = newTemp(ty); 3405 IRTemp dst0 = newTemp(ty); 3406 IRTemp addr = IRTemp_INVALID; 3407 IROp op8 = Iop_INVALID; 3408 ULong mask = mkSizeMask(sz); 3409 3410 switch (gregLO3ofRM(modrm)) { 3411 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break; 3412 case 2: break; // ADC 3413 case 3: break; // SBB 3414 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break; 3415 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break; 3416 /*NOTREACHED*/ 3417 default: vpanic("dis_Grp1(amd64): unhandled case"); 3418 } 3419 3420 if (epartIsReg(modrm)) { 3421 vassert(am_sz == 1); 3422 3423 assign(dst0, getIRegE(sz,pfx,modrm)); 3424 assign(src, mkU(ty,d64 & mask)); 3425 3426 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3427 helper_ADC( sz, dst1, dst0, src, 3428 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3429 } else 3430 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3431 helper_SBB( sz, dst1, dst0, src, 3432 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3433 } else { 3434 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3435 if (isAddSub(op8)) 3436 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3437 else 3438 setFlags_DEP1(op8, dst1, ty); 3439 } 3440 3441 if (gregLO3ofRM(modrm) < 7) 3442 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3443 3444 delta += (am_sz + d_sz); 3445 DIP("%s%c $%lld, %s\n", 3446 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64, 3447 nameIRegE(sz,pfx,modrm)); 3448 } else { 3449 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3450 3451 assign(dst0, loadLE(ty,mkexpr(addr))); 3452 assign(src, mkU(ty,d64 & mask)); 3453 3454 if (gregLO3ofRM(modrm) == 2 /* ADC */) { 3455 if (haveLOCK(pfx)) { 3456 /* cas-style store */ 3457 helper_ADC( sz, dst1, dst0, src, 3458 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3459 } else { 3460 /* normal store */ 3461 helper_ADC( sz, dst1, dst0, src, 3462 /*store*/addr, IRTemp_INVALID, 0 ); 3463 } 3464 } else 3465 if (gregLO3ofRM(modrm) == 3 /* SBB */) { 3466 if (haveLOCK(pfx)) { 3467 /* cas-style store */ 3468 helper_SBB( sz, dst1, dst0, src, 3469 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3470 } else { 3471 /* normal store */ 3472 helper_SBB( sz, dst1, 
dst0, src, 3473 /*store*/addr, IRTemp_INVALID, 0 ); 3474 } 3475 } else { 3476 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3477 if (gregLO3ofRM(modrm) < 7) { 3478 if (haveLOCK(pfx)) { 3479 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 3480 mkexpr(dst1)/*newVal*/, 3481 guest_RIP_curr_instr ); 3482 } else { 3483 storeLE(mkexpr(addr), mkexpr(dst1)); 3484 } 3485 } 3486 if (isAddSub(op8)) 3487 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3488 else 3489 setFlags_DEP1(op8, dst1, ty); 3490 } 3491 3492 delta += (len+d_sz); 3493 DIP("%s%c $%lld, %s\n", 3494 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), 3495 d64, dis_buf); 3496 } 3497 return delta; 3498 } 3499 3500 3501 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed 3502 expression. */ 3503 3504 static 3505 ULong dis_Grp2 ( VexAbiInfo* vbi, 3506 Prefix pfx, 3507 Long delta, UChar modrm, 3508 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr, 3509 const HChar* shift_expr_txt, Bool* decode_OK ) 3510 { 3511 /* delta on entry points at the modrm byte. */ 3512 HChar dis_buf[50]; 3513 Int len; 3514 Bool isShift, isRotate, isRotateC; 3515 IRType ty = szToITy(sz); 3516 IRTemp dst0 = newTemp(ty); 3517 IRTemp dst1 = newTemp(ty); 3518 IRTemp addr = IRTemp_INVALID; 3519 3520 *decode_OK = True; 3521 3522 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 3523 3524 /* Put value to shift/rotate in dst0. */ 3525 if (epartIsReg(modrm)) { 3526 assign(dst0, getIRegE(sz, pfx, modrm)); 3527 delta += (am_sz + d_sz); 3528 } else { 3529 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz ); 3530 assign(dst0, loadLE(ty,mkexpr(addr))); 3531 delta += len + d_sz; 3532 } 3533 3534 isShift = False; 3535 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; } 3536 3537 isRotate = False; 3538 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; } 3539 3540 isRotateC = False; 3541 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; } 3542 3543 if (!isShift && !isRotate && !isRotateC) { 3544 /*NOTREACHED*/ 3545 vpanic("dis_Grp2(Reg): unhandled case(amd64)"); 3546 } 3547 3548 if (isRotateC) { 3549 /* Call a helper; this insn is so ridiculous it does not deserve 3550 better. One problem is, the helper has to calculate both the 3551 new value and the new flags. This is more than 64 bits, and 3552 there is no way to return more than 64 bits from the helper. 3553 Hence the crude and obvious solution is to call it twice, 3554 using the sign of the sz field to indicate whether it is the 3555 value or rflags result we want. 3556 */ 3557 Bool left = toBool(gregLO3ofRM(modrm) == 2); 3558 IRExpr** argsVALUE; 3559 IRExpr** argsRFLAGS; 3560 3561 IRTemp new_value = newTemp(Ity_I64); 3562 IRTemp new_rflags = newTemp(Ity_I64); 3563 IRTemp old_rflags = newTemp(Ity_I64); 3564 3565 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) ); 3566 3567 argsVALUE 3568 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3569 widenUto64(shift_expr), /* rotate amount */ 3570 mkexpr(old_rflags), 3571 mkU64(sz) ); 3572 assign( new_value, 3573 mkIRExprCCall( 3574 Ity_I64, 3575 0/*regparm*/, 3576 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3577 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3578 argsVALUE 3579 ) 3580 ); 3581 3582 argsRFLAGS 3583 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */ 3584 widenUto64(shift_expr), /* rotate amount */ 3585 mkexpr(old_rflags), 3586 mkU64(-sz) ); 3587 assign( new_rflags, 3588 mkIRExprCCall( 3589 Ity_I64, 3590 0/*regparm*/, 3591 left ? 
"amd64g_calculate_RCL" : "amd64g_calculate_RCR", 3592 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR, 3593 argsRFLAGS 3594 ) 3595 ); 3596 3597 assign( dst1, narrowTo(ty, mkexpr(new_value)) ); 3598 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3599 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) )); 3600 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3601 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3602 } 3603 3604 else 3605 if (isShift) { 3606 3607 IRTemp pre64 = newTemp(Ity_I64); 3608 IRTemp res64 = newTemp(Ity_I64); 3609 IRTemp res64ss = newTemp(Ity_I64); 3610 IRTemp shift_amt = newTemp(Ity_I8); 3611 UChar mask = toUChar(sz==8 ? 63 : 31); 3612 IROp op64; 3613 3614 switch (gregLO3ofRM(modrm)) { 3615 case 4: op64 = Iop_Shl64; break; 3616 case 5: op64 = Iop_Shr64; break; 3617 case 6: op64 = Iop_Shl64; break; 3618 case 7: op64 = Iop_Sar64; break; 3619 /*NOTREACHED*/ 3620 default: vpanic("dis_Grp2:shift"); break; 3621 } 3622 3623 /* Widen the value to be shifted to 64 bits, do the shift, and 3624 narrow back down. This seems surprisingly long-winded, but 3625 unfortunately the AMD semantics requires that 8/16/32-bit 3626 shifts give defined results for shift values all the way up 3627 to 32, and this seems the simplest way to do it. It has the 3628 advantage that the only IR level shifts generated are of 64 3629 bit values, and the shift amount is guaranteed to be in the 3630 range 0 .. 63, thereby observing the IR semantics requiring 3631 all shift values to be in the range 0 .. 2^word_size-1. 3632 3633 Therefore the shift amount is masked with 63 for 64-bit shifts 3634 and 31 for all others. 3635 */ 3636 /* shift_amt = shift_expr & MASK, regardless of operation size */ 3637 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) ); 3638 3639 /* suitably widen the value to be shifted to 64 bits. */ 3640 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0)) 3641 : widenUto64(mkexpr(dst0)) ); 3642 3643 /* res64 = pre64 `shift` shift_amt */ 3644 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) ); 3645 3646 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */ 3647 assign( res64ss, 3648 binop(op64, 3649 mkexpr(pre64), 3650 binop(Iop_And8, 3651 binop(Iop_Sub8, 3652 mkexpr(shift_amt), mkU8(1)), 3653 mkU8(mask))) ); 3654 3655 /* Build the flags thunk. */ 3656 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt); 3657 3658 /* Narrow the result back down. */ 3659 assign( dst1, narrowTo(ty, mkexpr(res64)) ); 3660 3661 } /* if (isShift) */ 3662 3663 else 3664 if (isRotate) { 3665 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 3666 : (ty==Ity_I32 ? 2 : 3)); 3667 Bool left = toBool(gregLO3ofRM(modrm) == 0); 3668 IRTemp rot_amt = newTemp(Ity_I8); 3669 IRTemp rot_amt64 = newTemp(Ity_I8); 3670 IRTemp oldFlags = newTemp(Ity_I64); 3671 UChar mask = toUChar(sz==8 ? 63 : 31); 3672 3673 /* rot_amt = shift_expr & mask */ 3674 /* By masking the rotate amount thusly, the IR-level Shl/Shr 3675 expressions never shift beyond the word size and thus remain 3676 well defined. 
*/ 3677 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask))); 3678 3679 if (ty == Ity_I64) 3680 assign(rot_amt, mkexpr(rot_amt64)); 3681 else 3682 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1))); 3683 3684 if (left) { 3685 3686 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */ 3687 assign(dst1, 3688 binop( mkSizedOp(ty,Iop_Or8), 3689 binop( mkSizedOp(ty,Iop_Shl8), 3690 mkexpr(dst0), 3691 mkexpr(rot_amt) 3692 ), 3693 binop( mkSizedOp(ty,Iop_Shr8), 3694 mkexpr(dst0), 3695 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3696 ) 3697 ) 3698 ); 3699 ccOp += AMD64G_CC_OP_ROLB; 3700 3701 } else { /* right */ 3702 3703 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */ 3704 assign(dst1, 3705 binop( mkSizedOp(ty,Iop_Or8), 3706 binop( mkSizedOp(ty,Iop_Shr8), 3707 mkexpr(dst0), 3708 mkexpr(rot_amt) 3709 ), 3710 binop( mkSizedOp(ty,Iop_Shl8), 3711 mkexpr(dst0), 3712 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt)) 3713 ) 3714 ) 3715 ); 3716 ccOp += AMD64G_CC_OP_RORB; 3717 3718 } 3719 3720 /* dst1 now holds the rotated value. Build flag thunk. We 3721 need the resulting value for this, and the previous flags. 3722 Except don't set it if the rotate count is zero. */ 3723 3724 assign(oldFlags, mk_amd64g_calculate_rflags_all()); 3725 3726 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */ 3727 IRTemp rot_amt64b = newTemp(Ity_I1); 3728 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) ); 3729 3730 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */ 3731 stmt( IRStmt_Put( OFFB_CC_OP, 3732 IRExpr_ITE( mkexpr(rot_amt64b), 3733 mkU64(ccOp), 3734 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) )); 3735 stmt( IRStmt_Put( OFFB_CC_DEP1, 3736 IRExpr_ITE( mkexpr(rot_amt64b), 3737 widenUto64(mkexpr(dst1)), 3738 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) )); 3739 stmt( IRStmt_Put( OFFB_CC_DEP2, 3740 IRExpr_ITE( mkexpr(rot_amt64b), 3741 mkU64(0), 3742 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) )); 3743 stmt( IRStmt_Put( OFFB_CC_NDEP, 3744 IRExpr_ITE( mkexpr(rot_amt64b), 3745 mkexpr(oldFlags), 3746 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) )); 3747 } /* if (isRotate) */ 3748 3749 /* Save result, and finish up. */ 3750 if (epartIsReg(modrm)) { 3751 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 3752 if (vex_traceflags & VEX_TRACE_FE) { 3753 vex_printf("%s%c ", 3754 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3755 if (shift_expr_txt) 3756 vex_printf("%s", shift_expr_txt); 3757 else 3758 ppIRExpr(shift_expr); 3759 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm)); 3760 } 3761 } else { 3762 storeLE(mkexpr(addr), mkexpr(dst1)); 3763 if (vex_traceflags & VEX_TRACE_FE) { 3764 vex_printf("%s%c ", 3765 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) ); 3766 if (shift_expr_txt) 3767 vex_printf("%s", shift_expr_txt); 3768 else 3769 ppIRExpr(shift_expr); 3770 vex_printf(", %s\n", dis_buf); 3771 } 3772 } 3773 return delta; 3774 } 3775 3776 3777 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ 3778 static 3779 ULong dis_Grp8_Imm ( VexAbiInfo* vbi, 3780 Prefix pfx, 3781 Long delta, UChar modrm, 3782 Int am_sz, Int sz, ULong src_val, 3783 Bool* decode_OK ) 3784 { 3785 /* src_val denotes a d8. 3786 And delta on entry points at the modrm byte. */ 3787 3788 IRType ty = szToITy(sz); 3789 IRTemp t2 = newTemp(Ity_I64); 3790 IRTemp t2m = newTemp(Ity_I64); 3791 IRTemp t_addr = IRTemp_INVALID; 3792 HChar dis_buf[50]; 3793 ULong mask; 3794 3795 /* we're optimists :-) */ 3796 *decode_OK = True; 3797 3798 /* Check whether F2 or F3 are acceptable. 
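      These would be the F2/XACQ and F3/XREL hint prefixes, which only
      make sense on a LOCKed memory destination; hence the register/memory
      split below.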
*/ 3799 if (epartIsReg(modrm)) { 3800 /* F2 or F3 are not allowed in the register case. */ 3801 if (haveF2orF3(pfx)) { 3802 *decode_OK = False; 3803 return delta; 3804 } 3805 } else { 3806 /* F2 or F3 (but not both) are allowable provided LOCK is also 3807 present. */ 3808 if (haveF2orF3(pfx)) { 3809 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 3810 *decode_OK = False; 3811 return delta; 3812 } 3813 } 3814 } 3815 3816 /* Limit src_val -- the bit offset -- to something within a word. 3817 The Intel docs say that literal offsets larger than a word are 3818 masked in this way. */ 3819 switch (sz) { 3820 case 2: src_val &= 15; break; 3821 case 4: src_val &= 31; break; 3822 case 8: src_val &= 63; break; 3823 default: *decode_OK = False; return delta; 3824 } 3825 3826 /* Invent a mask suitable for the operation. */ 3827 switch (gregLO3ofRM(modrm)) { 3828 case 4: /* BT */ mask = 0; break; 3829 case 5: /* BTS */ mask = 1ULL << src_val; break; 3830 case 6: /* BTR */ mask = ~(1ULL << src_val); break; 3831 case 7: /* BTC */ mask = 1ULL << src_val; break; 3832 /* If this needs to be extended, probably simplest to make a 3833 new function to handle the other cases (0 .. 3). The 3834 Intel docs do however not indicate any use for 0 .. 3, so 3835 we don't expect this to happen. */ 3836 default: *decode_OK = False; return delta; 3837 } 3838 3839 /* Fetch the value to be tested and modified into t2, which is 3840 64-bits wide regardless of sz. */ 3841 if (epartIsReg(modrm)) { 3842 vassert(am_sz == 1); 3843 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) ); 3844 delta += (am_sz + 1); 3845 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3846 nameISize(sz), 3847 src_val, nameIRegE(sz,pfx,modrm)); 3848 } else { 3849 Int len; 3850 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 ); 3851 delta += (len+1); 3852 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) ); 3853 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)), 3854 nameISize(sz), 3855 src_val, dis_buf); 3856 } 3857 3858 /* Compute the new value into t2m, if non-BT. */ 3859 switch (gregLO3ofRM(modrm)) { 3860 case 4: /* BT */ 3861 break; 3862 case 5: /* BTS */ 3863 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) ); 3864 break; 3865 case 6: /* BTR */ 3866 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) ); 3867 break; 3868 case 7: /* BTC */ 3869 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) ); 3870 break; 3871 default: 3872 /*NOTREACHED*/ /*the previous switch guards this*/ 3873 vassert(0); 3874 } 3875 3876 /* Write the result back, if non-BT. */ 3877 if (gregLO3ofRM(modrm) != 4 /* BT */) { 3878 if (epartIsReg(modrm)) { 3879 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m))); 3880 } else { 3881 if (haveLOCK(pfx)) { 3882 casLE( mkexpr(t_addr), 3883 narrowTo(ty, mkexpr(t2))/*expd*/, 3884 narrowTo(ty, mkexpr(t2m))/*new*/, 3885 guest_RIP_curr_instr ); 3886 } else { 3887 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m))); 3888 } 3889 } 3890 } 3891 3892 /* Copy relevant bit from t2 into the carry flag. */ 3893 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 3894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 3895 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 3896 stmt( IRStmt_Put( 3897 OFFB_CC_DEP1, 3898 binop(Iop_And64, 3899 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)), 3900 mkU64(1)) 3901 )); 3902 /* Set NDEP even though it isn't used. This makes redundant-PUT 3903 elimination of previous stores to this field work better. 
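      Writing it here lets an earlier Put to OFFB_CC_NDEP in the same
      superblock be removed, since nothing can observe that value before
      this overwrite.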
*/ 3904 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 3905 3906 return delta; 3907 } 3908 3909 3910 /* Signed/unsigned widening multiply. Generate IR to multiply the 3911 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in 3912 RDX:RAX/EDX:EAX/DX:AX/AX. 3913 */ 3914 static void codegen_mulL_A_D ( Int sz, Bool syned, 3915 IRTemp tmp, const HChar* tmp_txt ) 3916 { 3917 IRType ty = szToITy(sz); 3918 IRTemp t1 = newTemp(ty); 3919 3920 assign( t1, getIRegRAX(sz) ); 3921 3922 switch (ty) { 3923 case Ity_I64: { 3924 IRTemp res128 = newTemp(Ity_I128); 3925 IRTemp resHi = newTemp(Ity_I64); 3926 IRTemp resLo = newTemp(Ity_I64); 3927 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64; 3928 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3929 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp ); 3930 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3931 assign( resHi, unop(Iop_128HIto64,mkexpr(res128))); 3932 assign( resLo, unop(Iop_128to64,mkexpr(res128))); 3933 putIReg64(R_RDX, mkexpr(resHi)); 3934 putIReg64(R_RAX, mkexpr(resLo)); 3935 break; 3936 } 3937 case Ity_I32: { 3938 IRTemp res64 = newTemp(Ity_I64); 3939 IRTemp resHi = newTemp(Ity_I32); 3940 IRTemp resLo = newTemp(Ity_I32); 3941 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32; 3942 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3943 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp ); 3944 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3945 assign( resHi, unop(Iop_64HIto32,mkexpr(res64))); 3946 assign( resLo, unop(Iop_64to32,mkexpr(res64))); 3947 putIRegRDX(4, mkexpr(resHi)); 3948 putIRegRAX(4, mkexpr(resLo)); 3949 break; 3950 } 3951 case Ity_I16: { 3952 IRTemp res32 = newTemp(Ity_I32); 3953 IRTemp resHi = newTemp(Ity_I16); 3954 IRTemp resLo = newTemp(Ity_I16); 3955 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16; 3956 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3957 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp ); 3958 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3959 assign( resHi, unop(Iop_32HIto16,mkexpr(res32))); 3960 assign( resLo, unop(Iop_32to16,mkexpr(res32))); 3961 putIRegRDX(2, mkexpr(resHi)); 3962 putIRegRAX(2, mkexpr(resLo)); 3963 break; 3964 } 3965 case Ity_I8: { 3966 IRTemp res16 = newTemp(Ity_I16); 3967 IRTemp resHi = newTemp(Ity_I8); 3968 IRTemp resLo = newTemp(Ity_I8); 3969 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8; 3970 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB; 3971 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp ); 3972 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) ); 3973 assign( resHi, unop(Iop_16HIto8,mkexpr(res16))); 3974 assign( resLo, unop(Iop_16to8,mkexpr(res16))); 3975 putIRegRAX(2, mkexpr(res16)); 3976 break; 3977 } 3978 default: 3979 ppIRType(ty); 3980 vpanic("codegen_mulL_A_D(amd64)"); 3981 } 3982 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt); 3983 } 3984 3985 3986 /* Group 3 extended opcodes. We have to decide here whether F2 and F3 3987 might be valid.*/ 3988 static 3989 ULong dis_Grp3 ( VexAbiInfo* vbi, 3990 Prefix pfx, Int sz, Long delta, Bool* decode_OK ) 3991 { 3992 Long d64; 3993 UChar modrm; 3994 HChar dis_buf[50]; 3995 Int len; 3996 IRTemp addr; 3997 IRType ty = szToITy(sz); 3998 IRTemp t1 = newTemp(ty); 3999 IRTemp dst1, src, dst0; 4000 *decode_OK = True; 4001 modrm = getUChar(delta); 4002 if (epartIsReg(modrm)) { 4003 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 
*/ 4004 if (haveF2orF3(pfx)) goto unhandled; 4005 switch (gregLO3ofRM(modrm)) { 4006 case 0: { /* TEST */ 4007 delta++; 4008 d64 = getSDisp(imin(4,sz), delta); 4009 delta += imin(4,sz); 4010 dst1 = newTemp(ty); 4011 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4012 getIRegE(sz,pfx,modrm), 4013 mkU(ty, d64 & mkSizeMask(sz)))); 4014 setFlags_DEP1( Iop_And8, dst1, ty ); 4015 DIP("test%c $%lld, %s\n", 4016 nameISize(sz), d64, 4017 nameIRegE(sz, pfx, modrm)); 4018 break; 4019 } 4020 case 1: 4021 *decode_OK = False; 4022 return delta; 4023 case 2: /* NOT */ 4024 delta++; 4025 putIRegE(sz, pfx, modrm, 4026 unop(mkSizedOp(ty,Iop_Not8), 4027 getIRegE(sz, pfx, modrm))); 4028 DIP("not%c %s\n", nameISize(sz), 4029 nameIRegE(sz, pfx, modrm)); 4030 break; 4031 case 3: /* NEG */ 4032 delta++; 4033 dst0 = newTemp(ty); 4034 src = newTemp(ty); 4035 dst1 = newTemp(ty); 4036 assign(dst0, mkU(ty,0)); 4037 assign(src, getIRegE(sz, pfx, modrm)); 4038 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4039 mkexpr(src))); 4040 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4041 putIRegE(sz, pfx, modrm, mkexpr(dst1)); 4042 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm)); 4043 break; 4044 case 4: /* MUL (unsigned widening) */ 4045 delta++; 4046 src = newTemp(ty); 4047 assign(src, getIRegE(sz,pfx,modrm)); 4048 codegen_mulL_A_D ( sz, False, src, 4049 nameIRegE(sz,pfx,modrm) ); 4050 break; 4051 case 5: /* IMUL (signed widening) */ 4052 delta++; 4053 src = newTemp(ty); 4054 assign(src, getIRegE(sz,pfx,modrm)); 4055 codegen_mulL_A_D ( sz, True, src, 4056 nameIRegE(sz,pfx,modrm) ); 4057 break; 4058 case 6: /* DIV */ 4059 delta++; 4060 assign( t1, getIRegE(sz, pfx, modrm) ); 4061 codegen_div ( sz, t1, False ); 4062 DIP("div%c %s\n", nameISize(sz), 4063 nameIRegE(sz, pfx, modrm)); 4064 break; 4065 case 7: /* IDIV */ 4066 delta++; 4067 assign( t1, getIRegE(sz, pfx, modrm) ); 4068 codegen_div ( sz, t1, True ); 4069 DIP("idiv%c %s\n", nameISize(sz), 4070 nameIRegE(sz, pfx, modrm)); 4071 break; 4072 default: 4073 /*NOTREACHED*/ 4074 vpanic("Grp3(amd64,R)"); 4075 } 4076 } else { 4077 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4078 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4079 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/) 4080 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4081 validF2orF3 = True; 4082 } 4083 if (!validF2orF3) goto unhandled; 4084 /* */ 4085 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 4086 /* we have to inform disAMode of any immediate 4087 bytes used */ 4088 gregLO3ofRM(modrm)==0/*TEST*/ 4089 ? 
imin(4,sz) 4090 : 0 4091 ); 4092 t1 = newTemp(ty); 4093 delta += len; 4094 assign(t1, loadLE(ty,mkexpr(addr))); 4095 switch (gregLO3ofRM(modrm)) { 4096 case 0: { /* TEST */ 4097 d64 = getSDisp(imin(4,sz), delta); 4098 delta += imin(4,sz); 4099 dst1 = newTemp(ty); 4100 assign(dst1, binop(mkSizedOp(ty,Iop_And8), 4101 mkexpr(t1), 4102 mkU(ty, d64 & mkSizeMask(sz)))); 4103 setFlags_DEP1( Iop_And8, dst1, ty ); 4104 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf); 4105 break; 4106 } 4107 case 1: 4108 *decode_OK = False; 4109 return delta; 4110 case 2: /* NOT */ 4111 dst1 = newTemp(ty); 4112 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1))); 4113 if (haveLOCK(pfx)) { 4114 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4115 guest_RIP_curr_instr ); 4116 } else { 4117 storeLE( mkexpr(addr), mkexpr(dst1) ); 4118 } 4119 DIP("not%c %s\n", nameISize(sz), dis_buf); 4120 break; 4121 case 3: /* NEG */ 4122 dst0 = newTemp(ty); 4123 src = newTemp(ty); 4124 dst1 = newTemp(ty); 4125 assign(dst0, mkU(ty,0)); 4126 assign(src, mkexpr(t1)); 4127 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), 4128 mkexpr(src))); 4129 if (haveLOCK(pfx)) { 4130 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/, 4131 guest_RIP_curr_instr ); 4132 } else { 4133 storeLE( mkexpr(addr), mkexpr(dst1) ); 4134 } 4135 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty); 4136 DIP("neg%c %s\n", nameISize(sz), dis_buf); 4137 break; 4138 case 4: /* MUL (unsigned widening) */ 4139 codegen_mulL_A_D ( sz, False, t1, dis_buf ); 4140 break; 4141 case 5: /* IMUL */ 4142 codegen_mulL_A_D ( sz, True, t1, dis_buf ); 4143 break; 4144 case 6: /* DIV */ 4145 codegen_div ( sz, t1, False ); 4146 DIP("div%c %s\n", nameISize(sz), dis_buf); 4147 break; 4148 case 7: /* IDIV */ 4149 codegen_div ( sz, t1, True ); 4150 DIP("idiv%c %s\n", nameISize(sz), dis_buf); 4151 break; 4152 default: 4153 /*NOTREACHED*/ 4154 vpanic("Grp3(amd64,M)"); 4155 } 4156 } 4157 return delta; 4158 unhandled: 4159 *decode_OK = False; 4160 return delta; 4161 } 4162 4163 4164 /* Group 4 extended opcodes. We have to decide here whether F2 and F3 4165 might be valid. */ 4166 static 4167 ULong dis_Grp4 ( VexAbiInfo* vbi, 4168 Prefix pfx, Long delta, Bool* decode_OK ) 4169 { 4170 Int alen; 4171 UChar modrm; 4172 HChar dis_buf[50]; 4173 IRType ty = Ity_I8; 4174 IRTemp t1 = newTemp(ty); 4175 IRTemp t2 = newTemp(ty); 4176 4177 *decode_OK = True; 4178 4179 modrm = getUChar(delta); 4180 if (epartIsReg(modrm)) { 4181 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */ 4182 if (haveF2orF3(pfx)) goto unhandled; 4183 assign(t1, getIRegE(1, pfx, modrm)); 4184 switch (gregLO3ofRM(modrm)) { 4185 case 0: /* INC */ 4186 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4187 putIRegE(1, pfx, modrm, mkexpr(t2)); 4188 setFlags_INC_DEC( True, t2, ty ); 4189 break; 4190 case 1: /* DEC */ 4191 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4192 putIRegE(1, pfx, modrm, mkexpr(t2)); 4193 setFlags_INC_DEC( False, t2, ty ); 4194 break; 4195 default: 4196 *decode_OK = False; 4197 return delta; 4198 } 4199 delta++; 4200 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), 4201 nameIRegE(1, pfx, modrm)); 4202 } else { 4203 /* Decide if F2/XACQ or F3/XREL might be valid. */ 4204 Bool validF2orF3 = haveF2orF3(pfx) ? 
False : True; 4205 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4206 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4207 validF2orF3 = True; 4208 } 4209 if (!validF2orF3) goto unhandled; 4210 /* */ 4211 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 4212 assign( t1, loadLE(ty, mkexpr(addr)) ); 4213 switch (gregLO3ofRM(modrm)) { 4214 case 0: /* INC */ 4215 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1))); 4216 if (haveLOCK(pfx)) { 4217 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4218 guest_RIP_curr_instr ); 4219 } else { 4220 storeLE( mkexpr(addr), mkexpr(t2) ); 4221 } 4222 setFlags_INC_DEC( True, t2, ty ); 4223 break; 4224 case 1: /* DEC */ 4225 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1))); 4226 if (haveLOCK(pfx)) { 4227 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 4228 guest_RIP_curr_instr ); 4229 } else { 4230 storeLE( mkexpr(addr), mkexpr(t2) ); 4231 } 4232 setFlags_INC_DEC( False, t2, ty ); 4233 break; 4234 default: 4235 *decode_OK = False; 4236 return delta; 4237 } 4238 delta += alen; 4239 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf); 4240 } 4241 return delta; 4242 unhandled: 4243 *decode_OK = False; 4244 return delta; 4245 } 4246 4247 4248 /* Group 5 extended opcodes. We have to decide here whether F2 and F3 4249 might be valid. */ 4250 static 4251 ULong dis_Grp5 ( VexAbiInfo* vbi, 4252 Prefix pfx, Int sz, Long delta, 4253 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK ) 4254 { 4255 Int len; 4256 UChar modrm; 4257 HChar dis_buf[50]; 4258 IRTemp addr = IRTemp_INVALID; 4259 IRType ty = szToITy(sz); 4260 IRTemp t1 = newTemp(ty); 4261 IRTemp t2 = IRTemp_INVALID; 4262 IRTemp t3 = IRTemp_INVALID; 4263 Bool showSz = True; 4264 4265 *decode_OK = True; 4266 4267 modrm = getUChar(delta); 4268 if (epartIsReg(modrm)) { 4269 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. 4270 F2/CALL and F2/JMP may have bnd prefix. */ 4271 if (haveF2orF3(pfx) 4272 && ! (haveF2(pfx) 4273 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4))) 4274 goto unhandledR; 4275 assign(t1, getIRegE(sz,pfx,modrm)); 4276 switch (gregLO3ofRM(modrm)) { 4277 case 0: /* INC */ 4278 t2 = newTemp(ty); 4279 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4280 mkexpr(t1), mkU(ty,1))); 4281 setFlags_INC_DEC( True, t2, ty ); 4282 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4283 break; 4284 case 1: /* DEC */ 4285 t2 = newTemp(ty); 4286 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4287 mkexpr(t1), mkU(ty,1))); 4288 setFlags_INC_DEC( False, t2, ty ); 4289 putIRegE(sz,pfx,modrm, mkexpr(t2)); 4290 break; 4291 case 2: /* call Ev */ 4292 /* Ignore any sz value and operate as if sz==8. */ 4293 if (!(sz == 4 || sz == 8)) goto unhandledR; 4294 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4295 sz = 8; 4296 t3 = newTemp(Ity_I64); 4297 assign(t3, getIRegE(sz,pfx,modrm)); 4298 t2 = newTemp(Ity_I64); 4299 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4300 putIReg64(R_RSP, mkexpr(t2)); 4301 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1)); 4302 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)"); 4303 jmp_treg(dres, Ijk_Call, t3); 4304 vassert(dres->whatNext == Dis_StopHere); 4305 showSz = False; 4306 break; 4307 case 4: /* jmp Ev */ 4308 /* Ignore any sz value and operate as if sz==8. */ 4309 if (!(sz == 4 || sz == 8)) goto unhandledR; 4310 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
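               Accepting it affects only the printed disassembly; the prefix
               has no further effect here.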
*/ 4311 sz = 8; 4312 t3 = newTemp(Ity_I64); 4313 assign(t3, getIRegE(sz,pfx,modrm)); 4314 jmp_treg(dres, Ijk_Boring, t3); 4315 vassert(dres->whatNext == Dis_StopHere); 4316 showSz = False; 4317 break; 4318 case 6: /* PUSH Ev */ 4319 /* There is no encoding for 32-bit operand size; hence ... */ 4320 if (sz == 4) sz = 8; 4321 if (sz == 8 || sz == 2) { 4322 ty = szToITy(sz); /* redo it, since sz might have changed */ 4323 t3 = newTemp(ty); 4324 assign(t3, getIRegE(sz,pfx,modrm)); 4325 t2 = newTemp(Ity_I64); 4326 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4327 putIReg64(R_RSP, mkexpr(t2) ); 4328 storeLE( mkexpr(t2), mkexpr(t3) ); 4329 break; 4330 } else { 4331 goto unhandledR; /* awaiting test case */ 4332 } 4333 default: 4334 unhandledR: 4335 *decode_OK = False; 4336 return delta; 4337 } 4338 delta++; 4339 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4340 showSz ? nameISize(sz) : ' ', 4341 nameIRegE(sz, pfx, modrm)); 4342 } else { 4343 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */ 4344 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 4345 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/) 4346 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 4347 validF2orF3 = True; 4348 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4) 4349 && (haveF2(pfx) && !haveF3(pfx))) { 4350 validF2orF3 = True; 4351 } 4352 if (!validF2orF3) goto unhandledM; 4353 /* */ 4354 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 4355 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4 4356 && gregLO3ofRM(modrm) != 6) { 4357 assign(t1, loadLE(ty,mkexpr(addr))); 4358 } 4359 switch (gregLO3ofRM(modrm)) { 4360 case 0: /* INC */ 4361 t2 = newTemp(ty); 4362 assign(t2, binop(mkSizedOp(ty,Iop_Add8), 4363 mkexpr(t1), mkU(ty,1))); 4364 if (haveLOCK(pfx)) { 4365 casLE( mkexpr(addr), 4366 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4367 } else { 4368 storeLE(mkexpr(addr),mkexpr(t2)); 4369 } 4370 setFlags_INC_DEC( True, t2, ty ); 4371 break; 4372 case 1: /* DEC */ 4373 t2 = newTemp(ty); 4374 assign(t2, binop(mkSizedOp(ty,Iop_Sub8), 4375 mkexpr(t1), mkU(ty,1))); 4376 if (haveLOCK(pfx)) { 4377 casLE( mkexpr(addr), 4378 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 4379 } else { 4380 storeLE(mkexpr(addr),mkexpr(t2)); 4381 } 4382 setFlags_INC_DEC( False, t2, ty ); 4383 break; 4384 case 2: /* call Ev */ 4385 /* Ignore any sz value and operate as if sz==8. */ 4386 if (!(sz == 4 || sz == 8)) goto unhandledM; 4387 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4388 sz = 8; 4389 t3 = newTemp(Ity_I64); 4390 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4391 t2 = newTemp(Ity_I64); 4392 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 4393 putIReg64(R_RSP, mkexpr(t2)); 4394 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len)); 4395 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)"); 4396 jmp_treg(dres, Ijk_Call, t3); 4397 vassert(dres->whatNext == Dis_StopHere); 4398 showSz = False; 4399 break; 4400 case 4: /* JMP Ev */ 4401 /* Ignore any sz value and operate as if sz==8. */ 4402 if (!(sz == 4 || sz == 8)) goto unhandledM; 4403 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 4404 sz = 8; 4405 t3 = newTemp(Ity_I64); 4406 assign(t3, loadLE(Ity_I64,mkexpr(addr))); 4407 jmp_treg(dres, Ijk_Boring, t3); 4408 vassert(dres->whatNext == Dis_StopHere); 4409 showSz = False; 4410 break; 4411 case 6: /* PUSH Ev */ 4412 /* There is no encoding for 32-bit operand size; hence ... 
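               an operand size of 4 here really denotes the default 64-bit
               push, so sz is promoted to 8 below.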
*/ 4413 if (sz == 4) sz = 8; 4414 if (sz == 8 || sz == 2) { 4415 ty = szToITy(sz); /* redo it, since sz might have changed */ 4416 t3 = newTemp(ty); 4417 assign(t3, loadLE(ty,mkexpr(addr))); 4418 t2 = newTemp(Ity_I64); 4419 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 4420 putIReg64(R_RSP, mkexpr(t2) ); 4421 storeLE( mkexpr(t2), mkexpr(t3) ); 4422 break; 4423 } else { 4424 goto unhandledM; /* awaiting test case */ 4425 } 4426 default: 4427 unhandledM: 4428 *decode_OK = False; 4429 return delta; 4430 } 4431 delta += len; 4432 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)), 4433 showSz ? nameISize(sz) : ' ', 4434 dis_buf); 4435 } 4436 return delta; 4437 } 4438 4439 4440 /*------------------------------------------------------------*/ 4441 /*--- Disassembling string ops (including REP prefixes) ---*/ 4442 /*------------------------------------------------------------*/ 4443 4444 /* Code shared by all the string ops */ 4445 static 4446 void dis_string_op_increment ( Int sz, IRTemp t_inc ) 4447 { 4448 UChar logSz; 4449 if (sz == 8 || sz == 4 || sz == 2) { 4450 logSz = 1; 4451 if (sz == 4) logSz = 2; 4452 if (sz == 8) logSz = 3; 4453 assign( t_inc, 4454 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ), 4455 mkU8(logSz) ) ); 4456 } else { 4457 assign( t_inc, 4458 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) ); 4459 } 4460 } 4461 4462 static 4463 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ), 4464 Int sz, const HChar* name, Prefix pfx ) 4465 { 4466 IRTemp t_inc = newTemp(Ity_I64); 4467 /* Really we ought to inspect the override prefixes, but we don't. 4468 The following assertion catches any resulting sillyness. */ 4469 vassert(pfx == clearSegBits(pfx)); 4470 dis_string_op_increment(sz, t_inc); 4471 dis_OP( sz, t_inc, pfx ); 4472 DIP("%s%c\n", name, nameISize(sz)); 4473 } 4474 4475 static 4476 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx ) 4477 { 4478 IRType ty = szToITy(sz); 4479 IRTemp td = newTemp(Ity_I64); /* RDI */ 4480 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4481 IRExpr *incd, *incs; 4482 4483 if (haveASO(pfx)) { 4484 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4485 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4486 } else { 4487 assign( td, getIReg64(R_RDI) ); 4488 assign( ts, getIReg64(R_RSI) ); 4489 } 4490 4491 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) ); 4492 4493 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4494 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4495 if (haveASO(pfx)) { 4496 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4497 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4498 } 4499 putIReg64( R_RDI, incd ); 4500 putIReg64( R_RSI, incs ); 4501 } 4502 4503 static 4504 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx ) 4505 { 4506 IRType ty = szToITy(sz); 4507 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4508 IRExpr *incs; 4509 4510 if (haveASO(pfx)) 4511 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4512 else 4513 assign( ts, getIReg64(R_RSI) ); 4514 4515 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) ); 4516 4517 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4518 if (haveASO(pfx)) 4519 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4520 putIReg64( R_RSI, incs ); 4521 } 4522 4523 static 4524 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx ) 4525 { 4526 IRType ty = szToITy(sz); 4527 IRTemp ta = newTemp(ty); /* rAX */ 4528 IRTemp td = newTemp(Ity_I64); /* RDI */ 4529 IRExpr *incd; 4530 4531 assign( ta, getIRegRAX(sz) ); 4532 4533 if (haveASO(pfx)) 4534 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4535 else 4536 
assign( td, getIReg64(R_RDI) ); 4537 4538 storeLE( mkexpr(td), mkexpr(ta) ); 4539 4540 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4541 if (haveASO(pfx)) 4542 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4543 putIReg64( R_RDI, incd ); 4544 } 4545 4546 static 4547 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx ) 4548 { 4549 IRType ty = szToITy(sz); 4550 IRTemp tdv = newTemp(ty); /* (RDI) */ 4551 IRTemp tsv = newTemp(ty); /* (RSI) */ 4552 IRTemp td = newTemp(Ity_I64); /* RDI */ 4553 IRTemp ts = newTemp(Ity_I64); /* RSI */ 4554 IRExpr *incd, *incs; 4555 4556 if (haveASO(pfx)) { 4557 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4558 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) ); 4559 } else { 4560 assign( td, getIReg64(R_RDI) ); 4561 assign( ts, getIReg64(R_RSI) ); 4562 } 4563 4564 assign( tdv, loadLE(ty,mkexpr(td)) ); 4565 4566 assign( tsv, loadLE(ty,mkexpr(ts)) ); 4567 4568 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty ); 4569 4570 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4571 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)); 4572 if (haveASO(pfx)) { 4573 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4574 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs)); 4575 } 4576 putIReg64( R_RDI, incd ); 4577 putIReg64( R_RSI, incs ); 4578 } 4579 4580 static 4581 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx ) 4582 { 4583 IRType ty = szToITy(sz); 4584 IRTemp ta = newTemp(ty); /* rAX */ 4585 IRTemp td = newTemp(Ity_I64); /* RDI */ 4586 IRTemp tdv = newTemp(ty); /* (RDI) */ 4587 IRExpr *incd; 4588 4589 assign( ta, getIRegRAX(sz) ); 4590 4591 if (haveASO(pfx)) 4592 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) ); 4593 else 4594 assign( td, getIReg64(R_RDI) ); 4595 4596 assign( tdv, loadLE(ty,mkexpr(td)) ); 4597 4598 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty ); 4599 4600 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)); 4601 if (haveASO(pfx)) 4602 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd)); 4603 putIReg64( R_RDI, incd ); 4604 } 4605 4606 4607 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume 4608 the insn is the last one in the basic block, and so emit a jump to 4609 the next insn, rather than just falling through. */ 4610 static 4611 void dis_REP_op ( /*MOD*/DisResult* dres, 4612 AMD64Condcode cond, 4613 void (*dis_OP)(Int, IRTemp, Prefix), 4614 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name, 4615 Prefix pfx ) 4616 { 4617 IRTemp t_inc = newTemp(Ity_I64); 4618 IRTemp tc; 4619 IRExpr* cmp; 4620 4621 /* Really we ought to inspect the override prefixes, but we don't. 4622 The following assertion catches any resulting sillyness. 
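      In particular a segment-override prefix would be silently ignored; the
      assertion below insists that all segment-override bits in the prefix
      are clear.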
*/ 4623 vassert(pfx == clearSegBits(pfx)); 4624 4625 if (haveASO(pfx)) { 4626 tc = newTemp(Ity_I32); /* ECX */ 4627 assign( tc, getIReg32(R_RCX) ); 4628 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0)); 4629 } else { 4630 tc = newTemp(Ity_I64); /* RCX */ 4631 assign( tc, getIReg64(R_RCX) ); 4632 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0)); 4633 } 4634 4635 stmt( IRStmt_Exit( cmp, Ijk_Boring, 4636 IRConst_U64(rip_next), OFFB_RIP ) ); 4637 4638 if (haveASO(pfx)) 4639 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) ); 4640 else 4641 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) ); 4642 4643 dis_string_op_increment(sz, t_inc); 4644 dis_OP (sz, t_inc, pfx); 4645 4646 if (cond == AMD64CondAlways) { 4647 jmp_lit(dres, Ijk_Boring, rip); 4648 vassert(dres->whatNext == Dis_StopHere); 4649 } else { 4650 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond), 4651 Ijk_Boring, 4652 IRConst_U64(rip), 4653 OFFB_RIP ) ); 4654 jmp_lit(dres, Ijk_Boring, rip_next); 4655 vassert(dres->whatNext == Dis_StopHere); 4656 } 4657 DIP("%s%c\n", name, nameISize(sz)); 4658 } 4659 4660 4661 /*------------------------------------------------------------*/ 4662 /*--- Arithmetic, etc. ---*/ 4663 /*------------------------------------------------------------*/ 4664 4665 /* IMUL E, G. Supplied eip points to the modR/M byte. */ 4666 static 4667 ULong dis_mul_E_G ( VexAbiInfo* vbi, 4668 Prefix pfx, 4669 Int size, 4670 Long delta0 ) 4671 { 4672 Int alen; 4673 HChar dis_buf[50]; 4674 UChar rm = getUChar(delta0); 4675 IRType ty = szToITy(size); 4676 IRTemp te = newTemp(ty); 4677 IRTemp tg = newTemp(ty); 4678 IRTemp resLo = newTemp(ty); 4679 4680 assign( tg, getIRegG(size, pfx, rm) ); 4681 if (epartIsReg(rm)) { 4682 assign( te, getIRegE(size, pfx, rm) ); 4683 } else { 4684 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 ); 4685 assign( te, loadLE(ty,mkexpr(addr)) ); 4686 } 4687 4688 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB ); 4689 4690 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) ); 4691 4692 putIRegG(size, pfx, rm, mkexpr(resLo) ); 4693 4694 if (epartIsReg(rm)) { 4695 DIP("imul%c %s, %s\n", nameISize(size), 4696 nameIRegE(size,pfx,rm), 4697 nameIRegG(size,pfx,rm)); 4698 return 1+delta0; 4699 } else { 4700 DIP("imul%c %s, %s\n", nameISize(size), 4701 dis_buf, 4702 nameIRegG(size,pfx,rm)); 4703 return alen+delta0; 4704 } 4705 } 4706 4707 4708 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. 
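   For example, 'imulq $4, %rbx, %rcx' computes %rbx * 4 and writes the low
   64 bits of the product to %rcx.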
*/ 4709 static 4710 ULong dis_imul_I_E_G ( VexAbiInfo* vbi, 4711 Prefix pfx, 4712 Int size, 4713 Long delta, 4714 Int litsize ) 4715 { 4716 Long d64; 4717 Int alen; 4718 HChar dis_buf[50]; 4719 UChar rm = getUChar(delta); 4720 IRType ty = szToITy(size); 4721 IRTemp te = newTemp(ty); 4722 IRTemp tl = newTemp(ty); 4723 IRTemp resLo = newTemp(ty); 4724 4725 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8); 4726 4727 if (epartIsReg(rm)) { 4728 assign(te, getIRegE(size, pfx, rm)); 4729 delta++; 4730 } else { 4731 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 4732 imin(4,litsize) ); 4733 assign(te, loadLE(ty, mkexpr(addr))); 4734 delta += alen; 4735 } 4736 d64 = getSDisp(imin(4,litsize),delta); 4737 delta += imin(4,litsize); 4738 4739 d64 &= mkSizeMask(size); 4740 assign(tl, mkU(ty,d64)); 4741 4742 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) )); 4743 4744 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB ); 4745 4746 putIRegG(size, pfx, rm, mkexpr(resLo)); 4747 4748 DIP("imul%c $%lld, %s, %s\n", 4749 nameISize(size), d64, 4750 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ), 4751 nameIRegG(size,pfx,rm) ); 4752 return delta; 4753 } 4754 4755 4756 /* Generate an IR sequence to do a popcount operation on the supplied 4757 IRTemp, and return a new IRTemp holding the result. 'ty' may be 4758 Ity_I16, Ity_I32 or Ity_I64 only. */ 4759 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4760 { 4761 Int i; 4762 if (ty == Ity_I16) { 4763 IRTemp old = IRTemp_INVALID; 4764 IRTemp nyu = IRTemp_INVALID; 4765 IRTemp mask[4], shift[4]; 4766 for (i = 0; i < 4; i++) { 4767 mask[i] = newTemp(ty); 4768 shift[i] = 1 << i; 4769 } 4770 assign(mask[0], mkU16(0x5555)); 4771 assign(mask[1], mkU16(0x3333)); 4772 assign(mask[2], mkU16(0x0F0F)); 4773 assign(mask[3], mkU16(0x00FF)); 4774 old = src; 4775 for (i = 0; i < 4; i++) { 4776 nyu = newTemp(ty); 4777 assign(nyu, 4778 binop(Iop_Add16, 4779 binop(Iop_And16, 4780 mkexpr(old), 4781 mkexpr(mask[i])), 4782 binop(Iop_And16, 4783 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4784 mkexpr(mask[i])))); 4785 old = nyu; 4786 } 4787 return nyu; 4788 } 4789 if (ty == Ity_I32) { 4790 IRTemp old = IRTemp_INVALID; 4791 IRTemp nyu = IRTemp_INVALID; 4792 IRTemp mask[5], shift[5]; 4793 for (i = 0; i < 5; i++) { 4794 mask[i] = newTemp(ty); 4795 shift[i] = 1 << i; 4796 } 4797 assign(mask[0], mkU32(0x55555555)); 4798 assign(mask[1], mkU32(0x33333333)); 4799 assign(mask[2], mkU32(0x0F0F0F0F)); 4800 assign(mask[3], mkU32(0x00FF00FF)); 4801 assign(mask[4], mkU32(0x0000FFFF)); 4802 old = src; 4803 for (i = 0; i < 5; i++) { 4804 nyu = newTemp(ty); 4805 assign(nyu, 4806 binop(Iop_Add32, 4807 binop(Iop_And32, 4808 mkexpr(old), 4809 mkexpr(mask[i])), 4810 binop(Iop_And32, 4811 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4812 mkexpr(mask[i])))); 4813 old = nyu; 4814 } 4815 return nyu; 4816 } 4817 if (ty == Ity_I64) { 4818 IRTemp old = IRTemp_INVALID; 4819 IRTemp nyu = IRTemp_INVALID; 4820 IRTemp mask[6], shift[6]; 4821 for (i = 0; i < 6; i++) { 4822 mask[i] = newTemp(ty); 4823 shift[i] = 1 << i; 4824 } 4825 assign(mask[0], mkU64(0x5555555555555555ULL)); 4826 assign(mask[1], mkU64(0x3333333333333333ULL)); 4827 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4828 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4829 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4830 assign(mask[5], mkU64(0x00000000FFFFFFFFULL)); 4831 old = src; 4832 for (i = 0; i < 6; i++) { 4833 nyu = newTemp(ty); 4834 assign(nyu, 4835 binop(Iop_Add64, 4836 binop(Iop_And64, 4837 
mkexpr(old), 4838 mkexpr(mask[i])), 4839 binop(Iop_And64, 4840 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4841 mkexpr(mask[i])))); 4842 old = nyu; 4843 } 4844 return nyu; 4845 } 4846 /*NOTREACHED*/ 4847 vassert(0); 4848 } 4849 4850 4851 /* Generate an IR sequence to do a count-leading-zeroes operation on 4852 the supplied IRTemp, and return a new IRTemp holding the result. 4853 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4854 the argument is zero, return the number of bits in the word (the 4855 natural semantics). */ 4856 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4857 { 4858 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4859 4860 IRTemp src64 = newTemp(Ity_I64); 4861 assign(src64, widenUto64( mkexpr(src) )); 4862 4863 IRTemp src64x = newTemp(Ity_I64); 4864 assign(src64x, 4865 binop(Iop_Shl64, mkexpr(src64), 4866 mkU8(64 - 8 * sizeofIRType(ty)))); 4867 4868 // Clz64 has undefined semantics when its input is zero, so 4869 // special-case around that. 4870 IRTemp res64 = newTemp(Ity_I64); 4871 assign(res64, 4872 IRExpr_ITE( 4873 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)), 4874 mkU64(8 * sizeofIRType(ty)), 4875 unop(Iop_Clz64, mkexpr(src64x)) 4876 )); 4877 4878 IRTemp res = newTemp(ty); 4879 assign(res, narrowTo(ty, mkexpr(res64))); 4880 return res; 4881 } 4882 4883 4884 /* Generate an IR sequence to do a count-trailing-zeroes operation on 4885 the supplied IRTemp, and return a new IRTemp holding the result. 4886 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4887 the argument is zero, return the number of bits in the word (the 4888 natural semantics). */ 4889 static IRTemp gen_TZCNT ( IRType ty, IRTemp src ) 4890 { 4891 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4892 4893 IRTemp src64 = newTemp(Ity_I64); 4894 assign(src64, widenUto64( mkexpr(src) )); 4895 4896 // Ctz64 has undefined semantics when its input is zero, so 4897 // special-case around that. 4898 IRTemp res64 = newTemp(Ity_I64); 4899 assign(res64, 4900 IRExpr_ITE( 4901 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)), 4902 mkU64(8 * sizeofIRType(ty)), 4903 unop(Iop_Ctz64, mkexpr(src64)) 4904 )); 4905 4906 IRTemp res = newTemp(ty); 4907 assign(res, narrowTo(ty, mkexpr(res64))); 4908 return res; 4909 } 4910 4911 4912 /*------------------------------------------------------------*/ 4913 /*--- ---*/ 4914 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/ 4915 /*--- ---*/ 4916 /*------------------------------------------------------------*/ 4917 4918 /* --- Helper functions for dealing with the register stack. --- */ 4919 4920 /* --- Set the emulation-warning pseudo-register. --- */ 4921 4922 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ ) 4923 { 4924 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4925 stmt( IRStmt_Put( OFFB_EMNOTE, e ) ); 4926 } 4927 4928 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */ 4929 4930 static IRExpr* mkQNaN64 ( void ) 4931 { 4932 /* QNaN is 0 2047 1 0(51times) 4933 == 0b 11111111111b 1 0(51times) 4934 == 0x7FF8 0000 0000 0000 4935 */ 4936 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL)); 4937 } 4938 4939 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */ 4940 4941 static IRExpr* get_ftop ( void ) 4942 { 4943 return IRExpr_Get( OFFB_FTOP, Ity_I32 ); 4944 } 4945 4946 static void put_ftop ( IRExpr* e ) 4947 { 4948 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4949 stmt( IRStmt_Put( OFFB_FTOP, e ) ); 4950 } 4951 4952 /* --------- Get/put the C3210 bits. 
--------- */ 4953 4954 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) 4955 { 4956 return IRExpr_Get( OFFB_FC3210, Ity_I64 ); 4957 } 4958 4959 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) 4960 { 4961 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); 4962 stmt( IRStmt_Put( OFFB_FC3210, e ) ); 4963 } 4964 4965 /* --------- Get/put the FPU rounding mode. --------- */ 4966 static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) 4967 { 4968 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 )); 4969 } 4970 4971 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e ) 4972 { 4973 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 4974 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) ); 4975 } 4976 4977 4978 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */ 4979 /* Produces a value in 0 .. 3, which is encoded as per the type 4980 IRRoundingMode. Since the guest_FPROUND value is also encoded as 4981 per IRRoundingMode, we merely need to get it and mask it for 4982 safety. 4983 */ 4984 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void ) 4985 { 4986 return binop( Iop_And32, get_fpround(), mkU32(3) ); 4987 } 4988 4989 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 4990 { 4991 return mkU32(Irrm_NEAREST); 4992 } 4993 4994 4995 /* --------- Get/set FP register tag bytes. --------- */ 4996 4997 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */ 4998 4999 static void put_ST_TAG ( Int i, IRExpr* value ) 5000 { 5001 IRRegArray* descr; 5002 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8); 5003 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5004 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5005 } 5006 5007 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be 5008 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */ 5009 5010 static IRExpr* get_ST_TAG ( Int i ) 5011 { 5012 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5013 return IRExpr_GetI( descr, get_ftop(), i ); 5014 } 5015 5016 5017 /* --------- Get/set FP registers. --------- */ 5018 5019 /* Given i, and some expression e, emit 'ST(i) = e' and set the 5020 register's tag to indicate the register is full. The previous 5021 state of the register is not checked. */ 5022 5023 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 5024 { 5025 IRRegArray* descr; 5026 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 5027 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5028 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5029 /* Mark the register as in-use. */ 5030 put_ST_TAG(i, mkU8(1)); 5031 } 5032 5033 /* Given i, and some expression e, emit 5034 ST(i) = is_full(i) ? NaN : e 5035 and set the tag accordingly. 5036 */ 5037 5038 static void put_ST ( Int i, IRExpr* value ) 5039 { 5040 put_ST_UNCHECKED( 5041 i, 5042 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5043 /* non-0 means full */ 5044 mkQNaN64(), 5045 /* 0 means empty */ 5046 value 5047 ) 5048 ); 5049 } 5050 5051 5052 /* Given i, generate an expression yielding 'ST(i)'. */ 5053 5054 static IRExpr* get_ST_UNCHECKED ( Int i ) 5055 { 5056 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5057 return IRExpr_GetI( descr, get_ftop(), i ); 5058 } 5059 5060 5061 /* Given i, generate an expression yielding 5062 is_full(i) ? 
ST(i) : NaN 5063 */ 5064 5065 static IRExpr* get_ST ( Int i ) 5066 { 5067 return 5068 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5069 /* non-0 means full */ 5070 get_ST_UNCHECKED(i), 5071 /* 0 means empty */ 5072 mkQNaN64()); 5073 } 5074 5075 5076 /* Given i, and some expression e, and a condition cond, generate IR 5077 which has the same effect as put_ST(i,e) when cond is true and has 5078 no effect when cond is false. Given the lack of proper 5079 if-then-else in the IR, this is pretty tricky. 5080 */ 5081 5082 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) 5083 { 5084 // new_tag = if cond then FULL else old_tag 5085 // new_val = if cond then (if old_tag==FULL then NaN else val) 5086 // else old_val 5087 5088 IRTemp old_tag = newTemp(Ity_I8); 5089 assign(old_tag, get_ST_TAG(i)); 5090 IRTemp new_tag = newTemp(Ity_I8); 5091 assign(new_tag, 5092 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); 5093 5094 IRTemp old_val = newTemp(Ity_F64); 5095 assign(old_val, get_ST_UNCHECKED(i)); 5096 IRTemp new_val = newTemp(Ity_F64); 5097 assign(new_val, 5098 IRExpr_ITE(mkexpr(cond), 5099 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), 5100 /* non-0 means full */ 5101 mkQNaN64(), 5102 /* 0 means empty */ 5103 value), 5104 mkexpr(old_val))); 5105 5106 put_ST_UNCHECKED(i, mkexpr(new_val)); 5107 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So 5108 // now set it to new_tag instead. 5109 put_ST_TAG(i, mkexpr(new_tag)); 5110 } 5111 5112 /* Adjust FTOP downwards by one register. */ 5113 5114 static void fp_push ( void ) 5115 { 5116 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); 5117 } 5118 5119 /* Adjust FTOP downwards by one register when COND is 1:I1. Else 5120 don't change it. */ 5121 5122 static void maybe_fp_push ( IRTemp cond ) 5123 { 5124 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); 5125 } 5126 5127 /* Adjust FTOP upwards by one register, and mark the vacated register 5128 as empty. */ 5129 5130 static void fp_pop ( void ) 5131 { 5132 put_ST_TAG(0, mkU8(0)); 5133 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5134 } 5135 5136 /* Set the C2 bit of the FPU status register to e[0]. Assumes that 5137 e[31:1] == 0. 5138 */ 5139 static void set_C2 ( IRExpr* e ) 5140 { 5141 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); 5142 put_C3210( binop(Iop_Or64, 5143 cleared, 5144 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); 5145 } 5146 5147 /* Generate code to check that abs(d64) < 2^63 and is finite. This is 5148 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 5149 test is simple, but the derivation of it is not so simple. 5150 5151 The exponent field for an IEEE754 double is 11 bits. That means it 5152 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 5153 the number is either a NaN or an Infinity and so is not finite. 5154 Furthermore, a finite value of exactly 2^63 is the smallest value 5155 that has exponent value 0x43E. Hence, what we need to do is 5156 extract the exponent, ignoring the sign bit and mantissa, and check 5157 it is < 0x43E, or <= 0x43D. 5158 5159 To make this easily applicable to 32- and 64-bit targets, a 5160 roundabout approach is used. First the number is converted to I64, 5161 then the top 32 bits are taken. Shifting them right by 20 bits 5162 places the sign bit and exponent in the bottom 12 bits. Anding 5163 with 0x7FF gets rid of the sign bit, leaving just the exponent 5164 available for comparison. 
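   The IEEE754 exponent bias is 1023, so 2^63 has a biased exponent of
   1023 + 63 = 0x43E; hence any value whose masked exponent is <= 0x43D is
   finite and strictly less than 2^63 in magnitude.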
5165 */ 5166 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 5167 { 5168 IRTemp i64 = newTemp(Ity_I64); 5169 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 5170 IRTemp exponent = newTemp(Ity_I32); 5171 assign(exponent, 5172 binop(Iop_And32, 5173 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 5174 mkU32(0x7FF))); 5175 IRTemp in_range_and_finite = newTemp(Ity_I1); 5176 assign(in_range_and_finite, 5177 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 5178 return in_range_and_finite; 5179 } 5180 5181 /* Invent a plausible-looking FPU status word value: 5182 ((ftop & 7) << 11) | (c3210 & 0x4700) 5183 */ 5184 static IRExpr* get_FPU_sw ( void ) 5185 { 5186 return 5187 unop(Iop_32to16, 5188 binop(Iop_Or32, 5189 binop(Iop_Shl32, 5190 binop(Iop_And32, get_ftop(), mkU32(7)), 5191 mkU8(11)), 5192 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 5193 mkU32(0x4700)) 5194 )); 5195 } 5196 5197 5198 /* ------------------------------------------------------- */ 5199 /* Given all that stack-mangling junk, we can now go ahead 5200 and describe FP instructions. 5201 */ 5202 5203 /* ST(0) = ST(0) `op` mem64/32(addr) 5204 Need to check ST(0)'s tag on read, but not on write. 5205 */ 5206 static 5207 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5208 IROp op, Bool dbl ) 5209 { 5210 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5211 if (dbl) { 5212 put_ST_UNCHECKED(0, 5213 triop( op, 5214 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5215 get_ST(0), 5216 loadLE(Ity_F64,mkexpr(addr)) 5217 )); 5218 } else { 5219 put_ST_UNCHECKED(0, 5220 triop( op, 5221 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5222 get_ST(0), 5223 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 5224 )); 5225 } 5226 } 5227 5228 5229 /* ST(0) = mem64/32(addr) `op` ST(0) 5230 Need to check ST(0)'s tag on read, but not on write. 5231 */ 5232 static 5233 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5234 IROp op, Bool dbl ) 5235 { 5236 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5237 if (dbl) { 5238 put_ST_UNCHECKED(0, 5239 triop( op, 5240 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5241 loadLE(Ity_F64,mkexpr(addr)), 5242 get_ST(0) 5243 )); 5244 } else { 5245 put_ST_UNCHECKED(0, 5246 triop( op, 5247 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5248 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 5249 get_ST(0) 5250 )); 5251 } 5252 } 5253 5254 5255 /* ST(dst) = ST(dst) `op` ST(src). 5256 Check dst and src tags when reading but not on write. 5257 */ 5258 static 5259 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5260 Bool pop_after ) 5261 { 5262 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5263 put_ST_UNCHECKED( 5264 st_dst, 5265 triop( op, 5266 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5267 get_ST(st_dst), 5268 get_ST(st_src) ) 5269 ); 5270 if (pop_after) 5271 fp_pop(); 5272 } 5273 5274 /* ST(dst) = ST(src) `op` ST(dst). 5275 Check dst and src tags when reading but not on write. 
5276 */ 5277 static 5278 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5279 Bool pop_after ) 5280 { 5281 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5282 put_ST_UNCHECKED( 5283 st_dst, 5284 triop( op, 5285 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5286 get_ST(st_src), 5287 get_ST(st_dst) ) 5288 ); 5289 if (pop_after) 5290 fp_pop(); 5291 } 5292 5293 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 5294 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 5295 { 5296 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 5297 /* This is a bit of a hack (and isn't really right). It sets 5298 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 5299 documentation implies A and S are unchanged. 5300 */ 5301 /* It's also fishy in that it is used both for COMIP and 5302 UCOMIP, and they aren't the same (although similar). */ 5303 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 5304 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 5305 stmt( IRStmt_Put( 5306 OFFB_CC_DEP1, 5307 binop( Iop_And64, 5308 unop( Iop_32Uto64, 5309 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 5310 mkU64(0x45) 5311 ))); 5312 if (pop_after) 5313 fp_pop(); 5314 } 5315 5316 5317 /* returns 5318 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 5319 */ 5320 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 5321 { 5322 IRTemp t32 = newTemp(Ity_I32); 5323 assign( t32, e32 ); 5324 return 5325 IRExpr_ITE( 5326 binop(Iop_CmpLT64U, 5327 unop(Iop_32Uto64, 5328 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 5329 mkU64(65536)), 5330 unop(Iop_32to16, mkexpr(t32)), 5331 mkU16( 0x8000 ) ); 5332 } 5333 5334 5335 static 5336 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 5337 VexAbiInfo* vbi, Prefix pfx, Long delta ) 5338 { 5339 Int len; 5340 UInt r_src, r_dst; 5341 HChar dis_buf[50]; 5342 IRTemp t1, t2; 5343 5344 /* On entry, delta points at the second byte of the insn (the modrm 5345 byte).*/ 5346 UChar first_opcode = getUChar(delta-1); 5347 UChar modrm = getUChar(delta+0); 5348 5349 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 5350 5351 if (first_opcode == 0xD8) { 5352 if (modrm < 0xC0) { 5353 5354 /* bits 5,4,3 are an opcode extension, and the modRM also 5355 specifies an address. */ 5356 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5357 delta += len; 5358 5359 switch (gregLO3ofRM(modrm)) { 5360 5361 case 0: /* FADD single-real */ 5362 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5363 break; 5364 5365 case 1: /* FMUL single-real */ 5366 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5367 break; 5368 5369 case 2: /* FCOM single-real */ 5370 DIP("fcoms %s\n", dis_buf); 5371 /* This forces C1 to zero, which isn't right. */ 5372 /* The AMD documentation suggests that forcing C1 to 5373 zero is correct (Eliot Moss) */ 5374 put_C3210( 5375 unop( Iop_32Uto64, 5376 binop( Iop_And32, 5377 binop(Iop_Shl32, 5378 binop(Iop_CmpF64, 5379 get_ST(0), 5380 unop(Iop_F32toF64, 5381 loadLE(Ity_F32,mkexpr(addr)))), 5382 mkU8(8)), 5383 mkU32(0x4500) 5384 ))); 5385 break; 5386 5387 case 3: /* FCOMP single-real */ 5388 /* The AMD documentation suggests that forcing C1 to 5389 zero is correct (Eliot Moss) */ 5390 DIP("fcomps %s\n", dis_buf); 5391 /* This forces C1 to zero, which isn't right. 
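               The CmpF64 result is shifted left by 8 into the C0/C2/C3
               positions of the status word and masked with 0x4500 (C3|C2|C0);
               C1 (0x200) is outside the mask and therefore reads as zero.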
*/ 5392 put_C3210( 5393 unop( Iop_32Uto64, 5394 binop( Iop_And32, 5395 binop(Iop_Shl32, 5396 binop(Iop_CmpF64, 5397 get_ST(0), 5398 unop(Iop_F32toF64, 5399 loadLE(Ity_F32,mkexpr(addr)))), 5400 mkU8(8)), 5401 mkU32(0x4500) 5402 ))); 5403 fp_pop(); 5404 break; 5405 5406 case 4: /* FSUB single-real */ 5407 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5408 break; 5409 5410 case 5: /* FSUBR single-real */ 5411 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5412 break; 5413 5414 case 6: /* FDIV single-real */ 5415 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5416 break; 5417 5418 case 7: /* FDIVR single-real */ 5419 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5420 break; 5421 5422 default: 5423 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5424 vex_printf("first_opcode == 0xD8\n"); 5425 goto decode_fail; 5426 } 5427 } else { 5428 delta++; 5429 switch (modrm) { 5430 5431 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 5432 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5433 break; 5434 5435 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5436 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5437 break; 5438 5439 /* Dunno if this is right */ 5440 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5441 r_dst = (UInt)modrm - 0xD0; 5442 DIP("fcom %%st(0),%%st(%d)\n", r_dst); 5443 /* This forces C1 to zero, which isn't right. */ 5444 put_C3210( 5445 unop(Iop_32Uto64, 5446 binop( Iop_And32, 5447 binop(Iop_Shl32, 5448 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5449 mkU8(8)), 5450 mkU32(0x4500) 5451 ))); 5452 break; 5453 5454 /* Dunno if this is right */ 5455 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5456 r_dst = (UInt)modrm - 0xD8; 5457 DIP("fcomp %%st(0),%%st(%d)\n", r_dst); 5458 /* This forces C1 to zero, which isn't right. */ 5459 put_C3210( 5460 unop(Iop_32Uto64, 5461 binop( Iop_And32, 5462 binop(Iop_Shl32, 5463 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5464 mkU8(8)), 5465 mkU32(0x4500) 5466 ))); 5467 fp_pop(); 5468 break; 5469 5470 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5471 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5472 break; 5473 5474 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5475 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5476 break; 5477 5478 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5479 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5480 break; 5481 5482 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5483 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5484 break; 5485 5486 default: 5487 goto decode_fail; 5488 } 5489 } 5490 } 5491 5492 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5493 else 5494 if (first_opcode == 0xD9) { 5495 if (modrm < 0xC0) { 5496 5497 /* bits 5,4,3 are an opcode extension, and the modRM also 5498 specifies an address. 
*/ 5499 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5500 delta += len; 5501 5502 switch (gregLO3ofRM(modrm)) { 5503 5504 case 0: /* FLD single-real */ 5505 DIP("flds %s\n", dis_buf); 5506 fp_push(); 5507 put_ST(0, unop(Iop_F32toF64, 5508 loadLE(Ity_F32, mkexpr(addr)))); 5509 break; 5510 5511 case 2: /* FST single-real */ 5512 DIP("fsts %s\n", dis_buf); 5513 storeLE(mkexpr(addr), 5514 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5515 break; 5516 5517 case 3: /* FSTP single-real */ 5518 DIP("fstps %s\n", dis_buf); 5519 storeLE(mkexpr(addr), 5520 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5521 fp_pop(); 5522 break; 5523 5524 case 4: { /* FLDENV m28 */ 5525 /* Uses dirty helper: 5526 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5527 IRTemp ew = newTemp(Ity_I32); 5528 IRTemp w64 = newTemp(Ity_I64); 5529 IRDirty* d = unsafeIRDirty_0_N ( 5530 0/*regparms*/, 5531 "amd64g_dirtyhelper_FLDENV", 5532 &amd64g_dirtyhelper_FLDENV, 5533 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5534 ); 5535 d->tmp = w64; 5536 /* declare we're reading memory */ 5537 d->mFx = Ifx_Read; 5538 d->mAddr = mkexpr(addr); 5539 d->mSize = 28; 5540 5541 /* declare we're writing guest state */ 5542 d->nFxState = 4; 5543 vex_bzero(&d->fxState, sizeof(d->fxState)); 5544 5545 d->fxState[0].fx = Ifx_Write; 5546 d->fxState[0].offset = OFFB_FTOP; 5547 d->fxState[0].size = sizeof(UInt); 5548 5549 d->fxState[1].fx = Ifx_Write; 5550 d->fxState[1].offset = OFFB_FPTAGS; 5551 d->fxState[1].size = 8 * sizeof(UChar); 5552 5553 d->fxState[2].fx = Ifx_Write; 5554 d->fxState[2].offset = OFFB_FPROUND; 5555 d->fxState[2].size = sizeof(ULong); 5556 5557 d->fxState[3].fx = Ifx_Write; 5558 d->fxState[3].offset = OFFB_FC3210; 5559 d->fxState[3].size = sizeof(ULong); 5560 5561 stmt( IRStmt_Dirty(d) ); 5562 5563 /* ew contains any emulation warning we may need to 5564 issue. If needed, side-exit to the next insn, 5565 reporting the warning, so that Valgrind's dispatcher 5566 sees the warning. */ 5567 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5568 put_emwarn( mkexpr(ew) ); 5569 stmt( 5570 IRStmt_Exit( 5571 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5572 Ijk_EmWarn, 5573 IRConst_U64( guest_RIP_bbstart+delta ), 5574 OFFB_RIP 5575 ) 5576 ); 5577 5578 DIP("fldenv %s\n", dis_buf); 5579 break; 5580 } 5581 5582 case 5: {/* FLDCW */ 5583 /* The only thing we observe in the control word is the 5584 rounding mode. Therefore, pass the 16-bit value 5585 (x87 native-format control word) to a clean helper, 5586 getting back a 64-bit value, the lower half of which 5587 is the FPROUND value to store, and the upper half of 5588 which is the emulation-warning token which may be 5589 generated. 5590 */ 5591 /* ULong amd64h_check_fldcw ( ULong ); */ 5592 IRTemp t64 = newTemp(Ity_I64); 5593 IRTemp ew = newTemp(Ity_I32); 5594 DIP("fldcw %s\n", dis_buf); 5595 assign( t64, mkIRExprCCall( 5596 Ity_I64, 0/*regparms*/, 5597 "amd64g_check_fldcw", 5598 &amd64g_check_fldcw, 5599 mkIRExprVec_1( 5600 unop( Iop_16Uto64, 5601 loadLE(Ity_I16, mkexpr(addr))) 5602 ) 5603 ) 5604 ); 5605 5606 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5607 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5608 put_emwarn( mkexpr(ew) ); 5609 /* Finally, if an emulation warning was reported, 5610 side-exit to the next insn, reporting the warning, 5611 so that Valgrind's dispatcher sees the warning. 
*/ 5612 stmt( 5613 IRStmt_Exit( 5614 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5615 Ijk_EmWarn, 5616 IRConst_U64( guest_RIP_bbstart+delta ), 5617 OFFB_RIP 5618 ) 5619 ); 5620 break; 5621 } 5622 5623 case 6: { /* FNSTENV m28 */ 5624 /* Uses dirty helper: 5625 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5626 IRDirty* d = unsafeIRDirty_0_N ( 5627 0/*regparms*/, 5628 "amd64g_dirtyhelper_FSTENV", 5629 &amd64g_dirtyhelper_FSTENV, 5630 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5631 ); 5632 /* declare we're writing memory */ 5633 d->mFx = Ifx_Write; 5634 d->mAddr = mkexpr(addr); 5635 d->mSize = 28; 5636 5637 /* declare we're reading guest state */ 5638 d->nFxState = 4; 5639 vex_bzero(&d->fxState, sizeof(d->fxState)); 5640 5641 d->fxState[0].fx = Ifx_Read; 5642 d->fxState[0].offset = OFFB_FTOP; 5643 d->fxState[0].size = sizeof(UInt); 5644 5645 d->fxState[1].fx = Ifx_Read; 5646 d->fxState[1].offset = OFFB_FPTAGS; 5647 d->fxState[1].size = 8 * sizeof(UChar); 5648 5649 d->fxState[2].fx = Ifx_Read; 5650 d->fxState[2].offset = OFFB_FPROUND; 5651 d->fxState[2].size = sizeof(ULong); 5652 5653 d->fxState[3].fx = Ifx_Read; 5654 d->fxState[3].offset = OFFB_FC3210; 5655 d->fxState[3].size = sizeof(ULong); 5656 5657 stmt( IRStmt_Dirty(d) ); 5658 5659 DIP("fnstenv %s\n", dis_buf); 5660 break; 5661 } 5662 5663 case 7: /* FNSTCW */ 5664 /* Fake up a native x87 FPU control word. The only 5665 thing it depends on is FPROUND[1:0], so call a clean 5666 helper to cook it up. */ 5667 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5668 DIP("fnstcw %s\n", dis_buf); 5669 storeLE( 5670 mkexpr(addr), 5671 unop( Iop_64to16, 5672 mkIRExprCCall( 5673 Ity_I64, 0/*regp*/, 5674 "amd64g_create_fpucw", &amd64g_create_fpucw, 5675 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5676 ) 5677 ) 5678 ); 5679 break; 5680 5681 default: 5682 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 5683 vex_printf("first_opcode == 0xD9\n"); 5684 goto decode_fail; 5685 } 5686 5687 } else { 5688 delta++; 5689 switch (modrm) { 5690 5691 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5692 r_src = (UInt)modrm - 0xC0; 5693 DIP("fld %%st(%u)\n", r_src); 5694 t1 = newTemp(Ity_F64); 5695 assign(t1, get_ST(r_src)); 5696 fp_push(); 5697 put_ST(0, mkexpr(t1)); 5698 break; 5699 5700 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5701 r_src = (UInt)modrm - 0xC8; 5702 DIP("fxch %%st(%u)\n", r_src); 5703 t1 = newTemp(Ity_F64); 5704 t2 = newTemp(Ity_F64); 5705 assign(t1, get_ST(0)); 5706 assign(t2, get_ST(r_src)); 5707 put_ST_UNCHECKED(0, mkexpr(t2)); 5708 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5709 break; 5710 5711 case 0xE0: /* FCHS */ 5712 DIP("fchs\n"); 5713 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5714 break; 5715 5716 case 0xE1: /* FABS */ 5717 DIP("fabs\n"); 5718 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5719 break; 5720 5721 case 0xE5: { /* FXAM */ 5722 /* This is an interesting one. It examines %st(0), 5723 regardless of whether the tag says it's empty or not. 5724 Here, just pass both the tag (in our format) and the 5725 value (as a double, actually a ULong) to a helper 5726 function. 
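               The helper hands back the FXAM classification (empty, NaN,
               infinity, zero, denormal or normal, plus the sign in C1)
               encoded in the C3210 bits.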
*/ 5727 IRExpr** args 5728 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5729 unop(Iop_ReinterpF64asI64, 5730 get_ST_UNCHECKED(0)) ); 5731 put_C3210(mkIRExprCCall( 5732 Ity_I64, 5733 0/*regparm*/, 5734 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5735 args 5736 )); 5737 DIP("fxam\n"); 5738 break; 5739 } 5740 5741 case 0xE8: /* FLD1 */ 5742 DIP("fld1\n"); 5743 fp_push(); 5744 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5745 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5746 break; 5747 5748 case 0xE9: /* FLDL2T */ 5749 DIP("fldl2t\n"); 5750 fp_push(); 5751 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5752 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5753 break; 5754 5755 case 0xEA: /* FLDL2E */ 5756 DIP("fldl2e\n"); 5757 fp_push(); 5758 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5759 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5760 break; 5761 5762 case 0xEB: /* FLDPI */ 5763 DIP("fldpi\n"); 5764 fp_push(); 5765 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5766 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5767 break; 5768 5769 case 0xEC: /* FLDLG2 */ 5770 DIP("fldlg2\n"); 5771 fp_push(); 5772 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5773 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5774 break; 5775 5776 case 0xED: /* FLDLN2 */ 5777 DIP("fldln2\n"); 5778 fp_push(); 5779 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5780 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5781 break; 5782 5783 case 0xEE: /* FLDZ */ 5784 DIP("fldz\n"); 5785 fp_push(); 5786 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5787 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5788 break; 5789 5790 case 0xF0: /* F2XM1 */ 5791 DIP("f2xm1\n"); 5792 put_ST_UNCHECKED(0, 5793 binop(Iop_2xm1F64, 5794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5795 get_ST(0))); 5796 break; 5797 5798 case 0xF1: /* FYL2X */ 5799 DIP("fyl2x\n"); 5800 put_ST_UNCHECKED(1, 5801 triop(Iop_Yl2xF64, 5802 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5803 get_ST(1), 5804 get_ST(0))); 5805 fp_pop(); 5806 break; 5807 5808 case 0xF2: { /* FPTAN */ 5809 DIP("fptan\n"); 5810 IRTemp argD = newTemp(Ity_F64); 5811 assign(argD, get_ST(0)); 5812 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5813 IRTemp resD = newTemp(Ity_F64); 5814 assign(resD, 5815 IRExpr_ITE( 5816 mkexpr(argOK), 5817 binop(Iop_TanF64, 5818 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5819 mkexpr(argD)), 5820 mkexpr(argD)) 5821 ); 5822 put_ST_UNCHECKED(0, mkexpr(resD)); 5823 /* Conditionally push 1.0 on the stack, if the arg is 5824 in range */ 5825 maybe_fp_push(argOK); 5826 maybe_put_ST(argOK, 0, 5827 IRExpr_Const(IRConst_F64(1.0))); 5828 set_C2( binop(Iop_Xor64, 5829 unop(Iop_1Uto64, mkexpr(argOK)), 5830 mkU64(1)) ); 5831 break; 5832 } 5833 5834 case 0xF3: /* FPATAN */ 5835 DIP("fpatan\n"); 5836 put_ST_UNCHECKED(1, 5837 triop(Iop_AtanF64, 5838 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5839 get_ST(1), 5840 get_ST(0))); 5841 fp_pop(); 5842 break; 5843 5844 case 0xF4: { /* FXTRACT */ 5845 IRTemp argF = newTemp(Ity_F64); 5846 IRTemp sigF = newTemp(Ity_F64); 5847 IRTemp expF = newTemp(Ity_F64); 5848 IRTemp argI = newTemp(Ity_I64); 5849 IRTemp sigI = newTemp(Ity_I64); 5850 IRTemp expI = newTemp(Ity_I64); 5851 DIP("fxtract\n"); 5852 assign( argF, get_ST(0) ); 5853 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5854 
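/* Worked example of what FXTRACT computes: a finite nonzero value v in
   %st(0) is split as v = sig * 2^exp with 1 <= |sig| < 2, so for
   v = 12.0 the exponent is 3.0 and the significand is 1.5.  The two
   clean-helper calls below extract each half in turn. */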
assign( sigI, 5855 mkIRExprCCall( 5856 Ity_I64, 0/*regparms*/, 5857 "x86amd64g_calculate_FXTRACT", 5858 &x86amd64g_calculate_FXTRACT, 5859 mkIRExprVec_2( mkexpr(argI), 5860 mkIRExpr_HWord(0)/*sig*/ )) 5861 ); 5862 assign( expI, 5863 mkIRExprCCall( 5864 Ity_I64, 0/*regparms*/, 5865 "x86amd64g_calculate_FXTRACT", 5866 &x86amd64g_calculate_FXTRACT, 5867 mkIRExprVec_2( mkexpr(argI), 5868 mkIRExpr_HWord(1)/*exp*/ )) 5869 ); 5870 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5871 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5872 /* exponent */ 5873 put_ST_UNCHECKED(0, mkexpr(expF) ); 5874 fp_push(); 5875 /* significand */ 5876 put_ST(0, mkexpr(sigF) ); 5877 break; 5878 } 5879 5880 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5881 IRTemp a1 = newTemp(Ity_F64); 5882 IRTemp a2 = newTemp(Ity_F64); 5883 DIP("fprem1\n"); 5884 /* Do FPREM1 twice, once to get the remainder, and once 5885 to get the C3210 flag values. */ 5886 assign( a1, get_ST(0) ); 5887 assign( a2, get_ST(1) ); 5888 put_ST_UNCHECKED(0, 5889 triop(Iop_PRem1F64, 5890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5891 mkexpr(a1), 5892 mkexpr(a2))); 5893 put_C3210( 5894 unop(Iop_32Uto64, 5895 triop(Iop_PRem1C3210F64, 5896 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5897 mkexpr(a1), 5898 mkexpr(a2)) )); 5899 break; 5900 } 5901 5902 case 0xF7: /* FINCSTP */ 5903 DIP("fincstp\n"); 5904 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5905 break; 5906 5907 case 0xF8: { /* FPREM -- not IEEE compliant */ 5908 IRTemp a1 = newTemp(Ity_F64); 5909 IRTemp a2 = newTemp(Ity_F64); 5910 DIP("fprem\n"); 5911 /* Do FPREM twice, once to get the remainder, and once 5912 to get the C3210 flag values. */ 5913 assign( a1, get_ST(0) ); 5914 assign( a2, get_ST(1) ); 5915 put_ST_UNCHECKED(0, 5916 triop(Iop_PRemF64, 5917 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5918 mkexpr(a1), 5919 mkexpr(a2))); 5920 put_C3210( 5921 unop(Iop_32Uto64, 5922 triop(Iop_PRemC3210F64, 5923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5924 mkexpr(a1), 5925 mkexpr(a2)) )); 5926 break; 5927 } 5928 5929 case 0xF9: /* FYL2XP1 */ 5930 DIP("fyl2xp1\n"); 5931 put_ST_UNCHECKED(1, 5932 triop(Iop_Yl2xp1F64, 5933 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5934 get_ST(1), 5935 get_ST(0))); 5936 fp_pop(); 5937 break; 5938 5939 case 0xFA: /* FSQRT */ 5940 DIP("fsqrt\n"); 5941 put_ST_UNCHECKED(0, 5942 binop(Iop_SqrtF64, 5943 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5944 get_ST(0))); 5945 break; 5946 5947 case 0xFB: { /* FSINCOS */ 5948 DIP("fsincos\n"); 5949 IRTemp argD = newTemp(Ity_F64); 5950 assign(argD, get_ST(0)); 5951 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5952 IRTemp resD = newTemp(Ity_F64); 5953 assign(resD, 5954 IRExpr_ITE( 5955 mkexpr(argOK), 5956 binop(Iop_SinF64, 5957 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5958 mkexpr(argD)), 5959 mkexpr(argD)) 5960 ); 5961 put_ST_UNCHECKED(0, mkexpr(resD)); 5962 /* Conditionally push the cos value on the stack, if 5963 the arg is in range */ 5964 maybe_fp_push(argOK); 5965 maybe_put_ST(argOK, 0, 5966 binop(Iop_CosF64, 5967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5968 mkexpr(argD))); 5969 set_C2( binop(Iop_Xor64, 5970 unop(Iop_1Uto64, mkexpr(argOK)), 5971 mkU64(1)) ); 5972 break; 5973 } 5974 5975 case 0xFC: /* FRNDINT */ 5976 DIP("frndint\n"); 5977 put_ST_UNCHECKED(0, 5978 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 5979 break; 5980 5981 case 0xFD: /* FSCALE */ 5982 DIP("fscale\n"); 5983 put_ST_UNCHECKED(0, 5984 triop(Iop_ScaleF64, 5985 
get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5986 get_ST(0),
5987 get_ST(1)));
5988 break;
5989
5990 case 0xFE: /* FSIN */
5991 case 0xFF: { /* FCOS */
5992 Bool isSIN = modrm == 0xFE;
5993 DIP("%s\n", isSIN ? "fsin" : "fcos");
5994 IRTemp argD = newTemp(Ity_F64);
5995 assign(argD, get_ST(0));
5996 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5997 IRTemp resD = newTemp(Ity_F64);
5998 assign(resD,
5999 IRExpr_ITE(
6000 mkexpr(argOK),
6001 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6002 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6003 mkexpr(argD)),
6004 mkexpr(argD))
6005 );
6006 put_ST_UNCHECKED(0, mkexpr(resD));
6007 set_C2( binop(Iop_Xor64,
6008 unop(Iop_1Uto64, mkexpr(argOK)),
6009 mkU64(1)) );
6010 break;
6011 }
6012
6013 default:
6014 goto decode_fail;
6015 }
6016 }
6017 }
6018
6019 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6020 else
6021 if (first_opcode == 0xDA) {
6022
6023 if (modrm < 0xC0) {
6024
6025 /* bits 5,4,3 are an opcode extension, and the modRM also
6026 specifies an address. */
6027 IROp fop;
6028 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6029 delta += len;
6030 switch (gregLO3ofRM(modrm)) {
6031
6032 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6033 DIP("fiaddl %s\n", dis_buf);
6034 fop = Iop_AddF64;
6035 goto do_fop_m32;
6036
6037 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6038 DIP("fimull %s\n", dis_buf);
6039 fop = Iop_MulF64;
6040 goto do_fop_m32;
6041
6042 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6043 DIP("fisubl %s\n", dis_buf);
6044 fop = Iop_SubF64;
6045 goto do_fop_m32;
6046
6047 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6048 DIP("fisubrl %s\n", dis_buf);
6049 fop = Iop_SubF64;
6050 goto do_foprev_m32;
6051
6052 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6053 DIP("fidivl %s\n", dis_buf);
6054 fop = Iop_DivF64;
6055 goto do_fop_m32;
6056
6057 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6058 DIP("fidivrl %s\n", dis_buf);
6059 fop = Iop_DivF64;
6060 goto do_foprev_m32;
6061
6062 do_fop_m32:
6063 put_ST_UNCHECKED(0,
6064 triop(fop,
6065 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6066 get_ST(0),
6067 unop(Iop_I32StoF64,
6068 loadLE(Ity_I32, mkexpr(addr)))));
6069 break;
6070
6071 do_foprev_m32:
6072 put_ST_UNCHECKED(0,
6073 triop(fop,
6074 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6075 unop(Iop_I32StoF64,
6076 loadLE(Ity_I32, mkexpr(addr))),
6077 get_ST(0)));
6078 break;
6079
6080 default:
6081 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6082 vex_printf("first_opcode == 0xDA\n");
6083 goto decode_fail;
6084 }
6085
6086 } else {
6087
6088 delta++;
6089 switch (modrm) {
6090
6091 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6092 r_src = (UInt)modrm - 0xC0;
6093 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6094 put_ST_UNCHECKED(0,
6095 IRExpr_ITE(
6096 mk_amd64g_calculate_condition(AMD64CondB),
6097 get_ST(r_src), get_ST(0)) );
6098 break;
6099
6100 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6101 r_src = (UInt)modrm - 0xC8;
6102 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6103 put_ST_UNCHECKED(0,
6104 IRExpr_ITE(
6105 mk_amd64g_calculate_condition(AMD64CondZ),
6106 get_ST(r_src), get_ST(0)) );
6107 break;
6108
6109 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6110 r_src = (UInt)modrm - 0xD0;
6111 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6112 put_ST_UNCHECKED(0,
6113 IRExpr_ITE(
6114 mk_amd64g_calculate_condition(AMD64CondBE),
6115 get_ST(r_src), get_ST(0)) );
6116 break;
6117
6118 case 0xD8 ...
0xDF: /* FCMOVU ST(i), ST(0) */
6119 r_src = (UInt)modrm - 0xD8;
6120 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6121 put_ST_UNCHECKED(0,
6122 IRExpr_ITE(
6123 mk_amd64g_calculate_condition(AMD64CondP),
6124 get_ST(r_src), get_ST(0)) );
6125 break;
6126
6127 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6128 DIP("fucompp %%st(0),%%st(1)\n");
6129 /* This forces C1 to zero, which isn't right. */
6130 put_C3210(
6131 unop(Iop_32Uto64,
6132 binop( Iop_And32,
6133 binop(Iop_Shl32,
6134 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6135 mkU8(8)),
6136 mkU32(0x4500)
6137 )));
6138 fp_pop();
6139 fp_pop();
6140 break;
6141
6142 default:
6143 goto decode_fail;
6144 }
6145
6146 }
6147 }
6148
6149 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6150 else
6151 if (first_opcode == 0xDB) {
6152 if (modrm < 0xC0) {
6153
6154 /* bits 5,4,3 are an opcode extension, and the modRM also
6155 specifies an address. */
6156 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6157 delta += len;
6158
6159 switch (gregLO3ofRM(modrm)) {
6160
6161 case 0: /* FILD m32int */
6162 DIP("fildl %s\n", dis_buf);
6163 fp_push();
6164 put_ST(0, unop(Iop_I32StoF64,
6165 loadLE(Ity_I32, mkexpr(addr))));
6166 break;
6167
6168 case 1: /* FISTTPL m32 (SSE3) */
6169 DIP("fisttpl %s\n", dis_buf);
6170 storeLE( mkexpr(addr),
6171 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6172 fp_pop();
6173 break;
6174
6175 case 2: /* FIST m32 */
6176 DIP("fistl %s\n", dis_buf);
6177 storeLE( mkexpr(addr),
6178 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6179 break;
6180
6181 case 3: /* FISTP m32 */
6182 DIP("fistpl %s\n", dis_buf);
6183 storeLE( mkexpr(addr),
6184 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6185 fp_pop();
6186 break;
6187
6188 case 5: { /* FLD extended-real */
6189 /* Uses dirty helper:
6190 ULong amd64g_loadF80le ( ULong )
6191 addr holds the address. First, do a dirty call to
6192 get hold of the data. */
6193 IRTemp val = newTemp(Ity_I64);
6194 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6195
6196 IRDirty* d = unsafeIRDirty_1_N (
6197 val,
6198 0/*regparms*/,
6199 "amd64g_dirtyhelper_loadF80le",
6200 &amd64g_dirtyhelper_loadF80le,
6201 args
6202 );
6203 /* declare that we're reading memory */
6204 d->mFx = Ifx_Read;
6205 d->mAddr = mkexpr(addr);
6206 d->mSize = 10;
6207
6208 /* execute the dirty call, dumping the result in val. */
6209 stmt( IRStmt_Dirty(d) );
6210 fp_push();
6211 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6212
6213 DIP("fldt %s\n", dis_buf);
6214 break;
6215 }
6216
6217 case 7: { /* FSTP extended-real */
6218 /* Uses dirty helper:
6219 void amd64g_storeF80le ( ULong addr, ULong data )
6220 */
6221 IRExpr** args
6222 = mkIRExprVec_2( mkexpr(addr),
6223 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6224
6225 IRDirty* d = unsafeIRDirty_0_N (
6226 0/*regparms*/,
6227 "amd64g_dirtyhelper_storeF80le",
6228 &amd64g_dirtyhelper_storeF80le,
6229 args
6230 );
6231 /* declare we're writing memory */
6232 d->mFx = Ifx_Write;
6233 d->mAddr = mkexpr(addr);
6234 d->mSize = 10;
6235
6236 /* execute the dirty call. */
6237 stmt( IRStmt_Dirty(d) );
6238 fp_pop();
6239
6240 DIP("fstpt %s\n", dis_buf);
6241 break;
6242 }
6243
6244 default:
6245 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6246 vex_printf("first_opcode == 0xDB\n");
6247 goto decode_fail;
6248 }
6249
6250 } else {
6251
6252 delta++;
6253 switch (modrm) {
6254
6255 case 0xC0 ...
0xC7: /* FCMOVNB ST(i), ST(0) */ 6256 r_src = (UInt)modrm - 0xC0; 6257 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 6258 put_ST_UNCHECKED(0, 6259 IRExpr_ITE( 6260 mk_amd64g_calculate_condition(AMD64CondNB), 6261 get_ST(r_src), get_ST(0)) ); 6262 break; 6263 6264 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 6265 r_src = (UInt)modrm - 0xC8; 6266 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 6267 put_ST_UNCHECKED( 6268 0, 6269 IRExpr_ITE( 6270 mk_amd64g_calculate_condition(AMD64CondNZ), 6271 get_ST(r_src), 6272 get_ST(0) 6273 ) 6274 ); 6275 break; 6276 6277 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 6278 r_src = (UInt)modrm - 0xD0; 6279 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 6280 put_ST_UNCHECKED( 6281 0, 6282 IRExpr_ITE( 6283 mk_amd64g_calculate_condition(AMD64CondNBE), 6284 get_ST(r_src), 6285 get_ST(0) 6286 ) 6287 ); 6288 break; 6289 6290 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */ 6291 r_src = (UInt)modrm - 0xD8; 6292 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 6293 put_ST_UNCHECKED( 6294 0, 6295 IRExpr_ITE( 6296 mk_amd64g_calculate_condition(AMD64CondNP), 6297 get_ST(r_src), 6298 get_ST(0) 6299 ) 6300 ); 6301 break; 6302 6303 case 0xE2: 6304 DIP("fnclex\n"); 6305 break; 6306 6307 case 0xE3: { 6308 /* Uses dirty helper: 6309 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 6310 IRDirty* d = unsafeIRDirty_0_N ( 6311 0/*regparms*/, 6312 "amd64g_dirtyhelper_FINIT", 6313 &amd64g_dirtyhelper_FINIT, 6314 mkIRExprVec_1( IRExpr_BBPTR() ) 6315 ); 6316 6317 /* declare we're writing guest state */ 6318 d->nFxState = 5; 6319 vex_bzero(&d->fxState, sizeof(d->fxState)); 6320 6321 d->fxState[0].fx = Ifx_Write; 6322 d->fxState[0].offset = OFFB_FTOP; 6323 d->fxState[0].size = sizeof(UInt); 6324 6325 d->fxState[1].fx = Ifx_Write; 6326 d->fxState[1].offset = OFFB_FPREGS; 6327 d->fxState[1].size = 8 * sizeof(ULong); 6328 6329 d->fxState[2].fx = Ifx_Write; 6330 d->fxState[2].offset = OFFB_FPTAGS; 6331 d->fxState[2].size = 8 * sizeof(UChar); 6332 6333 d->fxState[3].fx = Ifx_Write; 6334 d->fxState[3].offset = OFFB_FPROUND; 6335 d->fxState[3].size = sizeof(ULong); 6336 6337 d->fxState[4].fx = Ifx_Write; 6338 d->fxState[4].offset = OFFB_FC3210; 6339 d->fxState[4].size = sizeof(ULong); 6340 6341 stmt( IRStmt_Dirty(d) ); 6342 6343 DIP("fninit\n"); 6344 break; 6345 } 6346 6347 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 6348 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 6349 break; 6350 6351 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 6352 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 6353 break; 6354 6355 default: 6356 goto decode_fail; 6357 } 6358 } 6359 } 6360 6361 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 6362 else 6363 if (first_opcode == 0xDC) { 6364 if (modrm < 0xC0) { 6365 6366 /* bits 5,4,3 are an opcode extension, and the modRM also 6367 specifies an address. */ 6368 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6369 delta += len; 6370 6371 switch (gregLO3ofRM(modrm)) { 6372 6373 case 0: /* FADD double-real */ 6374 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 6375 break; 6376 6377 case 1: /* FMUL double-real */ 6378 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 6379 break; 6380 6381 //.. case 2: /* FCOM double-real */ 6382 //.. DIP("fcoml %s\n", dis_buf); 6383 //.. /* This forces C1 to zero, which isn't right. */ 6384 //.. put_C3210( 6385 //.. binop( Iop_And32, 6386 //.. binop(Iop_Shl32, 6387 //.. binop(Iop_CmpF64, 6388 //.. get_ST(0), 6389 //.. loadLE(Ity_F64,mkexpr(addr))), 6390 //.. mkU8(8)), 6391 //.. 
mkU32(0x4500) 6392 //.. )); 6393 //.. break; 6394 6395 case 3: /* FCOMP double-real */ 6396 DIP("fcompl %s\n", dis_buf); 6397 /* This forces C1 to zero, which isn't right. */ 6398 put_C3210( 6399 unop(Iop_32Uto64, 6400 binop( Iop_And32, 6401 binop(Iop_Shl32, 6402 binop(Iop_CmpF64, 6403 get_ST(0), 6404 loadLE(Ity_F64,mkexpr(addr))), 6405 mkU8(8)), 6406 mkU32(0x4500) 6407 ))); 6408 fp_pop(); 6409 break; 6410 6411 case 4: /* FSUB double-real */ 6412 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 6413 break; 6414 6415 case 5: /* FSUBR double-real */ 6416 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 6417 break; 6418 6419 case 6: /* FDIV double-real */ 6420 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 6421 break; 6422 6423 case 7: /* FDIVR double-real */ 6424 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 6425 break; 6426 6427 default: 6428 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6429 vex_printf("first_opcode == 0xDC\n"); 6430 goto decode_fail; 6431 } 6432 6433 } else { 6434 6435 delta++; 6436 switch (modrm) { 6437 6438 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 6439 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 6440 break; 6441 6442 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 6443 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 6444 break; 6445 6446 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 6447 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 6448 break; 6449 6450 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 6451 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 6452 break; 6453 6454 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 6455 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 6456 break; 6457 6458 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */ 6459 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 6460 break; 6461 6462 default: 6463 goto decode_fail; 6464 } 6465 6466 } 6467 } 6468 6469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 6470 else 6471 if (first_opcode == 0xDD) { 6472 6473 if (modrm < 0xC0) { 6474 6475 /* bits 5,4,3 are an opcode extension, and the modRM also 6476 specifies an address. 
*/
6477 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6478 delta += len;
6479
6480 switch (gregLO3ofRM(modrm)) {
6481
6482 case 0: /* FLD double-real */
6483 DIP("fldl %s\n", dis_buf);
6484 fp_push();
6485 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6486 break;
6487
6488 case 1: /* FISTTPQ m64 (SSE3) */
6489 DIP("fisttpll %s\n", dis_buf);
6490 storeLE( mkexpr(addr),
6491 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6492 fp_pop();
6493 break;
6494
6495 case 2: /* FST double-real */
6496 DIP("fstl %s\n", dis_buf);
6497 storeLE(mkexpr(addr), get_ST(0));
6498 break;
6499
6500 case 3: /* FSTP double-real */
6501 DIP("fstpl %s\n", dis_buf);
6502 storeLE(mkexpr(addr), get_ST(0));
6503 fp_pop();
6504 break;
6505
6506 case 4: { /* FRSTOR m94/m108 */
6507 IRTemp ew = newTemp(Ity_I32);
6508 IRTemp w64 = newTemp(Ity_I64);
6509 IRDirty* d;
6510 if ( have66(pfx) ) {
6511 /* Uses dirty helper:
6512 VexEmNote amd64g_dirtyhelper_FRSTORS
6513 ( VexGuestAMD64State*, HWord ) */
6514 d = unsafeIRDirty_0_N (
6515 0/*regparms*/,
6516 "amd64g_dirtyhelper_FRSTORS",
6517 &amd64g_dirtyhelper_FRSTORS,
6518 mkIRExprVec_1( mkexpr(addr) )
6519 );
6520 d->mSize = 94;
6521 } else {
6522 /* Uses dirty helper:
6523 VexEmNote amd64g_dirtyhelper_FRSTOR
6524 ( VexGuestAMD64State*, HWord ) */
6525 d = unsafeIRDirty_0_N (
6526 0/*regparms*/,
6527 "amd64g_dirtyhelper_FRSTOR",
6528 &amd64g_dirtyhelper_FRSTOR,
6529 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
6530 );
6531 d->mSize = 108;
6532 }
6533
6534 d->tmp = w64;
6535 /* declare we're reading memory */
6536 d->mFx = Ifx_Read;
6537 d->mAddr = mkexpr(addr);
6538 /* d->mSize set above */
6539
6540 /* declare we're writing guest state */
6541 d->nFxState = 5;
6542 vex_bzero(&d->fxState, sizeof(d->fxState));
6543
6544 d->fxState[0].fx = Ifx_Write;
6545 d->fxState[0].offset = OFFB_FTOP;
6546 d->fxState[0].size = sizeof(UInt);
6547
6548 d->fxState[1].fx = Ifx_Write;
6549 d->fxState[1].offset = OFFB_FPREGS;
6550 d->fxState[1].size = 8 * sizeof(ULong);
6551
6552 d->fxState[2].fx = Ifx_Write;
6553 d->fxState[2].offset = OFFB_FPTAGS;
6554 d->fxState[2].size = 8 * sizeof(UChar);
6555
6556 d->fxState[3].fx = Ifx_Write;
6557 d->fxState[3].offset = OFFB_FPROUND;
6558 d->fxState[3].size = sizeof(ULong);
6559
6560 d->fxState[4].fx = Ifx_Write;
6561 d->fxState[4].offset = OFFB_FC3210;
6562 d->fxState[4].size = sizeof(ULong);
6563
6564 stmt( IRStmt_Dirty(d) );
6565
6566 /* ew contains any emulation warning we may need to
6567 issue. If needed, side-exit to the next insn,
6568 reporting the warning, so that Valgrind's dispatcher
6569 sees the warning.
*/ 6570 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 6571 put_emwarn( mkexpr(ew) ); 6572 stmt( 6573 IRStmt_Exit( 6574 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6575 Ijk_EmWarn, 6576 IRConst_U64( guest_RIP_bbstart+delta ), 6577 OFFB_RIP 6578 ) 6579 ); 6580 6581 if ( have66(pfx) ) { 6582 DIP("frstors %s\n", dis_buf); 6583 } else { 6584 DIP("frstor %s\n", dis_buf); 6585 } 6586 break; 6587 } 6588 6589 case 6: { /* FNSAVE m94/m108 */ 6590 IRDirty *d; 6591 if ( have66(pfx) ) { 6592 /* Uses dirty helper: 6593 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, 6594 HWord ) */ 6595 d = unsafeIRDirty_0_N ( 6596 0/*regparms*/, 6597 "amd64g_dirtyhelper_FNSAVES", 6598 &amd64g_dirtyhelper_FNSAVES, 6599 mkIRExprVec_1( mkexpr(addr) ) 6600 ); 6601 d->mSize = 94; 6602 } else { 6603 /* Uses dirty helper: 6604 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, 6605 HWord ) */ 6606 d = unsafeIRDirty_0_N ( 6607 0/*regparms*/, 6608 "amd64g_dirtyhelper_FNSAVE", 6609 &amd64g_dirtyhelper_FNSAVE, 6610 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6611 ); 6612 d->mSize = 108; 6613 } 6614 6615 /* declare we're writing memory */ 6616 d->mFx = Ifx_Write; 6617 d->mAddr = mkexpr(addr); 6618 /* d->mSize set above */ 6619 6620 /* declare we're reading guest state */ 6621 d->nFxState = 5; 6622 vex_bzero(&d->fxState, sizeof(d->fxState)); 6623 6624 d->fxState[0].fx = Ifx_Read; 6625 d->fxState[0].offset = OFFB_FTOP; 6626 d->fxState[0].size = sizeof(UInt); 6627 6628 d->fxState[1].fx = Ifx_Read; 6629 d->fxState[1].offset = OFFB_FPREGS; 6630 d->fxState[1].size = 8 * sizeof(ULong); 6631 6632 d->fxState[2].fx = Ifx_Read; 6633 d->fxState[2].offset = OFFB_FPTAGS; 6634 d->fxState[2].size = 8 * sizeof(UChar); 6635 6636 d->fxState[3].fx = Ifx_Read; 6637 d->fxState[3].offset = OFFB_FPROUND; 6638 d->fxState[3].size = sizeof(ULong); 6639 6640 d->fxState[4].fx = Ifx_Read; 6641 d->fxState[4].offset = OFFB_FC3210; 6642 d->fxState[4].size = sizeof(ULong); 6643 6644 stmt( IRStmt_Dirty(d) ); 6645 6646 if ( have66(pfx) ) { 6647 DIP("fnsaves %s\n", dis_buf); 6648 } else { 6649 DIP("fnsave %s\n", dis_buf); 6650 } 6651 break; 6652 } 6653 6654 case 7: { /* FNSTSW m16 */ 6655 IRExpr* sw = get_FPU_sw(); 6656 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6657 storeLE( mkexpr(addr), sw ); 6658 DIP("fnstsw %s\n", dis_buf); 6659 break; 6660 } 6661 6662 default: 6663 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6664 vex_printf("first_opcode == 0xDD\n"); 6665 goto decode_fail; 6666 } 6667 } else { 6668 delta++; 6669 switch (modrm) { 6670 6671 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6672 r_dst = (UInt)modrm - 0xC0; 6673 DIP("ffree %%st(%u)\n", r_dst); 6674 put_ST_TAG ( r_dst, mkU8(0) ); 6675 break; 6676 6677 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6678 r_dst = (UInt)modrm - 0xD0; 6679 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6680 /* P4 manual says: "If the destination operand is a 6681 non-empty register, the invalid-operation exception 6682 is not generated. Hence put_ST_UNCHECKED. */ 6683 put_ST_UNCHECKED(r_dst, get_ST(0)); 6684 break; 6685 6686 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6687 r_dst = (UInt)modrm - 0xD8; 6688 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6689 /* P4 manual says: "If the destination operand is a 6690 non-empty register, the invalid-operation exception 6691 is not generated. Hence put_ST_UNCHECKED. */ 6692 put_ST_UNCHECKED(r_dst, get_ST(0)); 6693 fp_pop(); 6694 break; 6695 6696 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) 
*/
6697 r_dst = (UInt)modrm - 0xE0;
6698 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6699 /* This forces C1 to zero, which isn't right. */
6700 put_C3210(
6701 unop(Iop_32Uto64,
6702 binop( Iop_And32,
6703 binop(Iop_Shl32,
6704 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6705 mkU8(8)),
6706 mkU32(0x4500)
6707 )));
6708 break;
6709
6710 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6711 r_dst = (UInt)modrm - 0xE8;
6712 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6713 /* This forces C1 to zero, which isn't right. */
6714 put_C3210(
6715 unop(Iop_32Uto64,
6716 binop( Iop_And32,
6717 binop(Iop_Shl32,
6718 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6719 mkU8(8)),
6720 mkU32(0x4500)
6721 )));
6722 fp_pop();
6723 break;
6724
6725 default:
6726 goto decode_fail;
6727 }
6728 }
6729 }
6730
6731 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6732 else
6733 if (first_opcode == 0xDE) {
6734
6735 if (modrm < 0xC0) {
6736
6737 /* bits 5,4,3 are an opcode extension, and the modRM also
6738 specifies an address. */
6739 IROp fop;
6740 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6741 delta += len;
6742
6743 switch (gregLO3ofRM(modrm)) {
6744
6745 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6746 DIP("fiaddw %s\n", dis_buf);
6747 fop = Iop_AddF64;
6748 goto do_fop_m16;
6749
6750 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6751 DIP("fimulw %s\n", dis_buf);
6752 fop = Iop_MulF64;
6753 goto do_fop_m16;
6754
6755 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6756 DIP("fisubw %s\n", dis_buf);
6757 fop = Iop_SubF64;
6758 goto do_fop_m16;
6759
6760 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6761 DIP("fisubrw %s\n", dis_buf);
6762 fop = Iop_SubF64;
6763 goto do_foprev_m16;
6764
6765 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6766 DIP("fidivw %s\n", dis_buf);
6767 fop = Iop_DivF64;
6768 goto do_fop_m16;
6769
6770 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6771 DIP("fidivrw %s\n", dis_buf);
6772 fop = Iop_DivF64;
6773 goto do_foprev_m16;
6774
6775 do_fop_m16:
6776 put_ST_UNCHECKED(0,
6777 triop(fop,
6778 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6779 get_ST(0),
6780 unop(Iop_I32StoF64,
6781 unop(Iop_16Sto32,
6782 loadLE(Ity_I16, mkexpr(addr))))));
6783 break;
6784
6785 do_foprev_m16:
6786 put_ST_UNCHECKED(0,
6787 triop(fop,
6788 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6789 unop(Iop_I32StoF64,
6790 unop(Iop_16Sto32,
6791 loadLE(Ity_I16, mkexpr(addr)))),
6792 get_ST(0)));
6793 break;
6794
6795 default:
6796 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6797 vex_printf("first_opcode == 0xDE\n");
6798 goto decode_fail;
6799 }
6800
6801 } else {
6802
6803 delta++;
6804 switch (modrm) {
6805
6806 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6807 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6808 break;
6809
6810 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6811 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6812 break;
6813
6814 case 0xD9: /* FCOMPP %st(0),%st(1) */
6815 DIP("fcompp %%st(0),%%st(1)\n");
6816 /* This forces C1 to zero, which isn't right. */
6817 put_C3210(
6818 unop(Iop_32Uto64,
6819 binop( Iop_And32,
6820 binop(Iop_Shl32,
6821 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6822 mkU8(8)),
6823 mkU32(0x4500)
6824 )));
6825 fp_pop();
6826 fp_pop();
6827 break;
6828
6829 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6830 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6831 break;
6832
6833 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?)
*/ 6834 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6835 break; 6836 6837 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6838 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6839 break; 6840 6841 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6842 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6843 break; 6844 6845 default: 6846 goto decode_fail; 6847 } 6848 6849 } 6850 } 6851 6852 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6853 else 6854 if (first_opcode == 0xDF) { 6855 6856 if (modrm < 0xC0) { 6857 6858 /* bits 5,4,3 are an opcode extension, and the modRM also 6859 specifies an address. */ 6860 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6861 delta += len; 6862 6863 switch (gregLO3ofRM(modrm)) { 6864 6865 case 0: /* FILD m16int */ 6866 DIP("fildw %s\n", dis_buf); 6867 fp_push(); 6868 put_ST(0, unop(Iop_I32StoF64, 6869 unop(Iop_16Sto32, 6870 loadLE(Ity_I16, mkexpr(addr))))); 6871 break; 6872 6873 case 1: /* FISTTPS m16 (SSE3) */ 6874 DIP("fisttps %s\n", dis_buf); 6875 storeLE( mkexpr(addr), 6876 x87ishly_qnarrow_32_to_16( 6877 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6878 fp_pop(); 6879 break; 6880 6881 case 2: /* FIST m16 */ 6882 DIP("fists %s\n", dis_buf); 6883 storeLE( mkexpr(addr), 6884 x87ishly_qnarrow_32_to_16( 6885 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6886 break; 6887 6888 case 3: /* FISTP m16 */ 6889 DIP("fistps %s\n", dis_buf); 6890 storeLE( mkexpr(addr), 6891 x87ishly_qnarrow_32_to_16( 6892 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6893 fp_pop(); 6894 break; 6895 6896 case 5: /* FILD m64 */ 6897 DIP("fildll %s\n", dis_buf); 6898 fp_push(); 6899 put_ST(0, binop(Iop_I64StoF64, 6900 get_roundingmode(), 6901 loadLE(Ity_I64, mkexpr(addr)))); 6902 break; 6903 6904 case 7: /* FISTP m64 */ 6905 DIP("fistpll %s\n", dis_buf); 6906 storeLE( mkexpr(addr), 6907 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6908 fp_pop(); 6909 break; 6910 6911 default: 6912 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm)); 6913 vex_printf("first_opcode == 0xDF\n"); 6914 goto decode_fail; 6915 } 6916 6917 } else { 6918 6919 delta++; 6920 switch (modrm) { 6921 6922 case 0xC0: /* FFREEP %st(0) */ 6923 DIP("ffreep %%st(%d)\n", 0); 6924 put_ST_TAG ( 0, mkU8(0) ); 6925 fp_pop(); 6926 break; 6927 6928 case 0xE0: /* FNSTSW %ax */ 6929 DIP("fnstsw %%ax\n"); 6930 /* Invent a plausible-looking FPU status word value and 6931 dump it in %AX: 6932 ((ftop & 7) << 11) | (c3210 & 0x4700) 6933 */ 6934 putIRegRAX( 6935 2, 6936 unop(Iop_32to16, 6937 binop(Iop_Or32, 6938 binop(Iop_Shl32, 6939 binop(Iop_And32, get_ftop(), mkU32(7)), 6940 mkU8(11)), 6941 binop(Iop_And32, 6942 unop(Iop_64to32, get_C3210()), 6943 mkU32(0x4700)) 6944 ))); 6945 break; 6946 6947 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */ 6948 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True ); 6949 break; 6950 6951 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) 
*/ 6952 /* not really right since COMIP != UCOMIP */ 6953 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True ); 6954 break; 6955 6956 default: 6957 goto decode_fail; 6958 } 6959 } 6960 6961 } 6962 6963 else 6964 goto decode_fail; 6965 6966 *decode_ok = True; 6967 return delta; 6968 6969 decode_fail: 6970 *decode_ok = False; 6971 return delta; 6972 } 6973 6974 6975 /*------------------------------------------------------------*/ 6976 /*--- ---*/ 6977 /*--- MMX INSTRUCTIONS ---*/ 6978 /*--- ---*/ 6979 /*------------------------------------------------------------*/ 6980 6981 /* Effect of MMX insns on x87 FPU state (table 11-2 of 6982 IA32 arch manual, volume 3): 6983 6984 Read from, or write to MMX register (viz, any insn except EMMS): 6985 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero 6986 * FP stack pointer set to zero 6987 6988 EMMS: 6989 * All tags set to Invalid (empty) -- FPTAGS[i] := zero 6990 * FP stack pointer set to zero 6991 */ 6992 6993 static void do_MMX_preamble ( void ) 6994 { 6995 Int i; 6996 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 6997 IRExpr* zero = mkU32(0); 6998 IRExpr* tag1 = mkU8(1); 6999 put_ftop(zero); 7000 for (i = 0; i < 8; i++) 7001 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) ); 7002 } 7003 7004 static void do_EMMS_preamble ( void ) 7005 { 7006 Int i; 7007 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 7008 IRExpr* zero = mkU32(0); 7009 IRExpr* tag0 = mkU8(0); 7010 put_ftop(zero); 7011 for (i = 0; i < 8; i++) 7012 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) ); 7013 } 7014 7015 7016 static IRExpr* getMMXReg ( UInt archreg ) 7017 { 7018 vassert(archreg < 8); 7019 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 ); 7020 } 7021 7022 7023 static void putMMXReg ( UInt archreg, IRExpr* e ) 7024 { 7025 vassert(archreg < 8); 7026 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64); 7027 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) ); 7028 } 7029 7030 7031 /* Helper for non-shift MMX insns. Note this is incomplete in the 7032 sense that it does not first call do_MMX_preamble() -- that is the 7033 responsibility of its caller. 
*/ 7034 7035 static 7036 ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi, 7037 Prefix pfx, 7038 Long delta, 7039 UChar opc, 7040 const HChar* name, 7041 Bool show_granularity ) 7042 { 7043 HChar dis_buf[50]; 7044 UChar modrm = getUChar(delta); 7045 Bool isReg = epartIsReg(modrm); 7046 IRExpr* argL = NULL; 7047 IRExpr* argR = NULL; 7048 IRExpr* argG = NULL; 7049 IRExpr* argE = NULL; 7050 IRTemp res = newTemp(Ity_I64); 7051 7052 Bool invG = False; 7053 IROp op = Iop_INVALID; 7054 void* hAddr = NULL; 7055 const HChar* hName = NULL; 7056 Bool eLeft = False; 7057 7058 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0) 7059 7060 switch (opc) { 7061 /* Original MMX ones */ 7062 case 0xFC: op = Iop_Add8x8; break; 7063 case 0xFD: op = Iop_Add16x4; break; 7064 case 0xFE: op = Iop_Add32x2; break; 7065 7066 case 0xEC: op = Iop_QAdd8Sx8; break; 7067 case 0xED: op = Iop_QAdd16Sx4; break; 7068 7069 case 0xDC: op = Iop_QAdd8Ux8; break; 7070 case 0xDD: op = Iop_QAdd16Ux4; break; 7071 7072 case 0xF8: op = Iop_Sub8x8; break; 7073 case 0xF9: op = Iop_Sub16x4; break; 7074 case 0xFA: op = Iop_Sub32x2; break; 7075 7076 case 0xE8: op = Iop_QSub8Sx8; break; 7077 case 0xE9: op = Iop_QSub16Sx4; break; 7078 7079 case 0xD8: op = Iop_QSub8Ux8; break; 7080 case 0xD9: op = Iop_QSub16Ux4; break; 7081 7082 case 0xE5: op = Iop_MulHi16Sx4; break; 7083 case 0xD5: op = Iop_Mul16x4; break; 7084 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break; 7085 7086 case 0x74: op = Iop_CmpEQ8x8; break; 7087 case 0x75: op = Iop_CmpEQ16x4; break; 7088 case 0x76: op = Iop_CmpEQ32x2; break; 7089 7090 case 0x64: op = Iop_CmpGT8Sx8; break; 7091 case 0x65: op = Iop_CmpGT16Sx4; break; 7092 case 0x66: op = Iop_CmpGT32Sx2; break; 7093 7094 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break; 7095 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break; 7096 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break; 7097 7098 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; 7099 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; 7100 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break; 7101 7102 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break; 7103 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break; 7104 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break; 7105 7106 case 0xDB: op = Iop_And64; break; 7107 case 0xDF: op = Iop_And64; invG = True; break; 7108 case 0xEB: op = Iop_Or64; break; 7109 case 0xEF: /* Possibly do better here if argL and argR are the 7110 same reg */ 7111 op = Iop_Xor64; break; 7112 7113 /* Introduced in SSE1 */ 7114 case 0xE0: op = Iop_Avg8Ux8; break; 7115 case 0xE3: op = Iop_Avg16Ux4; break; 7116 case 0xEE: op = Iop_Max16Sx4; break; 7117 case 0xDE: op = Iop_Max8Ux8; break; 7118 case 0xEA: op = Iop_Min16Sx4; break; 7119 case 0xDA: op = Iop_Min8Ux8; break; 7120 case 0xE4: op = Iop_MulHi16Ux4; break; 7121 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break; 7122 7123 /* Introduced in SSE2 */ 7124 case 0xD4: op = Iop_Add64; break; 7125 case 0xFB: op = Iop_Sub64; break; 7126 7127 default: 7128 vex_printf("\n0x%x\n", (Int)opc); 7129 vpanic("dis_MMXop_regmem_to_reg"); 7130 } 7131 7132 # undef XXX 7133 7134 argG = getMMXReg(gregLO3ofRM(modrm)); 7135 if (invG) 7136 argG = unop(Iop_Not64, argG); 7137 7138 if (isReg) { 7139 delta++; 7140 argE = getMMXReg(eregLO3ofRM(modrm)); 7141 } else { 7142 Int len; 7143 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7144 delta += len; 7145 argE = loadLE(Ity_I64, mkexpr(addr)); 7146 } 7147 7148 if (eLeft) { 
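/* For the ops flagged eLeft above (the pack and interleave families),
   the E operand supplies the left-hand argument, so the usual
   G-left / E-right ordering is swapped here. */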
7149 argL = argE; 7150 argR = argG; 7151 } else { 7152 argL = argG; 7153 argR = argE; 7154 } 7155 7156 if (op != Iop_INVALID) { 7157 vassert(hName == NULL); 7158 vassert(hAddr == NULL); 7159 assign(res, binop(op, argL, argR)); 7160 } else { 7161 vassert(hName != NULL); 7162 vassert(hAddr != NULL); 7163 assign( res, 7164 mkIRExprCCall( 7165 Ity_I64, 7166 0/*regparms*/, hName, hAddr, 7167 mkIRExprVec_2( argL, argR ) 7168 ) 7169 ); 7170 } 7171 7172 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 7173 7174 DIP("%s%s %s, %s\n", 7175 name, show_granularity ? nameMMXGran(opc & 3) : "", 7176 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 7177 nameMMXReg(gregLO3ofRM(modrm)) ); 7178 7179 return delta; 7180 } 7181 7182 7183 /* Vector by scalar shift of G by the amount specified at the bottom 7184 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 7185 7186 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi, 7187 Prefix pfx, Long delta, 7188 const HChar* opname, IROp op ) 7189 { 7190 HChar dis_buf[50]; 7191 Int alen, size; 7192 IRTemp addr; 7193 Bool shl, shr, sar; 7194 UChar rm = getUChar(delta); 7195 IRTemp g0 = newTemp(Ity_I64); 7196 IRTemp g1 = newTemp(Ity_I64); 7197 IRTemp amt = newTemp(Ity_I64); 7198 IRTemp amt8 = newTemp(Ity_I8); 7199 7200 if (epartIsReg(rm)) { 7201 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 7202 DIP("%s %s,%s\n", opname, 7203 nameMMXReg(eregLO3ofRM(rm)), 7204 nameMMXReg(gregLO3ofRM(rm)) ); 7205 delta++; 7206 } else { 7207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 7208 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 7209 DIP("%s %s,%s\n", opname, 7210 dis_buf, 7211 nameMMXReg(gregLO3ofRM(rm)) ); 7212 delta += alen; 7213 } 7214 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 7215 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 7216 7217 shl = shr = sar = False; 7218 size = 0; 7219 switch (op) { 7220 case Iop_ShlN16x4: shl = True; size = 32; break; 7221 case Iop_ShlN32x2: shl = True; size = 32; break; 7222 case Iop_Shl64: shl = True; size = 64; break; 7223 case Iop_ShrN16x4: shr = True; size = 16; break; 7224 case Iop_ShrN32x2: shr = True; size = 32; break; 7225 case Iop_Shr64: shr = True; size = 64; break; 7226 case Iop_SarN16x4: sar = True; size = 16; break; 7227 case Iop_SarN32x2: sar = True; size = 32; break; 7228 default: vassert(0); 7229 } 7230 7231 if (shl || shr) { 7232 assign( 7233 g1, 7234 IRExpr_ITE( 7235 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7236 binop(op, mkexpr(g0), mkexpr(amt8)), 7237 mkU64(0) 7238 ) 7239 ); 7240 } else 7241 if (sar) { 7242 assign( 7243 g1, 7244 IRExpr_ITE( 7245 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7246 binop(op, mkexpr(g0), mkexpr(amt8)), 7247 binop(op, mkexpr(g0), mkU8(size-1)) 7248 ) 7249 ); 7250 } else { 7251 vassert(0); 7252 } 7253 7254 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) ); 7255 return delta; 7256 } 7257 7258 7259 /* Vector by scalar shift of E by an immediate byte. This is a 7260 straight copy of dis_SSE_shiftE_imm. 
*/ 7261 7262 static 7263 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op ) 7264 { 7265 Bool shl, shr, sar; 7266 UChar rm = getUChar(delta); 7267 IRTemp e0 = newTemp(Ity_I64); 7268 IRTemp e1 = newTemp(Ity_I64); 7269 UChar amt, size; 7270 vassert(epartIsReg(rm)); 7271 vassert(gregLO3ofRM(rm) == 2 7272 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 7273 amt = getUChar(delta+1); 7274 delta += 2; 7275 DIP("%s $%d,%s\n", opname, 7276 (Int)amt, 7277 nameMMXReg(eregLO3ofRM(rm)) ); 7278 7279 assign( e0, getMMXReg(eregLO3ofRM(rm)) ); 7280 7281 shl = shr = sar = False; 7282 size = 0; 7283 switch (op) { 7284 case Iop_ShlN16x4: shl = True; size = 16; break; 7285 case Iop_ShlN32x2: shl = True; size = 32; break; 7286 case Iop_Shl64: shl = True; size = 64; break; 7287 case Iop_SarN16x4: sar = True; size = 16; break; 7288 case Iop_SarN32x2: sar = True; size = 32; break; 7289 case Iop_ShrN16x4: shr = True; size = 16; break; 7290 case Iop_ShrN32x2: shr = True; size = 32; break; 7291 case Iop_Shr64: shr = True; size = 64; break; 7292 default: vassert(0); 7293 } 7294 7295 if (shl || shr) { 7296 assign( e1, amt >= size 7297 ? mkU64(0) 7298 : binop(op, mkexpr(e0), mkU8(amt)) 7299 ); 7300 } else 7301 if (sar) { 7302 assign( e1, amt >= size 7303 ? binop(op, mkexpr(e0), mkU8(size-1)) 7304 : binop(op, mkexpr(e0), mkU8(amt)) 7305 ); 7306 } else { 7307 vassert(0); 7308 } 7309 7310 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) ); 7311 return delta; 7312 } 7313 7314 7315 /* Completely handle all MMX instructions except emms. */ 7316 7317 static 7318 ULong dis_MMX ( Bool* decode_ok, 7319 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta ) 7320 { 7321 Int len; 7322 UChar modrm; 7323 HChar dis_buf[50]; 7324 UChar opc = getUChar(delta); 7325 delta++; 7326 7327 /* dis_MMX handles all insns except emms. 
*/ 7328 do_MMX_preamble(); 7329 7330 switch (opc) { 7331 7332 case 0x6E: 7333 if (sz == 4) { 7334 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/ 7335 modrm = getUChar(delta); 7336 if (epartIsReg(modrm)) { 7337 delta++; 7338 putMMXReg( 7339 gregLO3ofRM(modrm), 7340 binop( Iop_32HLto64, 7341 mkU32(0), 7342 getIReg32(eregOfRexRM(pfx,modrm)) ) ); 7343 DIP("movd %s, %s\n", 7344 nameIReg32(eregOfRexRM(pfx,modrm)), 7345 nameMMXReg(gregLO3ofRM(modrm))); 7346 } else { 7347 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7348 delta += len; 7349 putMMXReg( 7350 gregLO3ofRM(modrm), 7351 binop( Iop_32HLto64, 7352 mkU32(0), 7353 loadLE(Ity_I32, mkexpr(addr)) ) ); 7354 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7355 } 7356 } 7357 else 7358 if (sz == 8) { 7359 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/ 7360 modrm = getUChar(delta); 7361 if (epartIsReg(modrm)) { 7362 delta++; 7363 putMMXReg( gregLO3ofRM(modrm), 7364 getIReg64(eregOfRexRM(pfx,modrm)) ); 7365 DIP("movd %s, %s\n", 7366 nameIReg64(eregOfRexRM(pfx,modrm)), 7367 nameMMXReg(gregLO3ofRM(modrm))); 7368 } else { 7369 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7370 delta += len; 7371 putMMXReg( gregLO3ofRM(modrm), 7372 loadLE(Ity_I64, mkexpr(addr)) ); 7373 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7374 } 7375 } 7376 else { 7377 goto mmx_decode_failure; 7378 } 7379 break; 7380 7381 case 0x7E: 7382 if (sz == 4) { 7383 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 7384 modrm = getUChar(delta); 7385 if (epartIsReg(modrm)) { 7386 delta++; 7387 putIReg32( eregOfRexRM(pfx,modrm), 7388 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7389 DIP("movd %s, %s\n", 7390 nameMMXReg(gregLO3ofRM(modrm)), 7391 nameIReg32(eregOfRexRM(pfx,modrm))); 7392 } else { 7393 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7394 delta += len; 7395 storeLE( mkexpr(addr), 7396 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7397 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7398 } 7399 } 7400 else 7401 if (sz == 8) { 7402 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 7403 modrm = getUChar(delta); 7404 if (epartIsReg(modrm)) { 7405 delta++; 7406 putIReg64( eregOfRexRM(pfx,modrm), 7407 getMMXReg(gregLO3ofRM(modrm)) ); 7408 DIP("movd %s, %s\n", 7409 nameMMXReg(gregLO3ofRM(modrm)), 7410 nameIReg64(eregOfRexRM(pfx,modrm))); 7411 } else { 7412 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7413 delta += len; 7414 storeLE( mkexpr(addr), 7415 getMMXReg(gregLO3ofRM(modrm)) ); 7416 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7417 } 7418 } else { 7419 goto mmx_decode_failure; 7420 } 7421 break; 7422 7423 case 0x6F: 7424 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 7425 if (sz != 4 7426 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7427 goto mmx_decode_failure; 7428 modrm = getUChar(delta); 7429 if (epartIsReg(modrm)) { 7430 delta++; 7431 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 7432 DIP("movq %s, %s\n", 7433 nameMMXReg(eregLO3ofRM(modrm)), 7434 nameMMXReg(gregLO3ofRM(modrm))); 7435 } else { 7436 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7437 delta += len; 7438 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 7439 DIP("movq %s, %s\n", 7440 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7441 } 7442 break; 7443 7444 case 0x7F: 7445 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 7446 if (sz != 4 7447 && /*ignore redundant REX.W*/!(sz==8 && 
haveNo66noF2noF3(pfx))) 7448 goto mmx_decode_failure; 7449 modrm = getUChar(delta); 7450 if (epartIsReg(modrm)) { 7451 delta++; 7452 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) ); 7453 DIP("movq %s, %s\n", 7454 nameMMXReg(gregLO3ofRM(modrm)), 7455 nameMMXReg(eregLO3ofRM(modrm))); 7456 } else { 7457 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7458 delta += len; 7459 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 7460 DIP("mov(nt)q %s, %s\n", 7461 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7462 } 7463 break; 7464 7465 case 0xFC: 7466 case 0xFD: 7467 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 7468 if (sz != 4) 7469 goto mmx_decode_failure; 7470 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 7471 break; 7472 7473 case 0xEC: 7474 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7475 if (sz != 4 7476 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7477 goto mmx_decode_failure; 7478 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 7479 break; 7480 7481 case 0xDC: 7482 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7483 if (sz != 4) 7484 goto mmx_decode_failure; 7485 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 7486 break; 7487 7488 case 0xF8: 7489 case 0xF9: 7490 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 7491 if (sz != 4) 7492 goto mmx_decode_failure; 7493 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 7494 break; 7495 7496 case 0xE8: 7497 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7498 if (sz != 4) 7499 goto mmx_decode_failure; 7500 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 7501 break; 7502 7503 case 0xD8: 7504 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7505 if (sz != 4) 7506 goto mmx_decode_failure; 7507 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 7508 break; 7509 7510 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 7511 if (sz != 4) 7512 goto mmx_decode_failure; 7513 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 7514 break; 7515 7516 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 7517 if (sz != 4) 7518 goto mmx_decode_failure; 7519 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 7520 break; 7521 7522 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 7523 vassert(sz == 4); 7524 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 7525 break; 7526 7527 case 0x74: 7528 case 0x75: 7529 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 7530 if (sz != 4) 7531 goto mmx_decode_failure; 7532 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 7533 break; 7534 7535 case 0x64: 7536 case 0x65: 7537 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 7538 if (sz != 4) 7539 goto mmx_decode_failure; 7540 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 7541 break; 7542 7543 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 7544 if (sz != 4) 7545 goto mmx_decode_failure; 7546 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 7547 break; 7548 7549 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7550 if (sz != 4) 7551 goto mmx_decode_failure; 7552 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 7553 break; 7554 7555 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7556 if (sz 
!= 4) 7557 goto mmx_decode_failure; 7558 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False ); 7559 break; 7560 7561 case 0x68: 7562 case 0x69: 7563 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 7564 if (sz != 4 7565 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7566 goto mmx_decode_failure; 7567 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 7568 break; 7569 7570 case 0x60: 7571 case 0x61: 7572 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7573 if (sz != 4 7574 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7575 goto mmx_decode_failure; 7576 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 7577 break; 7578 7579 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 7580 if (sz != 4) 7581 goto mmx_decode_failure; 7582 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 7583 break; 7584 7585 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 7586 if (sz != 4) 7587 goto mmx_decode_failure; 7588 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7589 break; 7590 7591 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7592 if (sz != 4) 7593 goto mmx_decode_failure; 7594 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7595 break; 7596 7597 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7598 if (sz != 4) 7599 goto mmx_decode_failure; 7600 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7601 break; 7602 7603 # define SHIFT_BY_REG(_name,_op) \ 7604 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7605 break; 7606 7607 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7608 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7609 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7610 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7611 7612 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7613 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7614 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7615 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64); 7616 7617 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7618 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7619 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7620 7621 # undef SHIFT_BY_REG 7622 7623 case 0x71: 7624 case 0x72: 7625 case 0x73: { 7626 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7627 UChar byte2, subopc; 7628 if (sz != 4) 7629 goto mmx_decode_failure; 7630 byte2 = getUChar(delta); /* amode / sub-opcode */ 7631 subopc = toUChar( (byte2 >> 3) & 7 ); 7632 7633 # define SHIFT_BY_IMM(_name,_op) \ 7634 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7635 } while (0) 7636 7637 if (subopc == 2 /*SRL*/ && opc == 0x71) 7638 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7639 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7640 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7641 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7642 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7643 7644 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7645 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7646 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7647 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7648 7649 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7650 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7651 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7652 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7653 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7654 SHIFT_BY_IMM("psllq", Iop_Shl64); 7655 7656 else goto mmx_decode_failure; 7657 7658 # undef SHIFT_BY_IMM 7659 break; 7660 } 7661 7662 case 0xF7: { 7663 
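/* MASKMOVQ (mask)mmxreg E, (data)mmxreg G: byte-granularity conditional
   store.  As implemented below, each byte of G whose corresponding byte
   in E has its top bit set is written to [RDI] (subject to address
   overrides); the remaining bytes at [RDI] are left unchanged.  This is
   done as a read-modify-write of the whole 8-byte destination, using a
   lane mask built by arithmetically shifting each byte of E right by 7. */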
IRTemp addr = newTemp(Ity_I64); 7664 IRTemp regD = newTemp(Ity_I64); 7665 IRTemp regM = newTemp(Ity_I64); 7666 IRTemp mask = newTemp(Ity_I64); 7667 IRTemp olddata = newTemp(Ity_I64); 7668 IRTemp newdata = newTemp(Ity_I64); 7669 7670 modrm = getUChar(delta); 7671 if (sz != 4 || (!epartIsReg(modrm))) 7672 goto mmx_decode_failure; 7673 delta++; 7674 7675 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7676 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7677 assign( regD, getMMXReg( gregLO3ofRM(modrm) )); 7678 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7679 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7680 assign( newdata, 7681 binop(Iop_Or64, 7682 binop(Iop_And64, 7683 mkexpr(regD), 7684 mkexpr(mask) ), 7685 binop(Iop_And64, 7686 mkexpr(olddata), 7687 unop(Iop_Not64, mkexpr(mask)))) ); 7688 storeLE( mkexpr(addr), mkexpr(newdata) ); 7689 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7690 nameMMXReg( gregLO3ofRM(modrm) ) ); 7691 break; 7692 } 7693 7694 /* --- MMX decode failure --- */ 7695 default: 7696 mmx_decode_failure: 7697 *decode_ok = False; 7698 return delta; /* ignored */ 7699 7700 } 7701 7702 *decode_ok = True; 7703 return delta; 7704 } 7705 7706 7707 /*------------------------------------------------------------*/ 7708 /*--- More misc arithmetic and other obscure insns. ---*/ 7709 /*------------------------------------------------------------*/ 7710 7711 /* Generate base << amt with vacated places filled with stuff 7712 from xtra. amt guaranteed in 0 .. 63. */ 7713 static 7714 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7715 { 7716 /* if amt == 0 7717 then base 7718 else (base << amt) | (xtra >>u (64-amt)) 7719 */ 7720 return 7721 IRExpr_ITE( 7722 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7723 binop(Iop_Or64, 7724 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7725 binop(Iop_Shr64, mkexpr(xtra), 7726 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7727 ), 7728 mkexpr(base) 7729 ); 7730 } 7731 7732 /* Generate base >>u amt with vacated places filled with stuff 7733 from xtra. amt guaranteed in 0 .. 63. */ 7734 static 7735 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) 7736 { 7737 /* if amt == 0 7738 then base 7739 else (base >>u amt) | (xtra << (64-amt)) 7740 */ 7741 return 7742 IRExpr_ITE( 7743 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7744 binop(Iop_Or64, 7745 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), 7746 binop(Iop_Shl64, mkexpr(xtra), 7747 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7748 ), 7749 mkexpr(base) 7750 ); 7751 } 7752 7753 /* Double length left and right shifts. Apparently only required in 7754 v-size (no b- variant). */ 7755 static 7756 ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi, 7757 Prefix pfx, 7758 Long delta, UChar modrm, 7759 Int sz, 7760 IRExpr* shift_amt, 7761 Bool amt_is_literal, 7762 const HChar* shift_amt_txt, 7763 Bool left_shift ) 7764 { 7765 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 7766 for printing it. And eip on entry points at the modrm byte. */ 7767 Int len; 7768 HChar dis_buf[50]; 7769 7770 IRType ty = szToITy(sz); 7771 IRTemp gsrc = newTemp(ty); 7772 IRTemp esrc = newTemp(ty); 7773 IRTemp addr = IRTemp_INVALID; 7774 IRTemp tmpSH = newTemp(Ity_I8); 7775 IRTemp tmpSS = newTemp(Ity_I8); 7776 IRTemp tmp64 = IRTemp_INVALID; 7777 IRTemp res64 = IRTemp_INVALID; 7778 IRTemp rss64 = IRTemp_INVALID; 7779 IRTemp resTy = IRTemp_INVALID; 7780 IRTemp rssTy = IRTemp_INVALID; 7781 Int mask = sz==8 ? 
63 : 31; 7782 7783 vassert(sz == 2 || sz == 4 || sz == 8); 7784 7785 /* The E-part is the destination; this is shifted. The G-part 7786 supplies bits to be shifted into the E-part, but is not 7787 changed. 7788 7789 If shifting left, form a double-length word with E at the top 7790 and G at the bottom, and shift this left. The result is then in 7791 the high part. 7792 7793 If shifting right, form a double-length word with G at the top 7794 and E at the bottom, and shift this right. The result is then 7795 at the bottom. */ 7796 7797 /* Fetch the operands. */ 7798 7799 assign( gsrc, getIRegG(sz, pfx, modrm) ); 7800 7801 if (epartIsReg(modrm)) { 7802 delta++; 7803 assign( esrc, getIRegE(sz, pfx, modrm) ); 7804 DIP("sh%cd%c %s, %s, %s\n", 7805 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7806 shift_amt_txt, 7807 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm)); 7808 } else { 7809 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 7810 /* # bytes following amode */ 7811 amt_is_literal ? 1 : 0 ); 7812 delta += len; 7813 assign( esrc, loadLE(ty, mkexpr(addr)) ); 7814 DIP("sh%cd%c %s, %s, %s\n", 7815 ( left_shift ? 'l' : 'r' ), nameISize(sz), 7816 shift_amt_txt, 7817 nameIRegG(sz, pfx, modrm), dis_buf); 7818 } 7819 7820 /* Calculate the masked shift amount (tmpSH), the masked subshift 7821 amount (tmpSS), the shifted value (res64) and the subshifted 7822 value (rss64). */ 7823 7824 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) ); 7825 assign( tmpSS, binop(Iop_And8, 7826 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ), 7827 mkU8(mask))); 7828 7829 tmp64 = newTemp(Ity_I64); 7830 res64 = newTemp(Ity_I64); 7831 rss64 = newTemp(Ity_I64); 7832 7833 if (sz == 2 || sz == 4) { 7834 7835 /* G is xtra; E is data */ 7836 /* what a freaking nightmare: */ 7837 if (sz == 4 && left_shift) { 7838 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) ); 7839 assign( res64, 7840 binop(Iop_Shr64, 7841 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7842 mkU8(32)) ); 7843 assign( rss64, 7844 binop(Iop_Shr64, 7845 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)), 7846 mkU8(32)) ); 7847 } 7848 else 7849 if (sz == 4 && !left_shift) { 7850 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) ); 7851 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7852 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) ); 7853 } 7854 else 7855 if (sz == 2 && left_shift) { 7856 assign( tmp64, 7857 binop(Iop_32HLto64, 7858 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)), 7859 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)) 7860 )); 7861 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */ 7862 assign( res64, 7863 binop(Iop_Shr64, 7864 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)), 7865 mkU8(48)) ); 7866 /* subshift formed by shifting [esrc'0000'0000'0000] */ 7867 assign( rss64, 7868 binop(Iop_Shr64, 7869 binop(Iop_Shl64, 7870 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)), 7871 mkU8(48)), 7872 mkexpr(tmpSS)), 7873 mkU8(48)) ); 7874 } 7875 else 7876 if (sz == 2 && !left_shift) { 7877 assign( tmp64, 7878 binop(Iop_32HLto64, 7879 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)), 7880 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc)) 7881 )); 7882 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */ 7883 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) ); 7884 /* subshift formed by shifting [0000'0000'0000'esrc] */ 7885 assign( rss64, binop(Iop_Shr64, 7886 unop(Iop_16Uto64, mkexpr(esrc)), 7887 mkexpr(tmpSS)) ); 7888 } 7889 7890 } else { 7891 7892 vassert(sz == 8); 7893 if 
(left_shift) { 7894 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH )); 7895 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS )); 7896 } else { 7897 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH )); 7898 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS )); 7899 } 7900 7901 } 7902 7903 resTy = newTemp(ty); 7904 rssTy = newTemp(ty); 7905 assign( resTy, narrowTo(ty, mkexpr(res64)) ); 7906 assign( rssTy, narrowTo(ty, mkexpr(rss64)) ); 7907 7908 /* Put result back and write the flags thunk. */ 7909 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64, 7910 resTy, rssTy, ty, tmpSH ); 7911 7912 if (epartIsReg(modrm)) { 7913 putIRegE(sz, pfx, modrm, mkexpr(resTy)); 7914 } else { 7915 storeLE( mkexpr(addr), mkexpr(resTy) ); 7916 } 7917 7918 if (amt_is_literal) delta++; 7919 return delta; 7920 } 7921 7922 7923 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not 7924 required. */ 7925 7926 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 7927 7928 static const HChar* nameBtOp ( BtOp op ) 7929 { 7930 switch (op) { 7931 case BtOpNone: return ""; 7932 case BtOpSet: return "s"; 7933 case BtOpReset: return "r"; 7934 case BtOpComp: return "c"; 7935 default: vpanic("nameBtOp(amd64)"); 7936 } 7937 } 7938 7939 7940 static 7941 ULong dis_bt_G_E ( VexAbiInfo* vbi, 7942 Prefix pfx, Int sz, Long delta, BtOp op, 7943 /*OUT*/Bool* decode_OK ) 7944 { 7945 HChar dis_buf[50]; 7946 UChar modrm; 7947 Int len; 7948 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 7949 t_addr1, t_rsp, t_mask, t_new; 7950 7951 vassert(sz == 2 || sz == 4 || sz == 8); 7952 7953 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 7954 = t_addr0 = t_addr1 = t_rsp 7955 = t_mask = t_new = IRTemp_INVALID; 7956 7957 t_fetched = newTemp(Ity_I8); 7958 t_new = newTemp(Ity_I8); 7959 t_bitno0 = newTemp(Ity_I64); 7960 t_bitno1 = newTemp(Ity_I64); 7961 t_bitno2 = newTemp(Ity_I8); 7962 t_addr1 = newTemp(Ity_I64); 7963 modrm = getUChar(delta); 7964 7965 *decode_OK = True; 7966 if (epartIsReg(modrm)) { 7967 /* F2 and F3 are never acceptable. */ 7968 if (haveF2orF3(pfx)) { 7969 *decode_OK = False; 7970 return delta; 7971 } 7972 } else { 7973 /* F2 or F3 (but not both) are allowed, provided LOCK is also 7974 present, and only for the BTC/BTS/BTR cases (not BT). */ 7975 if (haveF2orF3(pfx)) { 7976 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) { 7977 *decode_OK = False; 7978 return delta; 7979 } 7980 } 7981 } 7982 7983 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) ); 7984 7985 if (epartIsReg(modrm)) { 7986 delta++; 7987 /* Get it onto the client's stack. Oh, this is a horrible 7988 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925. 7989 Because of the ELF ABI stack redzone, there may be live data 7990 up to 128 bytes below %RSP. So we can't just push it on the 7991 stack, else we may wind up trashing live data, and causing 7992 impossible-to-find simulation errors. (Yes, this did 7993 happen.) So we need to drop RSP before at least 128 before 7994 pushing it. That unfortunately means hitting Memcheck's 7995 fast-case painting code. Ideally we should drop more than 7996 128, to reduce the chances of breaking buggy programs that 7997 have live data below -128(%RSP). Memcheck fast-cases moves 7998 of 288 bytes due to the need to handle ppc64-linux quickly, 7999 so let's use 288. Of course the real fix is to get rid of 8000 this kludge entirely. 
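           Note the 288 used here has to stay in sync with the matching
           add-back of 288 when %RSP is restored at the end of this
           function.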
*/ 8001 t_rsp = newTemp(Ity_I64); 8002 t_addr0 = newTemp(Ity_I64); 8003 8004 vassert(vbi->guest_stack_redzone_size == 128); 8005 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) ); 8006 putIReg64(R_RSP, mkexpr(t_rsp)); 8007 8008 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) ); 8009 8010 /* Make t_addr0 point at it. */ 8011 assign( t_addr0, mkexpr(t_rsp) ); 8012 8013 /* Mask out upper bits of the shift amount, since we're doing a 8014 reg. */ 8015 assign( t_bitno1, binop(Iop_And64, 8016 mkexpr(t_bitno0), 8017 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) ); 8018 8019 } else { 8020 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 8021 delta += len; 8022 assign( t_bitno1, mkexpr(t_bitno0) ); 8023 } 8024 8025 /* At this point: t_addr0 is the address being operated on. If it 8026 was a reg, we will have pushed it onto the client's stack. 8027 t_bitno1 is the bit number, suitably masked in the case of a 8028 reg. */ 8029 8030 /* Now the main sequence. */ 8031 assign( t_addr1, 8032 binop(Iop_Add64, 8033 mkexpr(t_addr0), 8034 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) ); 8035 8036 /* t_addr1 now holds effective address */ 8037 8038 assign( t_bitno2, 8039 unop(Iop_64to8, 8040 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) ); 8041 8042 /* t_bitno2 contains offset of bit within byte */ 8043 8044 if (op != BtOpNone) { 8045 t_mask = newTemp(Ity_I8); 8046 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) ); 8047 } 8048 8049 /* t_mask is now a suitable byte mask */ 8050 8051 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) ); 8052 8053 if (op != BtOpNone) { 8054 switch (op) { 8055 case BtOpSet: 8056 assign( t_new, 8057 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) ); 8058 break; 8059 case BtOpComp: 8060 assign( t_new, 8061 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) ); 8062 break; 8063 case BtOpReset: 8064 assign( t_new, 8065 binop(Iop_And8, mkexpr(t_fetched), 8066 unop(Iop_Not8, mkexpr(t_mask))) ); 8067 break; 8068 default: 8069 vpanic("dis_bt_G_E(amd64)"); 8070 } 8071 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) { 8072 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/, 8073 mkexpr(t_new)/*new*/, 8074 guest_RIP_curr_instr ); 8075 } else { 8076 storeLE( mkexpr(t_addr1), mkexpr(t_new) ); 8077 } 8078 } 8079 8080 /* Side effect done; now get selected bit into Carry flag */ 8081 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */ 8082 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8083 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8084 stmt( IRStmt_Put( 8085 OFFB_CC_DEP1, 8086 binop(Iop_And64, 8087 binop(Iop_Shr64, 8088 unop(Iop_8Uto64, mkexpr(t_fetched)), 8089 mkexpr(t_bitno2)), 8090 mkU64(1))) 8091 ); 8092 /* Set NDEP even though it isn't used. This makes redundant-PUT 8093 elimination of previous stores to this field work better. */ 8094 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8095 8096 /* Move reg operand from stack back to reg */ 8097 if (epartIsReg(modrm)) { 8098 /* t_rsp still points at it. */ 8099 /* only write the reg if actually modifying it; doing otherwise 8100 zeroes the top half erroneously when doing btl due to 8101 standard zero-extend rule */ 8102 if (op != BtOpNone) 8103 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) ); 8104 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) ); 8105 } 8106 8107 DIP("bt%s%c %s, %s\n", 8108 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm), 8109 ( epartIsReg(modrm) ? 
nameIRegE(sz, pfx, modrm) : dis_buf ) ); 8110 8111 return delta; 8112 } 8113 8114 8115 8116 /* Handle BSF/BSR. Only v-size seems necessary. */ 8117 static 8118 ULong dis_bs_E_G ( VexAbiInfo* vbi, 8119 Prefix pfx, Int sz, Long delta, Bool fwds ) 8120 { 8121 Bool isReg; 8122 UChar modrm; 8123 HChar dis_buf[50]; 8124 8125 IRType ty = szToITy(sz); 8126 IRTemp src = newTemp(ty); 8127 IRTemp dst = newTemp(ty); 8128 IRTemp src64 = newTemp(Ity_I64); 8129 IRTemp dst64 = newTemp(Ity_I64); 8130 IRTemp srcB = newTemp(Ity_I1); 8131 8132 vassert(sz == 8 || sz == 4 || sz == 2); 8133 8134 modrm = getUChar(delta); 8135 isReg = epartIsReg(modrm); 8136 if (isReg) { 8137 delta++; 8138 assign( src, getIRegE(sz, pfx, modrm) ); 8139 } else { 8140 Int len; 8141 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 8142 delta += len; 8143 assign( src, loadLE(ty, mkexpr(addr)) ); 8144 } 8145 8146 DIP("bs%c%c %s, %s\n", 8147 fwds ? 'f' : 'r', nameISize(sz), 8148 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ), 8149 nameIRegG(sz, pfx, modrm)); 8150 8151 /* First, widen src to 64 bits if it is not already. */ 8152 assign( src64, widenUto64(mkexpr(src)) ); 8153 8154 /* Generate a bool expression which is zero iff the original is 8155 zero, and nonzero otherwise. Ask for a CmpNE version which, if 8156 instrumented by Memcheck, is instrumented expensively, since 8157 this may be used on the output of a preceding movmskb insn, 8158 which has been known to be partially defined, and in need of 8159 careful handling. */ 8160 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) ); 8161 8162 /* Flags: Z is 1 iff source value is zero. All others 8163 are undefined -- we force them to zero. */ 8164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8165 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8166 stmt( IRStmt_Put( 8167 OFFB_CC_DEP1, 8168 IRExpr_ITE( mkexpr(srcB), 8169 /* src!=0 */ 8170 mkU64(0), 8171 /* src==0 */ 8172 mkU64(AMD64G_CC_MASK_Z) 8173 ) 8174 )); 8175 /* Set NDEP even though it isn't used. This makes redundant-PUT 8176 elimination of previous stores to this field work better. */ 8177 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8178 8179 /* Result: iff source value is zero, we can't use 8180 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case. 8181 But anyway, amd64 semantics say the result is undefined in 8182 such situations. Hence handle the zero case specially. */ 8183 8184 /* Bleh. What we compute: 8185 8186 bsf64: if src == 0 then {dst is unchanged} 8187 else Ctz64(src) 8188 8189 bsr64: if src == 0 then {dst is unchanged} 8190 else 63 - Clz64(src) 8191 8192 bsf32: if src == 0 then {dst is unchanged} 8193 else Ctz64(32Uto64(src)) 8194 8195 bsr32: if src == 0 then {dst is unchanged} 8196 else 63 - Clz64(32Uto64(src)) 8197 8198 bsf16: if src == 0 then {dst is unchanged} 8199 else Ctz64(32Uto64(16Uto32(src))) 8200 8201 bsr16: if src == 0 then {dst is unchanged} 8202 else 63 - Clz64(32Uto64(16Uto32(src))) 8203 */ 8204 8205 /* The main computation, guarding against zero. */ 8206 assign( dst64, 8207 IRExpr_ITE( 8208 mkexpr(srcB), 8209 /* src != 0 */ 8210 fwds ? 
unop(Iop_Ctz64, mkexpr(src64)) 8211 : binop(Iop_Sub64, 8212 mkU64(63), 8213 unop(Iop_Clz64, mkexpr(src64))), 8214 /* src == 0 -- leave dst unchanged */ 8215 widenUto64( getIRegG( sz, pfx, modrm ) ) 8216 ) 8217 ); 8218 8219 if (sz == 2) 8220 assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); 8221 else 8222 if (sz == 4) 8223 assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); 8224 else 8225 assign( dst, mkexpr(dst64) ); 8226 8227 /* dump result back */ 8228 putIRegG( sz, pfx, modrm, mkexpr(dst) ); 8229 8230 return delta; 8231 } 8232 8233 8234 /* swap rAX with the reg specified by reg and REX.B */ 8235 static 8236 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) 8237 { 8238 IRType ty = szToITy(sz); 8239 IRTemp t1 = newTemp(ty); 8240 IRTemp t2 = newTemp(ty); 8241 vassert(sz == 2 || sz == 4 || sz == 8); 8242 vassert(regLo3 < 8); 8243 if (sz == 8) { 8244 assign( t1, getIReg64(R_RAX) ); 8245 assign( t2, getIRegRexB(8, pfx, regLo3) ); 8246 putIReg64( R_RAX, mkexpr(t2) ); 8247 putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); 8248 } else if (sz == 4) { 8249 assign( t1, getIReg32(R_RAX) ); 8250 assign( t2, getIRegRexB(4, pfx, regLo3) ); 8251 putIReg32( R_RAX, mkexpr(t2) ); 8252 putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); 8253 } else { 8254 assign( t1, getIReg16(R_RAX) ); 8255 assign( t2, getIRegRexB(2, pfx, regLo3) ); 8256 putIReg16( R_RAX, mkexpr(t2) ); 8257 putIRegRexB(2, pfx, regLo3, mkexpr(t1) ); 8258 } 8259 DIP("xchg%c %s, %s\n", 8260 nameISize(sz), nameIRegRAX(sz), 8261 nameIRegRexB(sz,pfx, regLo3)); 8262 } 8263 8264 8265 static 8266 void codegen_SAHF ( void ) 8267 { 8268 /* Set the flags to: 8269 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 8270 -- retain the old O flag 8271 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8272 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) 8273 */ 8274 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8275 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8276 IRTemp oldflags = newTemp(Ity_I64); 8277 assign( oldflags, mk_amd64g_calculate_rflags_all() ); 8278 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8279 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8280 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8281 stmt( IRStmt_Put( OFFB_CC_DEP1, 8282 binop(Iop_Or64, 8283 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), 8284 binop(Iop_And64, 8285 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), 8286 mkU64(mask_SZACP)) 8287 ) 8288 )); 8289 } 8290 8291 8292 static 8293 void codegen_LAHF ( void ) 8294 { 8295 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 8296 IRExpr* rax_with_hole; 8297 IRExpr* new_byte; 8298 IRExpr* new_rax; 8299 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8300 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8301 8302 IRTemp flags = newTemp(Ity_I64); 8303 assign( flags, mk_amd64g_calculate_rflags_all() ); 8304 8305 rax_with_hole 8306 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); 8307 new_byte 8308 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), 8309 mkU64(1<<1)); 8310 new_rax 8311 = binop(Iop_Or64, rax_with_hole, 8312 binop(Iop_Shl64, new_byte, mkU8(8))); 8313 putIReg64(R_RAX, new_rax); 8314 } 8315 8316 8317 static 8318 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, 8319 VexAbiInfo* vbi, 8320 Prefix pfx, 8321 Int size, 8322 Long delta0 ) 8323 { 8324 HChar dis_buf[50]; 8325 Int len; 8326 8327 IRType ty = szToITy(size); 8328 IRTemp acc = newTemp(ty); 8329 IRTemp src = newTemp(ty); 8330 IRTemp dest = newTemp(ty); 8331 IRTemp dest2 = newTemp(ty); 8332 IRTemp acc2 = newTemp(ty); 8333 IRTemp 
cond = newTemp(Ity_I1); 8334 IRTemp addr = IRTemp_INVALID; 8335 UChar rm = getUChar(delta0); 8336 8337 /* There are 3 cases to consider: 8338 8339 reg-reg: ignore any lock prefix, generate sequence based 8340 on ITE 8341 8342 reg-mem, not locked: ignore any lock prefix, generate sequence 8343 based on ITE 8344 8345 reg-mem, locked: use IRCAS 8346 */ 8347 8348 /* Decide whether F2 or F3 are acceptable. Never for register 8349 case, but for the memory case, one or the other is OK provided 8350 LOCK is also present. */ 8351 if (epartIsReg(rm)) { 8352 if (haveF2orF3(pfx)) { 8353 *ok = False; 8354 return delta0; 8355 } 8356 } else { 8357 if (haveF2orF3(pfx)) { 8358 if (haveF2andF3(pfx) || !haveLOCK(pfx)) { 8359 *ok = False; 8360 return delta0; 8361 } 8362 } 8363 } 8364 8365 if (epartIsReg(rm)) { 8366 /* case 1 */ 8367 assign( dest, getIRegE(size, pfx, rm) ); 8368 delta0++; 8369 assign( src, getIRegG(size, pfx, rm) ); 8370 assign( acc, getIRegRAX(size) ); 8371 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8372 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8373 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8374 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8375 putIRegRAX(size, mkexpr(acc2)); 8376 putIRegE(size, pfx, rm, mkexpr(dest2)); 8377 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8378 nameIRegG(size,pfx,rm), 8379 nameIRegE(size,pfx,rm) ); 8380 } 8381 else if (!epartIsReg(rm) && !haveLOCK(pfx)) { 8382 /* case 2 */ 8383 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8384 assign( dest, loadLE(ty, mkexpr(addr)) ); 8385 delta0 += len; 8386 assign( src, getIRegG(size, pfx, rm) ); 8387 assign( acc, getIRegRAX(size) ); 8388 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8389 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8390 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) ); 8391 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8392 putIRegRAX(size, mkexpr(acc2)); 8393 storeLE( mkexpr(addr), mkexpr(dest2) ); 8394 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8395 nameIRegG(size,pfx,rm), dis_buf); 8396 } 8397 else if (!epartIsReg(rm) && haveLOCK(pfx)) { 8398 /* case 3 */ 8399 /* src is new value. acc is expected value. dest is old value. 8400 Compute success from the output of the IRCAS, and steer the 8401 new value for RAX accordingly: in case of success, RAX is 8402 unchanged. */ 8403 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8404 delta0 += len; 8405 assign( src, getIRegG(size, pfx, rm) ); 8406 assign( acc, getIRegRAX(size) ); 8407 stmt( IRStmt_CAS( 8408 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 8409 NULL, mkexpr(acc), NULL, mkexpr(src) ) 8410 )); 8411 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8412 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8413 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8414 putIRegRAX(size, mkexpr(acc2)); 8415 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8416 nameIRegG(size,pfx,rm), dis_buf); 8417 } 8418 else vassert(0); 8419 8420 *ok = True; 8421 return delta0; 8422 } 8423 8424 8425 /* Handle conditional move instructions of the form 8426 cmovcc E(reg-or-mem), G(reg) 8427 8428 E(src) is reg-or-mem 8429 G(dst) is reg. 
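   Note that when E is a memory operand, the load below is done
   unconditionally and the condition only selects (via ITE) between
   the loaded value and the old value of G.  This matches the
   architected behaviour: cmovcc may access its memory operand even
   when the condition is false.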
8430 8431 If E is reg, --> GET %E, tmps 8432 GET %G, tmpd 8433 CMOVcc tmps, tmpd 8434 PUT tmpd, %G 8435 8436 If E is mem --> (getAddr E) -> tmpa 8437 LD (tmpa), tmps 8438 GET %G, tmpd 8439 CMOVcc tmps, tmpd 8440 PUT tmpd, %G 8441 */ 8442 static 8443 ULong dis_cmov_E_G ( VexAbiInfo* vbi, 8444 Prefix pfx, 8445 Int sz, 8446 AMD64Condcode cond, 8447 Long delta0 ) 8448 { 8449 UChar rm = getUChar(delta0); 8450 HChar dis_buf[50]; 8451 Int len; 8452 8453 IRType ty = szToITy(sz); 8454 IRTemp tmps = newTemp(ty); 8455 IRTemp tmpd = newTemp(ty); 8456 8457 if (epartIsReg(rm)) { 8458 assign( tmps, getIRegE(sz, pfx, rm) ); 8459 assign( tmpd, getIRegG(sz, pfx, rm) ); 8460 8461 putIRegG( sz, pfx, rm, 8462 IRExpr_ITE( mk_amd64g_calculate_condition(cond), 8463 mkexpr(tmps), 8464 mkexpr(tmpd) ) 8465 ); 8466 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8467 nameIRegE(sz,pfx,rm), 8468 nameIRegG(sz,pfx,rm)); 8469 return 1+delta0; 8470 } 8471 8472 /* E refers to memory */ 8473 { 8474 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8475 assign( tmps, loadLE(ty, mkexpr(addr)) ); 8476 assign( tmpd, getIRegG(sz, pfx, rm) ); 8477 8478 putIRegG( sz, pfx, rm, 8479 IRExpr_ITE( mk_amd64g_calculate_condition(cond), 8480 mkexpr(tmps), 8481 mkexpr(tmpd) ) 8482 ); 8483 8484 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8485 dis_buf, 8486 nameIRegG(sz,pfx,rm)); 8487 return len+delta0; 8488 } 8489 } 8490 8491 8492 static 8493 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok, 8494 VexAbiInfo* vbi, 8495 Prefix pfx, Int sz, Long delta0 ) 8496 { 8497 Int len; 8498 UChar rm = getUChar(delta0); 8499 HChar dis_buf[50]; 8500 8501 IRType ty = szToITy(sz); 8502 IRTemp tmpd = newTemp(ty); 8503 IRTemp tmpt0 = newTemp(ty); 8504 IRTemp tmpt1 = newTemp(ty); 8505 8506 /* There are 3 cases to consider: 8507 8508 reg-reg: ignore any lock prefix, 8509 generate 'naive' (non-atomic) sequence 8510 8511 reg-mem, not locked: ignore any lock prefix, generate 'naive' 8512 (non-atomic) sequence 8513 8514 reg-mem, locked: use IRCAS 8515 */ 8516 8517 if (epartIsReg(rm)) { 8518 /* case 1 */ 8519 assign( tmpd, getIRegE(sz, pfx, rm) ); 8520 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8521 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8522 mkexpr(tmpd), mkexpr(tmpt0)) ); 8523 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8524 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8525 putIRegE(sz, pfx, rm, mkexpr(tmpt1)); 8526 DIP("xadd%c %s, %s\n", 8527 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm)); 8528 *decode_ok = True; 8529 return 1+delta0; 8530 } 8531 else if (!epartIsReg(rm) && !haveLOCK(pfx)) { 8532 /* case 2 */ 8533 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8534 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8535 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8536 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8537 mkexpr(tmpd), mkexpr(tmpt0)) ); 8538 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8539 storeLE( mkexpr(addr), mkexpr(tmpt1) ); 8540 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8541 DIP("xadd%c %s, %s\n", 8542 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8543 *decode_ok = True; 8544 return len+delta0; 8545 } 8546 else if (!epartIsReg(rm) && haveLOCK(pfx)) { 8547 /* case 3 */ 8548 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8549 assign( tmpd, loadLE(ty, mkexpr(addr)) ); 8550 assign( tmpt0, getIRegG(sz, pfx, rm) ); 8551 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8), 8552 mkexpr(tmpd), mkexpr(tmpt0)) ); 8553 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/, 8554 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr ); 8555 
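      /* If the CAS fails (the location changed between the load above
         and the store), casLE side-exits back to the start of this
         instruction, so the flag thunk and register writes below only
         take effect once the locked read-modify-write has succeeded. */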
setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty ); 8556 putIRegG(sz, pfx, rm, mkexpr(tmpd)); 8557 DIP("xadd%c %s, %s\n", 8558 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf); 8559 *decode_ok = True; 8560 return len+delta0; 8561 } 8562 /*UNREACHED*/ 8563 vassert(0); 8564 } 8565 8566 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */ 8567 //.. 8568 //.. static 8569 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 ) 8570 //.. { 8571 //.. Int len; 8572 //.. IRTemp addr; 8573 //.. UChar rm = getUChar(delta0); 8574 //.. HChar dis_buf[50]; 8575 //.. 8576 //.. if (epartIsReg(rm)) { 8577 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) ); 8578 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm))); 8579 //.. return 1+delta0; 8580 //.. } else { 8581 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8582 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) ); 8583 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm))); 8584 //.. return len+delta0; 8585 //.. } 8586 //.. } 8587 //.. 8588 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If 8589 //.. dst is ireg and sz==4, zero out top half of it. */ 8590 //.. 8591 //.. static 8592 //.. UInt dis_mov_Sw_Ew ( UChar sorb, 8593 //.. Int sz, 8594 //.. UInt delta0 ) 8595 //.. { 8596 //.. Int len; 8597 //.. IRTemp addr; 8598 //.. UChar rm = getUChar(delta0); 8599 //.. HChar dis_buf[50]; 8600 //.. 8601 //.. vassert(sz == 2 || sz == 4); 8602 //.. 8603 //.. if (epartIsReg(rm)) { 8604 //.. if (sz == 4) 8605 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm)))); 8606 //.. else 8607 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm))); 8608 //.. 8609 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm))); 8610 //.. return 1+delta0; 8611 //.. } else { 8612 //.. addr = disAMode ( &len, sorb, delta0, dis_buf ); 8613 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) ); 8614 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf); 8615 //.. return len+delta0; 8616 //.. } 8617 //.. } 8618 //.. 8619 //.. 8620 //.. static 8621 //.. void dis_push_segreg ( UInt sreg, Int sz ) 8622 //.. { 8623 //.. IRTemp t1 = newTemp(Ity_I16); 8624 //.. IRTemp ta = newTemp(Ity_I32); 8625 //.. vassert(sz == 2 || sz == 4); 8626 //.. 8627 //.. assign( t1, getSReg(sreg) ); 8628 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) ); 8629 //.. putIReg(4, R_ESP, mkexpr(ta)); 8630 //.. storeLE( mkexpr(ta), mkexpr(t1) ); 8631 //.. 8632 //.. DIP("pushw %s\n", nameSReg(sreg)); 8633 //.. } 8634 //.. 8635 //.. static 8636 //.. void dis_pop_segreg ( UInt sreg, Int sz ) 8637 //.. { 8638 //.. IRTemp t1 = newTemp(Ity_I16); 8639 //.. IRTemp ta = newTemp(Ity_I32); 8640 //.. vassert(sz == 2 || sz == 4); 8641 //.. 8642 //.. assign( ta, getIReg(4, R_ESP) ); 8643 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) ); 8644 //.. 8645 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) ); 8646 //.. putSReg( sreg, mkexpr(t1) ); 8647 //.. DIP("pop %s\n", nameSReg(sreg)); 8648 //.. 
} 8649 8650 static 8651 void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 ) 8652 { 8653 IRTemp t1 = newTemp(Ity_I64); 8654 IRTemp t2 = newTemp(Ity_I64); 8655 IRTemp t3 = newTemp(Ity_I64); 8656 assign(t1, getIReg64(R_RSP)); 8657 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8658 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8659 putIReg64(R_RSP, mkexpr(t3)); 8660 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8661 jmp_treg(dres, Ijk_Ret, t2); 8662 vassert(dres->whatNext == Dis_StopHere); 8663 } 8664 8665 8666 /*------------------------------------------------------------*/ 8667 /*--- SSE/SSE2/SSE3 helpers ---*/ 8668 /*------------------------------------------------------------*/ 8669 8670 /* Indicates whether the op requires a rounding-mode argument. Note 8671 that this covers only vector floating point arithmetic ops, and 8672 omits the scalar ones that need rounding modes. Note also that 8673 inconsistencies here will get picked up later by the IR sanity 8674 checker, so this isn't correctness-critical. */ 8675 static Bool requiresRMode ( IROp op ) 8676 { 8677 switch (op) { 8678 /* 128 bit ops */ 8679 case Iop_Add32Fx4: case Iop_Sub32Fx4: 8680 case Iop_Mul32Fx4: case Iop_Div32Fx4: 8681 case Iop_Add64Fx2: case Iop_Sub64Fx2: 8682 case Iop_Mul64Fx2: case Iop_Div64Fx2: 8683 /* 256 bit ops */ 8684 case Iop_Add32Fx8: case Iop_Sub32Fx8: 8685 case Iop_Mul32Fx8: case Iop_Div32Fx8: 8686 case Iop_Add64Fx4: case Iop_Sub64Fx4: 8687 case Iop_Mul64Fx4: case Iop_Div64Fx4: 8688 return True; 8689 default: 8690 break; 8691 } 8692 return False; 8693 } 8694 8695 8696 /* Worker function; do not call directly. 8697 Handles full width G = G `op` E and G = (not G) `op` E. 8698 */ 8699 8700 static ULong dis_SSE_E_to_G_all_wrk ( 8701 VexAbiInfo* vbi, 8702 Prefix pfx, Long delta, 8703 const HChar* opname, IROp op, 8704 Bool invertG 8705 ) 8706 { 8707 HChar dis_buf[50]; 8708 Int alen; 8709 IRTemp addr; 8710 UChar rm = getUChar(delta); 8711 Bool needsRMode = requiresRMode(op); 8712 IRExpr* gpart 8713 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8714 : getXMMReg(gregOfRexRM(pfx,rm)); 8715 if (epartIsReg(rm)) { 8716 putXMMReg( 8717 gregOfRexRM(pfx,rm), 8718 needsRMode 8719 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8720 gpart, 8721 getXMMReg(eregOfRexRM(pfx,rm))) 8722 : binop(op, gpart, 8723 getXMMReg(eregOfRexRM(pfx,rm))) 8724 ); 8725 DIP("%s %s,%s\n", opname, 8726 nameXMMReg(eregOfRexRM(pfx,rm)), 8727 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8728 return delta+1; 8729 } else { 8730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8731 putXMMReg( 8732 gregOfRexRM(pfx,rm), 8733 needsRMode 8734 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8735 gpart, 8736 loadLE(Ity_V128, mkexpr(addr))) 8737 : binop(op, gpart, 8738 loadLE(Ity_V128, mkexpr(addr))) 8739 ); 8740 DIP("%s %s,%s\n", opname, 8741 dis_buf, 8742 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8743 return delta+alen; 8744 } 8745 } 8746 8747 8748 /* All lanes SSE binary operation, G = G `op` E. */ 8749 8750 static 8751 ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi, 8752 Prefix pfx, Long delta, 8753 const HChar* opname, IROp op ) 8754 { 8755 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8756 } 8757 8758 /* All lanes SSE binary operation, G = (not G) `op` E. 
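   (This variant serves the ANDN-family instructions -- andnps, andnpd,
   pandn -- whose semantics are dst = (NOT dst) AND src.)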
*/ 8759 8760 static 8761 ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi, 8762 Prefix pfx, Long delta, 8763 const HChar* opname, IROp op ) 8764 { 8765 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8766 } 8767 8768 8769 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */ 8770 8771 static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi, 8772 Prefix pfx, Long delta, 8773 const HChar* opname, IROp op ) 8774 { 8775 HChar dis_buf[50]; 8776 Int alen; 8777 IRTemp addr; 8778 UChar rm = getUChar(delta); 8779 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8780 if (epartIsReg(rm)) { 8781 putXMMReg( gregOfRexRM(pfx,rm), 8782 binop(op, gpart, 8783 getXMMReg(eregOfRexRM(pfx,rm))) ); 8784 DIP("%s %s,%s\n", opname, 8785 nameXMMReg(eregOfRexRM(pfx,rm)), 8786 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8787 return delta+1; 8788 } else { 8789 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8790 E operand needs to be made simply of zeroes. */ 8791 IRTemp epart = newTemp(Ity_V128); 8792 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8793 assign( epart, unop( Iop_32UtoV128, 8794 loadLE(Ity_I32, mkexpr(addr))) ); 8795 putXMMReg( gregOfRexRM(pfx,rm), 8796 binop(op, gpart, mkexpr(epart)) ); 8797 DIP("%s %s,%s\n", opname, 8798 dis_buf, 8799 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8800 return delta+alen; 8801 } 8802 } 8803 8804 8805 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ 8806 8807 static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi, 8808 Prefix pfx, Long delta, 8809 const HChar* opname, IROp op ) 8810 { 8811 HChar dis_buf[50]; 8812 Int alen; 8813 IRTemp addr; 8814 UChar rm = getUChar(delta); 8815 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8816 if (epartIsReg(rm)) { 8817 putXMMReg( gregOfRexRM(pfx,rm), 8818 binop(op, gpart, 8819 getXMMReg(eregOfRexRM(pfx,rm))) ); 8820 DIP("%s %s,%s\n", opname, 8821 nameXMMReg(eregOfRexRM(pfx,rm)), 8822 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8823 return delta+1; 8824 } else { 8825 /* We can only do a 64-bit memory read, so the upper half of the 8826 E operand needs to be made simply of zeroes. */ 8827 IRTemp epart = newTemp(Ity_V128); 8828 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8829 assign( epart, unop( Iop_64UtoV128, 8830 loadLE(Ity_I64, mkexpr(addr))) ); 8831 putXMMReg( gregOfRexRM(pfx,rm), 8832 binop(op, gpart, mkexpr(epart)) ); 8833 DIP("%s %s,%s\n", opname, 8834 dis_buf, 8835 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8836 return delta+alen; 8837 } 8838 } 8839 8840 8841 /* All lanes unary SSE operation, G = op(E). */ 8842 8843 static ULong dis_SSE_E_to_G_unary_all ( 8844 VexAbiInfo* vbi, 8845 Prefix pfx, Long delta, 8846 const HChar* opname, IROp op 8847 ) 8848 { 8849 HChar dis_buf[50]; 8850 Int alen; 8851 IRTemp addr; 8852 UChar rm = getUChar(delta); 8853 if (epartIsReg(rm)) { 8854 putXMMReg( gregOfRexRM(pfx,rm), 8855 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) ); 8856 DIP("%s %s,%s\n", opname, 8857 nameXMMReg(eregOfRexRM(pfx,rm)), 8858 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8859 return delta+1; 8860 } else { 8861 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8862 putXMMReg( gregOfRexRM(pfx,rm), 8863 unop(op, loadLE(Ity_V128, mkexpr(addr))) ); 8864 DIP("%s %s,%s\n", opname, 8865 dis_buf, 8866 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8867 return delta+alen; 8868 } 8869 } 8870 8871 8872 /* Lowest 32-bit lane only unary SSE operation, G = op(E). 
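   Only lane 0 of the result is taken from op(E); the upper three
   lanes are simply the old contents of G.  Hence the code below
   patches the low 32 bits of E into a copy of G and applies op to
   the whole vector.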
*/ 8873 8874 static ULong dis_SSE_E_to_G_unary_lo32 ( 8875 VexAbiInfo* vbi, 8876 Prefix pfx, Long delta, 8877 const HChar* opname, IROp op 8878 ) 8879 { 8880 /* First we need to get the old G value and patch the low 32 bits 8881 of the E operand into it. Then apply op and write back to G. */ 8882 HChar dis_buf[50]; 8883 Int alen; 8884 IRTemp addr; 8885 UChar rm = getUChar(delta); 8886 IRTemp oldG0 = newTemp(Ity_V128); 8887 IRTemp oldG1 = newTemp(Ity_V128); 8888 8889 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8890 8891 if (epartIsReg(rm)) { 8892 assign( oldG1, 8893 binop( Iop_SetV128lo32, 8894 mkexpr(oldG0), 8895 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8896 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8897 DIP("%s %s,%s\n", opname, 8898 nameXMMReg(eregOfRexRM(pfx,rm)), 8899 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8900 return delta+1; 8901 } else { 8902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8903 assign( oldG1, 8904 binop( Iop_SetV128lo32, 8905 mkexpr(oldG0), 8906 loadLE(Ity_I32, mkexpr(addr)) )); 8907 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8908 DIP("%s %s,%s\n", opname, 8909 dis_buf, 8910 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8911 return delta+alen; 8912 } 8913 } 8914 8915 8916 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 8917 8918 static ULong dis_SSE_E_to_G_unary_lo64 ( 8919 VexAbiInfo* vbi, 8920 Prefix pfx, Long delta, 8921 const HChar* opname, IROp op 8922 ) 8923 { 8924 /* First we need to get the old G value and patch the low 64 bits 8925 of the E operand into it. Then apply op and write back to G. */ 8926 HChar dis_buf[50]; 8927 Int alen; 8928 IRTemp addr; 8929 UChar rm = getUChar(delta); 8930 IRTemp oldG0 = newTemp(Ity_V128); 8931 IRTemp oldG1 = newTemp(Ity_V128); 8932 8933 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8934 8935 if (epartIsReg(rm)) { 8936 assign( oldG1, 8937 binop( Iop_SetV128lo64, 8938 mkexpr(oldG0), 8939 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 8940 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8941 DIP("%s %s,%s\n", opname, 8942 nameXMMReg(eregOfRexRM(pfx,rm)), 8943 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8944 return delta+1; 8945 } else { 8946 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8947 assign( oldG1, 8948 binop( Iop_SetV128lo64, 8949 mkexpr(oldG0), 8950 loadLE(Ity_I64, mkexpr(addr)) )); 8951 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8952 DIP("%s %s,%s\n", opname, 8953 dis_buf, 8954 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8955 return delta+alen; 8956 } 8957 } 8958 8959 8960 /* SSE integer binary operation: 8961 G = G `op` E (eLeft == False) 8962 G = E `op` G (eLeft == True) 8963 */ 8964 static ULong dis_SSEint_E_to_G( 8965 VexAbiInfo* vbi, 8966 Prefix pfx, Long delta, 8967 const HChar* opname, IROp op, 8968 Bool eLeft 8969 ) 8970 { 8971 HChar dis_buf[50]; 8972 Int alen; 8973 IRTemp addr; 8974 UChar rm = getUChar(delta); 8975 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8976 IRExpr* epart = NULL; 8977 if (epartIsReg(rm)) { 8978 epart = getXMMReg(eregOfRexRM(pfx,rm)); 8979 DIP("%s %s,%s\n", opname, 8980 nameXMMReg(eregOfRexRM(pfx,rm)), 8981 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8982 delta += 1; 8983 } else { 8984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8985 epart = loadLE(Ity_V128, mkexpr(addr)); 8986 DIP("%s %s,%s\n", opname, 8987 dis_buf, 8988 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8989 delta += alen; 8990 } 8991 putXMMReg( gregOfRexRM(pfx,rm), 8992 eLeft ? 
binop(op, epart, gpart) 8993 : binop(op, gpart, epart) ); 8994 return delta; 8995 } 8996 8997 8998 /* Helper for doing SSE FP comparisons. False return ==> unhandled. 8999 This is all a bit of a kludge in that it ignores the subtleties of 9000 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel 9001 spec. */ 9002 static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP, 9003 /*OUT*/IROp* opP, 9004 /*OUT*/Bool* postNotP, 9005 UInt imm8, Bool all_lanes, Int sz ) 9006 { 9007 if (imm8 >= 32) return False; 9008 9009 /* First, compute a (preSwap, op, postNot) triple from 9010 the supplied imm8. */ 9011 Bool pre = False; 9012 IROp op = Iop_INVALID; 9013 Bool not = False; 9014 9015 # define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; } 9016 // If you add a case here, add a corresponding test for both VCMPSD_128 9017 // and VCMPSS_128 in avx-1.c. 9018 switch (imm8) { 9019 // "O" = ordered, "U" = unordered 9020 // "Q" = non-signalling (quiet), "S" = signalling 9021 // 9022 // swap operands? 9023 // | 9024 // | cmp op invert after? 9025 // | | | 9026 // v v v 9027 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ 9028 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS 9029 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS 9030 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q 9031 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ 9032 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US 9033 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US 9034 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q 9035 case 0x8: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ 9036 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US 9037 /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns] */ 9038 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US 9039 // 0xB FALSE_OQ 9040 // 0xC: this isn't really right because it returns all-1s when 9041 // either operand is a NaN, and it should return all-0s. 9042 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ 9043 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS 9044 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS 9045 // 0xF TRUE_UQ 9046 // 0x10 EQ_OS 9047 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ 9048 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ 9049 // 0x13 UNORD_S 9050 // 0x14 NEQ_US 9051 // 0x15 NLT_UQ 9052 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ 9053 // 0x17 ORD_S 9054 // 0x18 EQ_US 9055 // 0x19 NGE_UQ 9056 // 0x1A NGT_UQ 9057 // 0x1B FALSE_OS 9058 // 0x1C NEQ_OS 9059 // 0x1D GE_OQ 9060 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ 9061 // 0x1F TRUE_US 9062 /* Don't forget to add test cases to VCMPSS_128_<imm8> in 9063 avx-1.c if new cases turn up. */ 9064 default: break; 9065 } 9066 # undef XXX 9067 if (op == Iop_INVALID) return False; 9068 9069 /* Now convert the op into one with the same arithmetic but that is 9070 correct for the width and laneage requirements. 
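   The table above uses the 32Fx4 forms purely as canonical
   placeholders; they are rewritten here according to (sz, all_lanes).
   For example imm8 0x1 (LT_OS) with sz==8 and all_lanes==False ends
   up as Iop_CmpLT64F0x2, i.e. the cmpltsd case.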
*/ 9071 9072 /**/ if (sz == 4 && all_lanes) { 9073 switch (op) { 9074 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break; 9075 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break; 9076 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break; 9077 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break; 9078 default: vassert(0); 9079 } 9080 } 9081 else if (sz == 4 && !all_lanes) { 9082 switch (op) { 9083 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break; 9084 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break; 9085 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break; 9086 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break; 9087 default: vassert(0); 9088 } 9089 } 9090 else if (sz == 8 && all_lanes) { 9091 switch (op) { 9092 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break; 9093 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break; 9094 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break; 9095 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break; 9096 default: vassert(0); 9097 } 9098 } 9099 else if (sz == 8 && !all_lanes) { 9100 switch (op) { 9101 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break; 9102 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break; 9103 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break; 9104 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break; 9105 default: vassert(0); 9106 } 9107 } 9108 else { 9109 vpanic("findSSECmpOp(amd64,guest)"); 9110 } 9111 9112 *preSwapP = pre; *opP = op; *postNotP = not; 9113 return True; 9114 } 9115 9116 9117 /* Handles SSE 32F/64F comparisons. It can fail, in which case it 9118 returns the original delta to indicate failure. */ 9119 9120 static Long dis_SSE_cmp_E_to_G ( VexAbiInfo* vbi, 9121 Prefix pfx, Long delta, 9122 const HChar* opname, Bool all_lanes, Int sz ) 9123 { 9124 Long delta0 = delta; 9125 HChar dis_buf[50]; 9126 Int alen; 9127 UInt imm8; 9128 IRTemp addr; 9129 Bool preSwap = False; 9130 IROp op = Iop_INVALID; 9131 Bool postNot = False; 9132 IRTemp plain = newTemp(Ity_V128); 9133 UChar rm = getUChar(delta); 9134 UShort mask = 0; 9135 vassert(sz == 4 || sz == 8); 9136 if (epartIsReg(rm)) { 9137 imm8 = getUChar(delta+1); 9138 if (imm8 >= 8) return delta0; /* FAIL */ 9139 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9140 if (!ok) return delta0; /* FAIL */ 9141 vassert(!preSwap); /* never needed for imm8 < 8 */ 9142 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 9143 getXMMReg(eregOfRexRM(pfx,rm))) ); 9144 delta += 2; 9145 DIP("%s $%d,%s,%s\n", opname, 9146 (Int)imm8, 9147 nameXMMReg(eregOfRexRM(pfx,rm)), 9148 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9149 } else { 9150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 9151 imm8 = getUChar(delta+alen); 9152 if (imm8 >= 8) return delta0; /* FAIL */ 9153 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9154 if (!ok) return delta0; /* FAIL */ 9155 vassert(!preSwap); /* never needed for imm8 < 8 */ 9156 assign( plain, 9157 binop( 9158 op, 9159 getXMMReg(gregOfRexRM(pfx,rm)), 9160 all_lanes 9161 ? loadLE(Ity_V128, mkexpr(addr)) 9162 : sz == 8 9163 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 9164 : /*sz==4*/ 9165 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 9166 ) 9167 ); 9168 delta += alen+1; 9169 DIP("%s $%d,%s,%s\n", opname, 9170 (Int)imm8, 9171 dis_buf, 9172 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9173 } 9174 9175 if (postNot && all_lanes) { 9176 putXMMReg( gregOfRexRM(pfx,rm), 9177 unop(Iop_NotV128, mkexpr(plain)) ); 9178 } 9179 else 9180 if (postNot && !all_lanes) { 9181 mask = toUShort(sz==4 ? 
0x000F : 0x00FF); 9182 putXMMReg( gregOfRexRM(pfx,rm), 9183 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 9184 } 9185 else { 9186 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 9187 } 9188 9189 return delta; 9190 } 9191 9192 9193 /* Vector by scalar shift of G by the amount specified at the bottom 9194 of E. */ 9195 9196 static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi, 9197 Prefix pfx, Long delta, 9198 const HChar* opname, IROp op ) 9199 { 9200 HChar dis_buf[50]; 9201 Int alen, size; 9202 IRTemp addr; 9203 Bool shl, shr, sar; 9204 UChar rm = getUChar(delta); 9205 IRTemp g0 = newTemp(Ity_V128); 9206 IRTemp g1 = newTemp(Ity_V128); 9207 IRTemp amt = newTemp(Ity_I64); 9208 IRTemp amt8 = newTemp(Ity_I8); 9209 if (epartIsReg(rm)) { 9210 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) ); 9211 DIP("%s %s,%s\n", opname, 9212 nameXMMReg(eregOfRexRM(pfx,rm)), 9213 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9214 delta++; 9215 } else { 9216 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9217 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 9218 DIP("%s %s,%s\n", opname, 9219 dis_buf, 9220 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9221 delta += alen; 9222 } 9223 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9224 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 9225 9226 shl = shr = sar = False; 9227 size = 0; 9228 switch (op) { 9229 case Iop_ShlN16x8: shl = True; size = 32; break; 9230 case Iop_ShlN32x4: shl = True; size = 32; break; 9231 case Iop_ShlN64x2: shl = True; size = 64; break; 9232 case Iop_SarN16x8: sar = True; size = 16; break; 9233 case Iop_SarN32x4: sar = True; size = 32; break; 9234 case Iop_ShrN16x8: shr = True; size = 16; break; 9235 case Iop_ShrN32x4: shr = True; size = 32; break; 9236 case Iop_ShrN64x2: shr = True; size = 64; break; 9237 default: vassert(0); 9238 } 9239 9240 if (shl || shr) { 9241 assign( 9242 g1, 9243 IRExpr_ITE( 9244 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9245 binop(op, mkexpr(g0), mkexpr(amt8)), 9246 mkV128(0x0000) 9247 ) 9248 ); 9249 } else 9250 if (sar) { 9251 assign( 9252 g1, 9253 IRExpr_ITE( 9254 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9255 binop(op, mkexpr(g0), mkexpr(amt8)), 9256 binop(op, mkexpr(g0), mkU8(size-1)) 9257 ) 9258 ); 9259 } else { 9260 vassert(0); 9261 } 9262 9263 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 9264 return delta; 9265 } 9266 9267 9268 /* Vector by scalar shift of E by an immediate byte. 
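   Unlike the by-register form above, the shift amount is known at
   translation time, so the out-of-range case (amt >= lane size) is
   resolved here in C rather than with an IR-level ITE: logical
   shifts produce all zeroes, arithmetic shifts behave as a shift by
   (lane size - 1).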
*/ 9269 9270 static 9271 ULong dis_SSE_shiftE_imm ( Prefix pfx, 9272 Long delta, const HChar* opname, IROp op ) 9273 { 9274 Bool shl, shr, sar; 9275 UChar rm = getUChar(delta); 9276 IRTemp e0 = newTemp(Ity_V128); 9277 IRTemp e1 = newTemp(Ity_V128); 9278 UChar amt, size; 9279 vassert(epartIsReg(rm)); 9280 vassert(gregLO3ofRM(rm) == 2 9281 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 9282 amt = getUChar(delta+1); 9283 delta += 2; 9284 DIP("%s $%d,%s\n", opname, 9285 (Int)amt, 9286 nameXMMReg(eregOfRexRM(pfx,rm)) ); 9287 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 9288 9289 shl = shr = sar = False; 9290 size = 0; 9291 switch (op) { 9292 case Iop_ShlN16x8: shl = True; size = 16; break; 9293 case Iop_ShlN32x4: shl = True; size = 32; break; 9294 case Iop_ShlN64x2: shl = True; size = 64; break; 9295 case Iop_SarN16x8: sar = True; size = 16; break; 9296 case Iop_SarN32x4: sar = True; size = 32; break; 9297 case Iop_ShrN16x8: shr = True; size = 16; break; 9298 case Iop_ShrN32x4: shr = True; size = 32; break; 9299 case Iop_ShrN64x2: shr = True; size = 64; break; 9300 default: vassert(0); 9301 } 9302 9303 if (shl || shr) { 9304 assign( e1, amt >= size 9305 ? mkV128(0x0000) 9306 : binop(op, mkexpr(e0), mkU8(amt)) 9307 ); 9308 } else 9309 if (sar) { 9310 assign( e1, amt >= size 9311 ? binop(op, mkexpr(e0), mkU8(size-1)) 9312 : binop(op, mkexpr(e0), mkU8(amt)) 9313 ); 9314 } else { 9315 vassert(0); 9316 } 9317 9318 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 9319 return delta; 9320 } 9321 9322 9323 /* Get the current SSE rounding mode. */ 9324 9325 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 9326 { 9327 return 9328 unop( Iop_64to32, 9329 binop( Iop_And64, 9330 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 9331 mkU64(3) )); 9332 } 9333 9334 static void put_sse_roundingmode ( IRExpr* sseround ) 9335 { 9336 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 9337 stmt( IRStmt_Put( OFFB_SSEROUND, 9338 unop(Iop_32Uto64,sseround) ) ); 9339 } 9340 9341 /* Break a V128-bit value up into four 32-bit ints. */ 9342 9343 static void breakupV128to32s ( IRTemp t128, 9344 /*OUTs*/ 9345 IRTemp* t3, IRTemp* t2, 9346 IRTemp* t1, IRTemp* t0 ) 9347 { 9348 IRTemp hi64 = newTemp(Ity_I64); 9349 IRTemp lo64 = newTemp(Ity_I64); 9350 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 9351 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 9352 9353 vassert(t0 && *t0 == IRTemp_INVALID); 9354 vassert(t1 && *t1 == IRTemp_INVALID); 9355 vassert(t2 && *t2 == IRTemp_INVALID); 9356 vassert(t3 && *t3 == IRTemp_INVALID); 9357 9358 *t0 = newTemp(Ity_I32); 9359 *t1 = newTemp(Ity_I32); 9360 *t2 = newTemp(Ity_I32); 9361 *t3 = newTemp(Ity_I32); 9362 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 9363 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 9364 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 9365 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 9366 } 9367 9368 /* Construct a V128-bit value from four 32-bit ints. */ 9369 9370 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2, 9371 IRTemp t1, IRTemp t0 ) 9372 { 9373 return 9374 binop( Iop_64HLtoV128, 9375 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9376 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 9377 ); 9378 } 9379 9380 /* Break a 64-bit value up into four 16-bit ints. 
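   t3 receives the most significant 16 bits and t0 the least, so
   conceptually t64 == t3:t2:t1:t0.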
*/ 9381 9382 static void breakup64to16s ( IRTemp t64, 9383 /*OUTs*/ 9384 IRTemp* t3, IRTemp* t2, 9385 IRTemp* t1, IRTemp* t0 ) 9386 { 9387 IRTemp hi32 = newTemp(Ity_I32); 9388 IRTemp lo32 = newTemp(Ity_I32); 9389 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 9390 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 9391 9392 vassert(t0 && *t0 == IRTemp_INVALID); 9393 vassert(t1 && *t1 == IRTemp_INVALID); 9394 vassert(t2 && *t2 == IRTemp_INVALID); 9395 vassert(t3 && *t3 == IRTemp_INVALID); 9396 9397 *t0 = newTemp(Ity_I16); 9398 *t1 = newTemp(Ity_I16); 9399 *t2 = newTemp(Ity_I16); 9400 *t3 = newTemp(Ity_I16); 9401 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 9402 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 9403 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 9404 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 9405 } 9406 9407 /* Construct a 64-bit value from four 16-bit ints. */ 9408 9409 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 9410 IRTemp t1, IRTemp t0 ) 9411 { 9412 return 9413 binop( Iop_32HLto64, 9414 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 9415 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 9416 ); 9417 } 9418 9419 /* Break a V256-bit value up into four 64-bit ints. */ 9420 9421 static void breakupV256to64s ( IRTemp t256, 9422 /*OUTs*/ 9423 IRTemp* t3, IRTemp* t2, 9424 IRTemp* t1, IRTemp* t0 ) 9425 { 9426 vassert(t0 && *t0 == IRTemp_INVALID); 9427 vassert(t1 && *t1 == IRTemp_INVALID); 9428 vassert(t2 && *t2 == IRTemp_INVALID); 9429 vassert(t3 && *t3 == IRTemp_INVALID); 9430 *t0 = newTemp(Ity_I64); 9431 *t1 = newTemp(Ity_I64); 9432 *t2 = newTemp(Ity_I64); 9433 *t3 = newTemp(Ity_I64); 9434 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) ); 9435 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) ); 9436 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) ); 9437 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) ); 9438 } 9439 9440 /* Break a V256-bit value up into two V128s. */ 9441 9442 static void breakupV256toV128s ( IRTemp t256, 9443 /*OUTs*/ 9444 IRTemp* t1, IRTemp* t0 ) 9445 { 9446 vassert(t0 && *t0 == IRTemp_INVALID); 9447 vassert(t1 && *t1 == IRTemp_INVALID); 9448 *t0 = newTemp(Ity_V128); 9449 *t1 = newTemp(Ity_V128); 9450 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256))); 9451 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256))); 9452 } 9453 9454 /* Break a V256-bit value up into eight 32-bit ints. */ 9455 9456 static void breakupV256to32s ( IRTemp t256, 9457 /*OUTs*/ 9458 IRTemp* t7, IRTemp* t6, 9459 IRTemp* t5, IRTemp* t4, 9460 IRTemp* t3, IRTemp* t2, 9461 IRTemp* t1, IRTemp* t0 ) 9462 { 9463 IRTemp t128_1 = IRTemp_INVALID; 9464 IRTemp t128_0 = IRTemp_INVALID; 9465 breakupV256toV128s( t256, &t128_1, &t128_0 ); 9466 breakupV128to32s( t128_1, t7, t6, t5, t4 ); 9467 breakupV128to32s( t128_0, t3, t2, t1, t0 ); 9468 } 9469 9470 /* Break a V128-bit value up into two 64-bit ints. */ 9471 9472 static void breakupV128to64s ( IRTemp t128, 9473 /*OUTs*/ 9474 IRTemp* t1, IRTemp* t0 ) 9475 { 9476 vassert(t0 && *t0 == IRTemp_INVALID); 9477 vassert(t1 && *t1 == IRTemp_INVALID); 9478 *t0 = newTemp(Ity_I64); 9479 *t1 = newTemp(Ity_I64); 9480 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) ); 9481 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) ); 9482 } 9483 9484 /* Construct a V256-bit value from eight 32-bit ints. 
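   t7 supplies the most significant 32 bits and t0 the least,
   mirroring the lane order produced by breakupV256to32s above.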
*/ 9485 9486 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6, 9487 IRTemp t5, IRTemp t4, 9488 IRTemp t3, IRTemp t2, 9489 IRTemp t1, IRTemp t0 ) 9490 { 9491 return 9492 binop( Iop_V128HLtoV256, 9493 binop( Iop_64HLtoV128, 9494 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)), 9495 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ), 9496 binop( Iop_64HLtoV128, 9497 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9498 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) ) 9499 ); 9500 } 9501 9502 /* Construct a V256-bit value from four 64-bit ints. */ 9503 9504 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2, 9505 IRTemp t1, IRTemp t0 ) 9506 { 9507 return 9508 binop( Iop_V128HLtoV256, 9509 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)), 9510 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0)) 9511 ); 9512 } 9513 9514 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit 9515 values (aa,bb), computes, for each of the 4 16-bit lanes: 9516 9517 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 9518 */ 9519 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 9520 { 9521 IRTemp aa = newTemp(Ity_I64); 9522 IRTemp bb = newTemp(Ity_I64); 9523 IRTemp aahi32s = newTemp(Ity_I64); 9524 IRTemp aalo32s = newTemp(Ity_I64); 9525 IRTemp bbhi32s = newTemp(Ity_I64); 9526 IRTemp bblo32s = newTemp(Ity_I64); 9527 IRTemp rHi = newTemp(Ity_I64); 9528 IRTemp rLo = newTemp(Ity_I64); 9529 IRTemp one32x2 = newTemp(Ity_I64); 9530 assign(aa, aax); 9531 assign(bb, bbx); 9532 assign( aahi32s, 9533 binop(Iop_SarN32x2, 9534 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 9535 mkU8(16) )); 9536 assign( aalo32s, 9537 binop(Iop_SarN32x2, 9538 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 9539 mkU8(16) )); 9540 assign( bbhi32s, 9541 binop(Iop_SarN32x2, 9542 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 9543 mkU8(16) )); 9544 assign( bblo32s, 9545 binop(Iop_SarN32x2, 9546 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 9547 mkU8(16) )); 9548 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 9549 assign( 9550 rHi, 9551 binop( 9552 Iop_ShrN32x2, 9553 binop( 9554 Iop_Add32x2, 9555 binop( 9556 Iop_ShrN32x2, 9557 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 9558 mkU8(14) 9559 ), 9560 mkexpr(one32x2) 9561 ), 9562 mkU8(1) 9563 ) 9564 ); 9565 assign( 9566 rLo, 9567 binop( 9568 Iop_ShrN32x2, 9569 binop( 9570 Iop_Add32x2, 9571 binop( 9572 Iop_ShrN32x2, 9573 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 9574 mkU8(14) 9575 ), 9576 mkexpr(one32x2) 9577 ), 9578 mkU8(1) 9579 ) 9580 ); 9581 return 9582 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 9583 } 9584 9585 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
Given two 64-bit 9586 values (aa,bb), computes, for each lane: 9587 9588 if aa_lane < 0 then - bb_lane 9589 else if aa_lane > 0 then bb_lane 9590 else 0 9591 */ 9592 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) 9593 { 9594 IRTemp aa = newTemp(Ity_I64); 9595 IRTemp bb = newTemp(Ity_I64); 9596 IRTemp zero = newTemp(Ity_I64); 9597 IRTemp bbNeg = newTemp(Ity_I64); 9598 IRTemp negMask = newTemp(Ity_I64); 9599 IRTemp posMask = newTemp(Ity_I64); 9600 IROp opSub = Iop_INVALID; 9601 IROp opCmpGTS = Iop_INVALID; 9602 9603 switch (laneszB) { 9604 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; 9605 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; 9606 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; 9607 default: vassert(0); 9608 } 9609 9610 assign( aa, aax ); 9611 assign( bb, bbx ); 9612 assign( zero, mkU64(0) ); 9613 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); 9614 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); 9615 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); 9616 9617 return 9618 binop(Iop_Or64, 9619 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), 9620 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); 9621 9622 } 9623 9624 9625 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit 9626 value aa, computes, for each lane 9627 9628 if aa < 0 then -aa else aa 9629 9630 Note that the result is interpreted as unsigned, so that the 9631 absolute value of the most negative signed input can be 9632 represented. 9633 */ 9634 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB ) 9635 { 9636 IRTemp res = newTemp(Ity_I64); 9637 IRTemp zero = newTemp(Ity_I64); 9638 IRTemp aaNeg = newTemp(Ity_I64); 9639 IRTemp negMask = newTemp(Ity_I64); 9640 IRTemp posMask = newTemp(Ity_I64); 9641 IROp opSub = Iop_INVALID; 9642 IROp opSarN = Iop_INVALID; 9643 9644 switch (laneszB) { 9645 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; 9646 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; 9647 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; 9648 default: vassert(0); 9649 } 9650 9651 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); 9652 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); 9653 assign( zero, mkU64(0) ); 9654 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); 9655 assign( res, 9656 binop(Iop_Or64, 9657 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), 9658 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) )); 9659 return res; 9660 } 9661 9662 /* XMM version of math_PABS_MMX. */ 9663 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB ) 9664 { 9665 IRTemp res = newTemp(Ity_V128); 9666 IRTemp aaHi = newTemp(Ity_I64); 9667 IRTemp aaLo = newTemp(Ity_I64); 9668 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa))); 9669 assign(aaLo, unop(Iop_V128to64, mkexpr(aa))); 9670 assign(res, binop(Iop_64HLtoV128, 9671 mkexpr(math_PABS_MMX(aaHi, laneszB)), 9672 mkexpr(math_PABS_MMX(aaLo, laneszB)))); 9673 return res; 9674 } 9675 9676 /* Specialisations of math_PABS_XMM, since there's no easy way to do 9677 partial applications in C :-( */ 9678 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) { 9679 return math_PABS_XMM(aa, 4); 9680 } 9681 9682 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) { 9683 return math_PABS_XMM(aa, 2); 9684 } 9685 9686 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) { 9687 return math_PABS_XMM(aa, 1); 9688 } 9689 9690 /* YMM version of math_PABS_XMM. 
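   It splits aa into two 128-bit halves with breakupV256toV128s and
   applies math_PABS_XMM to each half independently.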
*/ 9691 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB ) 9692 { 9693 IRTemp res = newTemp(Ity_V256); 9694 IRTemp aaHi = IRTemp_INVALID; 9695 IRTemp aaLo = IRTemp_INVALID; 9696 breakupV256toV128s(aa, &aaHi, &aaLo); 9697 assign(res, binop(Iop_V128HLtoV256, 9698 mkexpr(math_PABS_XMM(aaHi, laneszB)), 9699 mkexpr(math_PABS_XMM(aaLo, laneszB)))); 9700 return res; 9701 } 9702 9703 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) { 9704 return math_PABS_YMM(aa, 4); 9705 } 9706 9707 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) { 9708 return math_PABS_YMM(aa, 2); 9709 } 9710 9711 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) { 9712 return math_PABS_YMM(aa, 1); 9713 } 9714 9715 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, 9716 IRTemp lo64, Long byteShift ) 9717 { 9718 vassert(byteShift >= 1 && byteShift <= 7); 9719 return 9720 binop(Iop_Or64, 9721 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), 9722 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) 9723 ); 9724 } 9725 9726 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 ) 9727 { 9728 IRTemp res = newTemp(Ity_V128); 9729 IRTemp sHi = newTemp(Ity_I64); 9730 IRTemp sLo = newTemp(Ity_I64); 9731 IRTemp dHi = newTemp(Ity_I64); 9732 IRTemp dLo = newTemp(Ity_I64); 9733 IRTemp rHi = newTemp(Ity_I64); 9734 IRTemp rLo = newTemp(Ity_I64); 9735 9736 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 9737 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 9738 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 9739 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 9740 9741 if (imm8 == 0) { 9742 assign( rHi, mkexpr(sHi) ); 9743 assign( rLo, mkexpr(sLo) ); 9744 } 9745 else if (imm8 >= 1 && imm8 <= 7) { 9746 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) ); 9747 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) ); 9748 } 9749 else if (imm8 == 8) { 9750 assign( rHi, mkexpr(dLo) ); 9751 assign( rLo, mkexpr(sHi) ); 9752 } 9753 else if (imm8 >= 9 && imm8 <= 15) { 9754 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) ); 9755 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) ); 9756 } 9757 else if (imm8 == 16) { 9758 assign( rHi, mkexpr(dHi) ); 9759 assign( rLo, mkexpr(dLo) ); 9760 } 9761 else if (imm8 >= 17 && imm8 <= 23) { 9762 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) ); 9763 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) ); 9764 } 9765 else if (imm8 == 24) { 9766 assign( rHi, mkU64(0) ); 9767 assign( rLo, mkexpr(dHi) ); 9768 } 9769 else if (imm8 >= 25 && imm8 <= 31) { 9770 assign( rHi, mkU64(0) ); 9771 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) ); 9772 } 9773 else if (imm8 >= 32 && imm8 <= 255) { 9774 assign( rHi, mkU64(0) ); 9775 assign( rLo, mkU64(0) ); 9776 } 9777 else 9778 vassert(0); 9779 9780 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))); 9781 return res; 9782 } 9783 9784 9785 /* Generate a SIGSEGV followed by a restart of the current instruction 9786 if effective_addr is not 16-aligned. This is required behaviour 9787 for some SSE3 instructions and all 128-bit SSSE3 instructions. 9788 This assumes that guest_RIP_curr_instr is set correctly! 
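   The mask argument selects the granularity: 16-1 checks 16-byte
   alignment and 32-1 checks 32-byte alignment; the side exit fires
   whenever (effective_addr & mask) is nonzero.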
*/ 9789 static 9790 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask ) 9791 { 9792 stmt( 9793 IRStmt_Exit( 9794 binop(Iop_CmpNE64, 9795 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)), 9796 mkU64(0)), 9797 Ijk_SigSEGV, 9798 IRConst_U64(guest_RIP_curr_instr), 9799 OFFB_RIP 9800 ) 9801 ); 9802 } 9803 9804 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) { 9805 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1); 9806 } 9807 9808 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) { 9809 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1); 9810 } 9811 9812 /* Helper for deciding whether a given insn (starting at the opcode 9813 byte) may validly be used with a LOCK prefix. The following insns 9814 may be used with LOCK when their destination operand is in memory. 9815 AFAICS this is exactly the same for both 32-bit and 64-bit mode. 9816 9817 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01 9818 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09 9819 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11 9820 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19 9821 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21 9822 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29 9823 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31 9824 9825 DEC FE /1, FF /1 9826 INC FE /0, FF /0 9827 9828 NEG F6 /3, F7 /3 9829 NOT F6 /2, F7 /2 9830 9831 XCHG 86, 87 9832 9833 BTC 0F BB, 0F BA /7 9834 BTR 0F B3, 0F BA /6 9835 BTS 0F AB, 0F BA /5 9836 9837 CMPXCHG 0F B0, 0F B1 9838 CMPXCHG8B 0F C7 /1 9839 9840 XADD 0F C0, 0F C1 9841 9842 ------------------------------ 9843 9844 80 /0 = addb $imm8, rm8 9845 81 /0 = addl $imm32, rm32 and addw $imm16, rm16 9846 82 /0 = addb $imm8, rm8 9847 83 /0 = addl $simm8, rm32 and addw $simm8, rm16 9848 9849 00 = addb r8, rm8 9850 01 = addl r32, rm32 and addw r16, rm16 9851 9852 Same for ADD OR ADC SBB AND SUB XOR 9853 9854 FE /1 = dec rm8 9855 FF /1 = dec rm32 and dec rm16 9856 9857 FE /0 = inc rm8 9858 FF /0 = inc rm32 and inc rm16 9859 9860 F6 /3 = neg rm8 9861 F7 /3 = neg rm32 and neg rm16 9862 9863 F6 /2 = not rm8 9864 F7 /2 = not rm32 and not rm16 9865 9866 0F BB = btcw r16, rm16 and btcl r32, rm32 9867 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32 9868 9869 Same for BTS, BTR 9870 */ 9871 static Bool can_be_used_with_LOCK_prefix ( UChar* opc ) 9872 { 9873 switch (opc[0]) { 9874 case 0x00: case 0x01: case 0x08: case 0x09: 9875 case 0x10: case 0x11: case 0x18: case 0x19: 9876 case 0x20: case 0x21: case 0x28: case 0x29: 9877 case 0x30: case 0x31: 9878 if (!epartIsReg(opc[1])) 9879 return True; 9880 break; 9881 9882 case 0x80: case 0x81: case 0x82: case 0x83: 9883 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6 9884 && !epartIsReg(opc[1])) 9885 return True; 9886 break; 9887 9888 case 0xFE: case 0xFF: 9889 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1 9890 && !epartIsReg(opc[1])) 9891 return True; 9892 break; 9893 9894 case 0xF6: case 0xF7: 9895 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3 9896 && !epartIsReg(opc[1])) 9897 return True; 9898 break; 9899 9900 case 0x86: case 0x87: 9901 if (!epartIsReg(opc[1])) 9902 return True; 9903 break; 9904 9905 case 0x0F: { 9906 switch (opc[1]) { 9907 case 0xBB: case 0xB3: case 0xAB: 9908 if (!epartIsReg(opc[2])) 9909 return True; 9910 break; 9911 case 0xBA: 9912 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7 9913 && !epartIsReg(opc[2])) 9914 return True; 9915 break; 9916 case 0xB0: case 0xB1: 9917 if (!epartIsReg(opc[2])) 9918 return True; 9919 break; 9920 case 0xC7: 9921 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) ) 9922 return True; 
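            /* Presumably CMPXCHG16B (REX.W 0F C7 /1) is accepted by
               this same check, since the REX prefix is not visible
               here. */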
9923 break; 9924 case 0xC0: case 0xC1: 9925 if (!epartIsReg(opc[2])) 9926 return True; 9927 break; 9928 default: 9929 break; 9930 } /* switch (opc[1]) */ 9931 break; 9932 } 9933 9934 default: 9935 break; 9936 } /* switch (opc[0]) */ 9937 9938 return False; 9939 } 9940 9941 9942 /*------------------------------------------------------------*/ 9943 /*--- ---*/ 9944 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/ 9945 /*--- ---*/ 9946 /*------------------------------------------------------------*/ 9947 9948 static Long dis_COMISD ( VexAbiInfo* vbi, Prefix pfx, 9949 Long delta, Bool isAvx, UChar opc ) 9950 { 9951 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/); 9952 Int alen = 0; 9953 HChar dis_buf[50]; 9954 IRTemp argL = newTemp(Ity_F64); 9955 IRTemp argR = newTemp(Ity_F64); 9956 UChar modrm = getUChar(delta); 9957 IRTemp addr = IRTemp_INVALID; 9958 if (epartIsReg(modrm)) { 9959 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm), 9960 0/*lowest lane*/ ) ); 9961 delta += 1; 9962 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9963 opc==0x2E ? "u" : "", 9964 nameXMMReg(eregOfRexRM(pfx,modrm)), 9965 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9966 } else { 9967 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9968 assign( argR, loadLE(Ity_F64, mkexpr(addr)) ); 9969 delta += alen; 9970 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "", 9971 opc==0x2E ? "u" : "", 9972 dis_buf, 9973 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 9974 } 9975 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm), 9976 0/*lowest lane*/ ) ); 9977 9978 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 9979 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 9980 stmt( IRStmt_Put( 9981 OFFB_CC_DEP1, 9982 binop( Iop_And64, 9983 unop( Iop_32Uto64, 9984 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ), 9985 mkU64(0x45) 9986 ))); 9987 return delta; 9988 } 9989 9990 9991 static Long dis_COMISS ( VexAbiInfo* vbi, Prefix pfx, 9992 Long delta, Bool isAvx, UChar opc ) 9993 { 9994 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/); 9995 Int alen = 0; 9996 HChar dis_buf[50]; 9997 IRTemp argL = newTemp(Ity_F32); 9998 IRTemp argR = newTemp(Ity_F32); 9999 UChar modrm = getUChar(delta); 10000 IRTemp addr = IRTemp_INVALID; 10001 if (epartIsReg(modrm)) { 10002 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm), 10003 0/*lowest lane*/ ) ); 10004 delta += 1; 10005 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10006 opc==0x2E ? "u" : "", 10007 nameXMMReg(eregOfRexRM(pfx,modrm)), 10008 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10009 } else { 10010 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10011 assign( argR, loadLE(Ity_F32, mkexpr(addr)) ); 10012 delta += alen; 10013 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "", 10014 opc==0x2E ? 
"u" : "", 10015 dis_buf, 10016 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 10017 } 10018 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm), 10019 0/*lowest lane*/ ) ); 10020 10021 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 10022 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 10023 stmt( IRStmt_Put( 10024 OFFB_CC_DEP1, 10025 binop( Iop_And64, 10026 unop( Iop_32Uto64, 10027 binop(Iop_CmpF64, 10028 unop(Iop_F32toF64,mkexpr(argL)), 10029 unop(Iop_F32toF64,mkexpr(argR)))), 10030 mkU64(0x45) 10031 ))); 10032 return delta; 10033 } 10034 10035 10036 static Long dis_PSHUFD_32x4 ( VexAbiInfo* vbi, Prefix pfx, 10037 Long delta, Bool writesYmm ) 10038 { 10039 Int order; 10040 Int alen = 0; 10041 HChar dis_buf[50]; 10042 IRTemp sV = newTemp(Ity_V128); 10043 UChar modrm = getUChar(delta); 10044 const HChar* strV = writesYmm ? "v" : ""; 10045 IRTemp addr = IRTemp_INVALID; 10046 if (epartIsReg(modrm)) { 10047 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 10048 order = (Int)getUChar(delta+1); 10049 delta += 1+1; 10050 DIP("%spshufd $%d,%s,%s\n", strV, order, 10051 nameXMMReg(eregOfRexRM(pfx,modrm)), 10052 nameXMMReg(gregOfRexRM(pfx,modrm))); 10053 } else { 10054 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10055 1/*byte after the amode*/ ); 10056 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 10057 order = (Int)getUChar(delta+alen); 10058 delta += alen+1; 10059 DIP("%spshufd $%d,%s,%s\n", strV, order, 10060 dis_buf, 10061 nameXMMReg(gregOfRexRM(pfx,modrm))); 10062 } 10063 10064 IRTemp s3, s2, s1, s0; 10065 s3 = s2 = s1 = s0 = IRTemp_INVALID; 10066 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10067 10068 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10069 IRTemp dV = newTemp(Ity_V128); 10070 assign(dV, 10071 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3), 10072 SEL((order>>2)&3), SEL((order>>0)&3) ) 10073 ); 10074 # undef SEL 10075 10076 (writesYmm ? 
putYMMRegLoAndZU : putXMMReg) 10077 (gregOfRexRM(pfx,modrm), mkexpr(dV)); 10078 return delta; 10079 } 10080 10081 10082 static Long dis_PSHUFD_32x8 ( VexAbiInfo* vbi, Prefix pfx, Long delta ) 10083 { 10084 Int order; 10085 Int alen = 0; 10086 HChar dis_buf[50]; 10087 IRTemp sV = newTemp(Ity_V256); 10088 UChar modrm = getUChar(delta); 10089 IRTemp addr = IRTemp_INVALID; 10090 UInt rG = gregOfRexRM(pfx,modrm); 10091 if (epartIsReg(modrm)) { 10092 UInt rE = eregOfRexRM(pfx,modrm); 10093 assign( sV, getYMMReg(rE) ); 10094 order = (Int)getUChar(delta+1); 10095 delta += 1+1; 10096 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG)); 10097 } else { 10098 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 10099 1/*byte after the amode*/ ); 10100 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 10101 order = (Int)getUChar(delta+alen); 10102 delta += alen+1; 10103 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG)); 10104 } 10105 10106 IRTemp s[8]; 10107 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 10108 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 10109 &s[3], &s[2], &s[1], &s[0] ); 10110 10111 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)], 10112 s[4 + ((order>>4)&3)], 10113 s[4 + ((order>>2)&3)], 10114 s[4 + ((order>>0)&3)], 10115 s[0 + ((order>>6)&3)], 10116 s[0 + ((order>>4)&3)], 10117 s[0 + ((order>>2)&3)], 10118 s[0 + ((order>>0)&3)] ) ); 10119 return delta; 10120 } 10121 10122 10123 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm ) 10124 { 10125 IRTemp dV = newTemp(Ity_V128); 10126 IRTemp hi64 = newTemp(Ity_I64); 10127 IRTemp lo64 = newTemp(Ity_I64); 10128 IRTemp hi64r = newTemp(Ity_I64); 10129 IRTemp lo64r = newTemp(Ity_I64); 10130 10131 vassert(imm >= 0 && imm <= 255); 10132 if (imm >= 16) { 10133 assign(dV, mkV128(0x0000)); 10134 return dV; 10135 } 10136 10137 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10138 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10139 10140 if (imm == 0) { 10141 assign( lo64r, mkexpr(lo64) ); 10142 assign( hi64r, mkexpr(hi64) ); 10143 } 10144 else 10145 if (imm == 8) { 10146 assign( hi64r, mkU64(0) ); 10147 assign( lo64r, mkexpr(hi64) ); 10148 } 10149 else 10150 if (imm > 8) { 10151 assign( hi64r, mkU64(0) ); 10152 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) )); 10153 } else { 10154 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) )); 10155 assign( lo64r, 10156 binop( Iop_Or64, 10157 binop(Iop_Shr64, mkexpr(lo64), 10158 mkU8(8 * imm)), 10159 binop(Iop_Shl64, mkexpr(hi64), 10160 mkU8(8 * (8 - imm)) ) 10161 ) 10162 ); 10163 } 10164 10165 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10166 return dV; 10167 } 10168 10169 10170 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm ) 10171 { 10172 IRTemp dV = newTemp(Ity_V128); 10173 IRTemp hi64 = newTemp(Ity_I64); 10174 IRTemp lo64 = newTemp(Ity_I64); 10175 IRTemp hi64r = newTemp(Ity_I64); 10176 IRTemp lo64r = newTemp(Ity_I64); 10177 10178 vassert(imm >= 0 && imm <= 255); 10179 if (imm >= 16) { 10180 assign(dV, mkV128(0x0000)); 10181 return dV; 10182 } 10183 10184 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) ); 10185 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) ); 10186 10187 if (imm == 0) { 10188 assign( lo64r, mkexpr(lo64) ); 10189 assign( hi64r, mkexpr(hi64) ); 10190 } 10191 else 10192 if (imm == 8) { 10193 assign( lo64r, mkU64(0) ); 10194 assign( hi64r, mkexpr(lo64) ); 10195 } 10196 else 10197 if (imm > 8) { 10198 assign( lo64r, mkU64(0) ); 10199 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 
8*(imm-8) ) )); 10200 } else { 10201 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) )); 10202 assign( hi64r, 10203 binop( Iop_Or64, 10204 binop(Iop_Shl64, mkexpr(hi64), 10205 mkU8(8 * imm)), 10206 binop(Iop_Shr64, mkexpr(lo64), 10207 mkU8(8 * (8 - imm)) ) 10208 ) 10209 ); 10210 } 10211 10212 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) ); 10213 return dV; 10214 } 10215 10216 10217 static Long dis_CVTxSD2SI ( VexAbiInfo* vbi, Prefix pfx, 10218 Long delta, Bool isAvx, UChar opc, Int sz ) 10219 { 10220 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/); 10221 HChar dis_buf[50]; 10222 Int alen = 0; 10223 UChar modrm = getUChar(delta); 10224 IRTemp addr = IRTemp_INVALID; 10225 IRTemp rmode = newTemp(Ity_I32); 10226 IRTemp f64lo = newTemp(Ity_F64); 10227 Bool r2zero = toBool(opc == 0x2C); 10228 10229 if (epartIsReg(modrm)) { 10230 delta += 1; 10231 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 10232 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10233 nameXMMReg(eregOfRexRM(pfx,modrm)), 10234 nameIReg(sz, gregOfRexRM(pfx,modrm), 10235 False)); 10236 } else { 10237 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10238 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 10239 delta += alen; 10240 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10241 dis_buf, 10242 nameIReg(sz, gregOfRexRM(pfx,modrm), 10243 False)); 10244 } 10245 10246 if (r2zero) { 10247 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10248 } else { 10249 assign( rmode, get_sse_roundingmode() ); 10250 } 10251 10252 if (sz == 4) { 10253 putIReg32( gregOfRexRM(pfx,modrm), 10254 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) ); 10255 } else { 10256 vassert(sz == 8); 10257 putIReg64( gregOfRexRM(pfx,modrm), 10258 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) ); 10259 } 10260 10261 return delta; 10262 } 10263 10264 10265 static Long dis_CVTxSS2SI ( VexAbiInfo* vbi, Prefix pfx, 10266 Long delta, Bool isAvx, UChar opc, Int sz ) 10267 { 10268 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/); 10269 HChar dis_buf[50]; 10270 Int alen = 0; 10271 UChar modrm = getUChar(delta); 10272 IRTemp addr = IRTemp_INVALID; 10273 IRTemp rmode = newTemp(Ity_I32); 10274 IRTemp f32lo = newTemp(Ity_F32); 10275 Bool r2zero = toBool(opc == 0x2C); 10276 10277 if (epartIsReg(modrm)) { 10278 delta += 1; 10279 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 10280 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "", 10281 nameXMMReg(eregOfRexRM(pfx,modrm)), 10282 nameIReg(sz, gregOfRexRM(pfx,modrm), 10283 False)); 10284 } else { 10285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10286 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 10287 delta += alen; 10288 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? 
"t" : "", 10289 dis_buf, 10290 nameIReg(sz, gregOfRexRM(pfx,modrm), 10291 False)); 10292 } 10293 10294 if (r2zero) { 10295 assign( rmode, mkU32((UInt)Irrm_ZERO) ); 10296 } else { 10297 assign( rmode, get_sse_roundingmode() ); 10298 } 10299 10300 if (sz == 4) { 10301 putIReg32( gregOfRexRM(pfx,modrm), 10302 binop( Iop_F64toI32S, 10303 mkexpr(rmode), 10304 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10305 } else { 10306 vassert(sz == 8); 10307 putIReg64( gregOfRexRM(pfx,modrm), 10308 binop( Iop_F64toI64S, 10309 mkexpr(rmode), 10310 unop(Iop_F32toF64, mkexpr(f32lo))) ); 10311 } 10312 10313 return delta; 10314 } 10315 10316 10317 static Long dis_CVTPS2PD_128 ( VexAbiInfo* vbi, Prefix pfx, 10318 Long delta, Bool isAvx ) 10319 { 10320 IRTemp addr = IRTemp_INVALID; 10321 Int alen = 0; 10322 HChar dis_buf[50]; 10323 IRTemp f32lo = newTemp(Ity_F32); 10324 IRTemp f32hi = newTemp(Ity_F32); 10325 UChar modrm = getUChar(delta); 10326 UInt rG = gregOfRexRM(pfx,modrm); 10327 if (epartIsReg(modrm)) { 10328 UInt rE = eregOfRexRM(pfx,modrm); 10329 assign( f32lo, getXMMRegLane32F(rE, 0) ); 10330 assign( f32hi, getXMMRegLane32F(rE, 1) ); 10331 delta += 1; 10332 DIP("%scvtps2pd %s,%s\n", 10333 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10334 } else { 10335 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10336 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 10337 assign( f32hi, loadLE(Ity_F32, 10338 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10339 delta += alen; 10340 DIP("%scvtps2pd %s,%s\n", 10341 isAvx ? "v" : "", dis_buf, nameXMMReg(rG)); 10342 } 10343 10344 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) ); 10345 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) ); 10346 if (isAvx) 10347 putYMMRegLane128( rG, 1, mkV128(0)); 10348 return delta; 10349 } 10350 10351 10352 static Long dis_CVTPS2PD_256 ( VexAbiInfo* vbi, Prefix pfx, 10353 Long delta ) 10354 { 10355 IRTemp addr = IRTemp_INVALID; 10356 Int alen = 0; 10357 HChar dis_buf[50]; 10358 IRTemp f32_0 = newTemp(Ity_F32); 10359 IRTemp f32_1 = newTemp(Ity_F32); 10360 IRTemp f32_2 = newTemp(Ity_F32); 10361 IRTemp f32_3 = newTemp(Ity_F32); 10362 UChar modrm = getUChar(delta); 10363 UInt rG = gregOfRexRM(pfx,modrm); 10364 if (epartIsReg(modrm)) { 10365 UInt rE = eregOfRexRM(pfx,modrm); 10366 assign( f32_0, getXMMRegLane32F(rE, 0) ); 10367 assign( f32_1, getXMMRegLane32F(rE, 1) ); 10368 assign( f32_2, getXMMRegLane32F(rE, 2) ); 10369 assign( f32_3, getXMMRegLane32F(rE, 3) ); 10370 delta += 1; 10371 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 10372 } else { 10373 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10374 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) ); 10375 assign( f32_1, loadLE(Ity_F32, 10376 binop(Iop_Add64,mkexpr(addr),mkU64(4))) ); 10377 assign( f32_2, loadLE(Ity_F32, 10378 binop(Iop_Add64,mkexpr(addr),mkU64(8))) ); 10379 assign( f32_3, loadLE(Ity_F32, 10380 binop(Iop_Add64,mkexpr(addr),mkU64(12))) ); 10381 delta += alen; 10382 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG)); 10383 } 10384 10385 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) ); 10386 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) ); 10387 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) ); 10388 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) ); 10389 return delta; 10390 } 10391 10392 10393 static Long dis_CVTPD2PS_128 ( VexAbiInfo* vbi, Prefix pfx, 10394 Long delta, Bool isAvx ) 10395 { 10396 IRTemp addr = IRTemp_INVALID; 10397 Int alen = 0; 10398 HChar dis_buf[50]; 10399 UChar 
modrm = getUChar(delta); 10400 UInt rG = gregOfRexRM(pfx,modrm); 10401 IRTemp argV = newTemp(Ity_V128); 10402 IRTemp rmode = newTemp(Ity_I32); 10403 if (epartIsReg(modrm)) { 10404 UInt rE = eregOfRexRM(pfx,modrm); 10405 assign( argV, getXMMReg(rE) ); 10406 delta += 1; 10407 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10408 nameXMMReg(rE), nameXMMReg(rG)); 10409 } else { 10410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10411 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10412 delta += alen; 10413 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "", 10414 dis_buf, nameXMMReg(rG) ); 10415 } 10416 10417 assign( rmode, get_sse_roundingmode() ); 10418 IRTemp t0 = newTemp(Ity_F64); 10419 IRTemp t1 = newTemp(Ity_F64); 10420 assign( t0, unop(Iop_ReinterpI64asF64, 10421 unop(Iop_V128to64, mkexpr(argV))) ); 10422 assign( t1, unop(Iop_ReinterpI64asF64, 10423 unop(Iop_V128HIto64, mkexpr(argV))) ); 10424 10425 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) ) 10426 putXMMRegLane32( rG, 3, mkU32(0) ); 10427 putXMMRegLane32( rG, 2, mkU32(0) ); 10428 putXMMRegLane32F( rG, 1, CVT(t1) ); 10429 putXMMRegLane32F( rG, 0, CVT(t0) ); 10430 # undef CVT 10431 if (isAvx) 10432 putYMMRegLane128( rG, 1, mkV128(0) ); 10433 10434 return delta; 10435 } 10436 10437 10438 static Long dis_CVTxPS2DQ_128 ( VexAbiInfo* vbi, Prefix pfx, 10439 Long delta, Bool isAvx, Bool r2zero ) 10440 { 10441 IRTemp addr = IRTemp_INVALID; 10442 Int alen = 0; 10443 HChar dis_buf[50]; 10444 UChar modrm = getUChar(delta); 10445 IRTemp argV = newTemp(Ity_V128); 10446 IRTemp rmode = newTemp(Ity_I32); 10447 UInt rG = gregOfRexRM(pfx,modrm); 10448 IRTemp t0, t1, t2, t3; 10449 10450 if (epartIsReg(modrm)) { 10451 UInt rE = eregOfRexRM(pfx,modrm); 10452 assign( argV, getXMMReg(rE) ); 10453 delta += 1; 10454 DIP("%scvt%sps2dq %s,%s\n", 10455 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10456 } else { 10457 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10458 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10459 delta += alen; 10460 DIP("%scvt%sps2dq %s,%s\n", 10461 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10462 } 10463 10464 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10465 : get_sse_roundingmode() ); 10466 t0 = t1 = t2 = t3 = IRTemp_INVALID; 10467 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10468 /* This is less than ideal. If it turns out to be a performance 10469 bottleneck it can be improved. */ 10470 # define CVT(_t) \ 10471 binop( Iop_F64toI32S, \ 10472 mkexpr(rmode), \ 10473 unop( Iop_F32toF64, \ 10474 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10475 10476 putXMMRegLane32( rG, 3, CVT(t3) ); 10477 putXMMRegLane32( rG, 2, CVT(t2) ); 10478 putXMMRegLane32( rG, 1, CVT(t1) ); 10479 putXMMRegLane32( rG, 0, CVT(t0) ); 10480 # undef CVT 10481 if (isAvx) 10482 putYMMRegLane128( rG, 1, mkV128(0) ); 10483 10484 return delta; 10485 } 10486 10487 10488 static Long dis_CVTxPS2DQ_256 ( VexAbiInfo* vbi, Prefix pfx, 10489 Long delta, Bool r2zero ) 10490 { 10491 IRTemp addr = IRTemp_INVALID; 10492 Int alen = 0; 10493 HChar dis_buf[50]; 10494 UChar modrm = getUChar(delta); 10495 IRTemp argV = newTemp(Ity_V256); 10496 IRTemp rmode = newTemp(Ity_I32); 10497 UInt rG = gregOfRexRM(pfx,modrm); 10498 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10499 10500 if (epartIsReg(modrm)) { 10501 UInt rE = eregOfRexRM(pfx,modrm); 10502 assign( argV, getYMMReg(rE) ); 10503 delta += 1; 10504 DIP("vcvt%sps2dq %s,%s\n", 10505 r2zero ? 
"t" : "", nameYMMReg(rE), nameYMMReg(rG)); 10506 } else { 10507 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10508 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10509 delta += alen; 10510 DIP("vcvt%sps2dq %s,%s\n", 10511 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) ); 10512 } 10513 10514 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO) 10515 : get_sse_roundingmode() ); 10516 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID; 10517 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10518 /* This is less than ideal. If it turns out to be a performance 10519 bottleneck it can be improved. */ 10520 # define CVT(_t) \ 10521 binop( Iop_F64toI32S, \ 10522 mkexpr(rmode), \ 10523 unop( Iop_F32toF64, \ 10524 unop( Iop_ReinterpI32asF32, mkexpr(_t))) ) 10525 10526 putYMMRegLane32( rG, 7, CVT(t7) ); 10527 putYMMRegLane32( rG, 6, CVT(t6) ); 10528 putYMMRegLane32( rG, 5, CVT(t5) ); 10529 putYMMRegLane32( rG, 4, CVT(t4) ); 10530 putYMMRegLane32( rG, 3, CVT(t3) ); 10531 putYMMRegLane32( rG, 2, CVT(t2) ); 10532 putYMMRegLane32( rG, 1, CVT(t1) ); 10533 putYMMRegLane32( rG, 0, CVT(t0) ); 10534 # undef CVT 10535 10536 return delta; 10537 } 10538 10539 10540 static Long dis_CVTxPD2DQ_128 ( VexAbiInfo* vbi, Prefix pfx, 10541 Long delta, Bool isAvx, Bool r2zero ) 10542 { 10543 IRTemp addr = IRTemp_INVALID; 10544 Int alen = 0; 10545 HChar dis_buf[50]; 10546 UChar modrm = getUChar(delta); 10547 IRTemp argV = newTemp(Ity_V128); 10548 IRTemp rmode = newTemp(Ity_I32); 10549 UInt rG = gregOfRexRM(pfx,modrm); 10550 IRTemp t0, t1; 10551 10552 if (epartIsReg(modrm)) { 10553 UInt rE = eregOfRexRM(pfx,modrm); 10554 assign( argV, getXMMReg(rE) ); 10555 delta += 1; 10556 DIP("%scvt%spd2dq %s,%s\n", 10557 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG)); 10558 } else { 10559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10560 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10561 delta += alen; 10562 DIP("%scvt%spd2dqx %s,%s\n", 10563 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10564 } 10565 10566 if (r2zero) { 10567 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10568 } else { 10569 assign( rmode, get_sse_roundingmode() ); 10570 } 10571 10572 t0 = newTemp(Ity_F64); 10573 t1 = newTemp(Ity_F64); 10574 assign( t0, unop(Iop_ReinterpI64asF64, 10575 unop(Iop_V128to64, mkexpr(argV))) ); 10576 assign( t1, unop(Iop_ReinterpI64asF64, 10577 unop(Iop_V128HIto64, mkexpr(argV))) ); 10578 10579 # define CVT(_t) binop( Iop_F64toI32S, \ 10580 mkexpr(rmode), \ 10581 mkexpr(_t) ) 10582 10583 putXMMRegLane32( rG, 3, mkU32(0) ); 10584 putXMMRegLane32( rG, 2, mkU32(0) ); 10585 putXMMRegLane32( rG, 1, CVT(t1) ); 10586 putXMMRegLane32( rG, 0, CVT(t0) ); 10587 # undef CVT 10588 if (isAvx) 10589 putYMMRegLane128( rG, 1, mkV128(0) ); 10590 10591 return delta; 10592 } 10593 10594 10595 static Long dis_CVTxPD2DQ_256 ( VexAbiInfo* vbi, Prefix pfx, 10596 Long delta, Bool r2zero ) 10597 { 10598 IRTemp addr = IRTemp_INVALID; 10599 Int alen = 0; 10600 HChar dis_buf[50]; 10601 UChar modrm = getUChar(delta); 10602 IRTemp argV = newTemp(Ity_V256); 10603 IRTemp rmode = newTemp(Ity_I32); 10604 UInt rG = gregOfRexRM(pfx,modrm); 10605 IRTemp t0, t1, t2, t3; 10606 10607 if (epartIsReg(modrm)) { 10608 UInt rE = eregOfRexRM(pfx,modrm); 10609 assign( argV, getYMMReg(rE) ); 10610 delta += 1; 10611 DIP("vcvt%spd2dq %s,%s\n", 10612 r2zero ? 
"t" : "", nameYMMReg(rE), nameXMMReg(rG)); 10613 } else { 10614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10615 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10616 delta += alen; 10617 DIP("vcvt%spd2dqy %s,%s\n", 10618 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) ); 10619 } 10620 10621 if (r2zero) { 10622 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 10623 } else { 10624 assign( rmode, get_sse_roundingmode() ); 10625 } 10626 10627 t0 = IRTemp_INVALID; 10628 t1 = IRTemp_INVALID; 10629 t2 = IRTemp_INVALID; 10630 t3 = IRTemp_INVALID; 10631 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 10632 10633 # define CVT(_t) binop( Iop_F64toI32S, \ 10634 mkexpr(rmode), \ 10635 unop( Iop_ReinterpI64asF64, \ 10636 mkexpr(_t) ) ) 10637 10638 putXMMRegLane32( rG, 3, CVT(t3) ); 10639 putXMMRegLane32( rG, 2, CVT(t2) ); 10640 putXMMRegLane32( rG, 1, CVT(t1) ); 10641 putXMMRegLane32( rG, 0, CVT(t0) ); 10642 # undef CVT 10643 putYMMRegLane128( rG, 1, mkV128(0) ); 10644 10645 return delta; 10646 } 10647 10648 10649 static Long dis_CVTDQ2PS_128 ( VexAbiInfo* vbi, Prefix pfx, 10650 Long delta, Bool isAvx ) 10651 { 10652 IRTemp addr = IRTemp_INVALID; 10653 Int alen = 0; 10654 HChar dis_buf[50]; 10655 UChar modrm = getUChar(delta); 10656 IRTemp argV = newTemp(Ity_V128); 10657 IRTemp rmode = newTemp(Ity_I32); 10658 UInt rG = gregOfRexRM(pfx,modrm); 10659 IRTemp t0, t1, t2, t3; 10660 10661 if (epartIsReg(modrm)) { 10662 UInt rE = eregOfRexRM(pfx,modrm); 10663 assign( argV, getXMMReg(rE) ); 10664 delta += 1; 10665 DIP("%scvtdq2ps %s,%s\n", 10666 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 10667 } else { 10668 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10669 assign( argV, loadLE(Ity_V128, mkexpr(addr)) ); 10670 delta += alen; 10671 DIP("%scvtdq2ps %s,%s\n", 10672 isAvx ? 
"v" : "", dis_buf, nameXMMReg(rG) ); 10673 } 10674 10675 assign( rmode, get_sse_roundingmode() ); 10676 t0 = IRTemp_INVALID; 10677 t1 = IRTemp_INVALID; 10678 t2 = IRTemp_INVALID; 10679 t3 = IRTemp_INVALID; 10680 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10681 10682 # define CVT(_t) binop( Iop_F64toF32, \ 10683 mkexpr(rmode), \ 10684 unop(Iop_I32StoF64,mkexpr(_t))) 10685 10686 putXMMRegLane32F( rG, 3, CVT(t3) ); 10687 putXMMRegLane32F( rG, 2, CVT(t2) ); 10688 putXMMRegLane32F( rG, 1, CVT(t1) ); 10689 putXMMRegLane32F( rG, 0, CVT(t0) ); 10690 # undef CVT 10691 if (isAvx) 10692 putYMMRegLane128( rG, 1, mkV128(0) ); 10693 10694 return delta; 10695 } 10696 10697 static Long dis_CVTDQ2PS_256 ( VexAbiInfo* vbi, Prefix pfx, 10698 Long delta ) 10699 { 10700 IRTemp addr = IRTemp_INVALID; 10701 Int alen = 0; 10702 HChar dis_buf[50]; 10703 UChar modrm = getUChar(delta); 10704 IRTemp argV = newTemp(Ity_V256); 10705 IRTemp rmode = newTemp(Ity_I32); 10706 UInt rG = gregOfRexRM(pfx,modrm); 10707 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10708 10709 if (epartIsReg(modrm)) { 10710 UInt rE = eregOfRexRM(pfx,modrm); 10711 assign( argV, getYMMReg(rE) ); 10712 delta += 1; 10713 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 10714 } else { 10715 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10716 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10717 delta += alen; 10718 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) ); 10719 } 10720 10721 assign( rmode, get_sse_roundingmode() ); 10722 t0 = IRTemp_INVALID; 10723 t1 = IRTemp_INVALID; 10724 t2 = IRTemp_INVALID; 10725 t3 = IRTemp_INVALID; 10726 t4 = IRTemp_INVALID; 10727 t5 = IRTemp_INVALID; 10728 t6 = IRTemp_INVALID; 10729 t7 = IRTemp_INVALID; 10730 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10731 10732 # define CVT(_t) binop( Iop_F64toF32, \ 10733 mkexpr(rmode), \ 10734 unop(Iop_I32StoF64,mkexpr(_t))) 10735 10736 putYMMRegLane32F( rG, 7, CVT(t7) ); 10737 putYMMRegLane32F( rG, 6, CVT(t6) ); 10738 putYMMRegLane32F( rG, 5, CVT(t5) ); 10739 putYMMRegLane32F( rG, 4, CVT(t4) ); 10740 putYMMRegLane32F( rG, 3, CVT(t3) ); 10741 putYMMRegLane32F( rG, 2, CVT(t2) ); 10742 putYMMRegLane32F( rG, 1, CVT(t1) ); 10743 putYMMRegLane32F( rG, 0, CVT(t0) ); 10744 # undef CVT 10745 10746 return delta; 10747 } 10748 10749 10750 static Long dis_PMOVMSKB_128 ( VexAbiInfo* vbi, Prefix pfx, 10751 Long delta, Bool isAvx ) 10752 { 10753 UChar modrm = getUChar(delta); 10754 vassert(epartIsReg(modrm)); /* ensured by caller */ 10755 UInt rE = eregOfRexRM(pfx,modrm); 10756 UInt rG = gregOfRexRM(pfx,modrm); 10757 IRTemp t0 = newTemp(Ity_V128); 10758 IRTemp t1 = newTemp(Ity_I32); 10759 assign(t0, getXMMReg(rE)); 10760 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0)))); 10761 putIReg32(rG, mkexpr(t1)); 10762 DIP("%spmovmskb %s,%s\n", isAvx ? 
"v" : "", nameXMMReg(rE), 10763 nameIReg32(rG)); 10764 delta += 1; 10765 return delta; 10766 } 10767 10768 10769 static Long dis_PMOVMSKB_256 ( VexAbiInfo* vbi, Prefix pfx, 10770 Long delta ) 10771 { 10772 UChar modrm = getUChar(delta); 10773 vassert(epartIsReg(modrm)); /* ensured by caller */ 10774 UInt rE = eregOfRexRM(pfx,modrm); 10775 UInt rG = gregOfRexRM(pfx,modrm); 10776 IRTemp t0 = newTemp(Ity_V128); 10777 IRTemp t1 = newTemp(Ity_V128); 10778 IRTemp t2 = newTemp(Ity_I16); 10779 IRTemp t3 = newTemp(Ity_I16); 10780 assign(t0, getYMMRegLane128(rE, 0)); 10781 assign(t1, getYMMRegLane128(rE, 1)); 10782 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0))); 10783 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1))); 10784 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2))); 10785 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 10786 delta += 1; 10787 return delta; 10788 } 10789 10790 10791 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the 10792 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */ 10793 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */ 10794 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10795 { 10796 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10797 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10798 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 10799 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10800 IRTemp res = newTemp(Ity_V128); 10801 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 ) 10802 : mkV128from32s( s1, d1, s0, d0 )); 10803 return res; 10804 } 10805 10806 10807 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */ 10808 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */ 10809 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10810 { 10811 IRTemp s1 = newTemp(Ity_I64); 10812 IRTemp s0 = newTemp(Ity_I64); 10813 IRTemp d1 = newTemp(Ity_I64); 10814 IRTemp d0 = newTemp(Ity_I64); 10815 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10816 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10817 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10818 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10819 IRTemp res = newTemp(Ity_V128); 10820 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) 10821 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0))); 10822 return res; 10823 } 10824 10825 10826 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD. 10827 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI} 10828 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid 10829 way. */ 10830 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10831 { 10832 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10833 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10834 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 10835 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 10836 IRTemp res = newTemp(Ity_V256); 10837 assign(res, xIsH 10838 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3), 10839 mkexpr(s1), mkexpr(d1)) 10840 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2), 10841 mkexpr(s0), mkexpr(d0))); 10842 return res; 10843 } 10844 10845 10846 /* FIXME: this is really bad. Surely can do something better here? 10847 One observation is that the steering in the upper and lower 128 bit 10848 halves is the same as with math_UNPCKxPS_128, so we simply split 10849 into two halves, and use that. Consequently any improvement in 10850 math_UNPCKxPS_128 (probably, to use interleave-style primops) 10851 benefits this too. 
*/ 10852 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH ) 10853 { 10854 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10855 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10856 breakupV256toV128s( sV, &sVhi, &sVlo ); 10857 breakupV256toV128s( dV, &dVhi, &dVlo ); 10858 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH); 10859 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH); 10860 IRTemp rV = newTemp(Ity_V256); 10861 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 10862 return rV; 10863 } 10864 10865 10866 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10867 { 10868 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 10869 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 10870 vassert(imm8 < 256); 10871 10872 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 10873 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 10874 10875 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3))) 10876 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 10877 IRTemp res = newTemp(Ity_V128); 10878 assign(res, 10879 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3), 10880 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) ); 10881 # undef SELD 10882 # undef SELS 10883 return res; 10884 } 10885 10886 10887 /* 256-bit SHUFPS appears to steer each of the 128-bit halves 10888 identically. Hence do the clueless thing and use math_SHUFPS_128 10889 twice. */ 10890 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10891 { 10892 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10893 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10894 breakupV256toV128s( sV, &sVhi, &sVlo ); 10895 breakupV256toV128s( dV, &dVhi, &dVlo ); 10896 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8); 10897 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8); 10898 IRTemp rV = newTemp(Ity_V256); 10899 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 10900 return rV; 10901 } 10902 10903 10904 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10905 { 10906 IRTemp s1 = newTemp(Ity_I64); 10907 IRTemp s0 = newTemp(Ity_I64); 10908 IRTemp d1 = newTemp(Ity_I64); 10909 IRTemp d0 = newTemp(Ity_I64); 10910 10911 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) ); 10912 assign( d0, unop(Iop_V128to64, mkexpr(dV)) ); 10913 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) ); 10914 assign( s0, unop(Iop_V128to64, mkexpr(sV)) ); 10915 10916 # define SELD(n) mkexpr((n)==0 ? d0 : d1) 10917 # define SELS(n) mkexpr((n)==0 ? 
s0 : s1) 10918 10919 IRTemp res = newTemp(Ity_V128); 10920 assign(res, binop( Iop_64HLtoV128, 10921 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) ); 10922 10923 # undef SELD 10924 # undef SELS 10925 return res; 10926 } 10927 10928 10929 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10930 { 10931 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10932 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10933 breakupV256toV128s( sV, &sVhi, &sVlo ); 10934 breakupV256toV128s( dV, &dVhi, &dVlo ); 10935 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3); 10936 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3); 10937 IRTemp rV = newTemp(Ity_V256); 10938 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 10939 return rV; 10940 } 10941 10942 10943 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10944 { 10945 UShort imm8_mask_16; 10946 IRTemp imm8_mask = newTemp(Ity_V128); 10947 10948 switch( imm8 & 3 ) { 10949 case 0: imm8_mask_16 = 0x0000; break; 10950 case 1: imm8_mask_16 = 0x00FF; break; 10951 case 2: imm8_mask_16 = 0xFF00; break; 10952 case 3: imm8_mask_16 = 0xFFFF; break; 10953 default: vassert(0); break; 10954 } 10955 assign( imm8_mask, mkV128( imm8_mask_16 ) ); 10956 10957 IRTemp res = newTemp(Ity_V128); 10958 assign ( res, binop( Iop_OrV128, 10959 binop( Iop_AndV128, mkexpr(sV), 10960 mkexpr(imm8_mask) ), 10961 binop( Iop_AndV128, mkexpr(dV), 10962 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 10963 return res; 10964 } 10965 10966 10967 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10968 { 10969 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 10970 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 10971 breakupV256toV128s( sV, &sVhi, &sVlo ); 10972 breakupV256toV128s( dV, &dVhi, &dVlo ); 10973 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3); 10974 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3); 10975 IRTemp rV = newTemp(Ity_V256); 10976 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 10977 return rV; 10978 } 10979 10980 10981 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 10982 { 10983 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 10984 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 10985 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 10986 0xFFFF }; 10987 IRTemp imm8_mask = newTemp(Ity_V128); 10988 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) ); 10989 10990 IRTemp res = newTemp(Ity_V128); 10991 assign ( res, binop( Iop_OrV128, 10992 binop( Iop_AndV128, mkexpr(sV), 10993 mkexpr(imm8_mask) ), 10994 binop( Iop_AndV128, mkexpr(dV), 10995 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) ); 10996 return res; 10997 } 10998 10999 11000 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11001 { 11002 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 11003 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID; 11004 breakupV256toV128s( sV, &sVhi, &sVlo ); 11005 breakupV256toV128s( dV, &dVhi, &dVlo ); 11006 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15); 11007 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15); 11008 IRTemp rV = newTemp(Ity_V256); 11009 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo))); 11010 return rV; 11011 } 11012 11013 11014 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 ) 11015 { 11016 /* Make w be a 16-bit version of imm8, formed by duplicating each 11017 bit in imm8. 
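   For example, imm8 = 0x55 (01010101b) expands to 0x3333; mkV128 below
   then widens each of those bits to a whole byte, so the even 16-bit
   lanes of the result come from sV and the odd lanes from dV.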
*/ 11018 Int i; 11019 UShort imm16 = 0; 11020 for (i = 0; i < 8; i++) { 11021 if (imm8 & (1 << i)) 11022 imm16 |= (3 << (2*i)); 11023 } 11024 IRTemp imm16_mask = newTemp(Ity_V128); 11025 assign( imm16_mask, mkV128( imm16 )); 11026 11027 IRTemp res = newTemp(Ity_V128); 11028 assign ( res, binop( Iop_OrV128, 11029 binop( Iop_AndV128, mkexpr(sV), 11030 mkexpr(imm16_mask) ), 11031 binop( Iop_AndV128, mkexpr(dV), 11032 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) ); 11033 return res; 11034 } 11035 11036 11037 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV ) 11038 { 11039 /* This is a really poor translation -- could be improved if 11040 performance critical */ 11041 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 11042 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 11043 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 11044 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11045 IRTemp res = newTemp(Ity_V128); 11046 assign(res, binop(Iop_64HLtoV128, 11047 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)), 11048 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) )); 11049 return res; 11050 } 11051 11052 11053 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV ) 11054 { 11055 /* This is a really poor translation -- could be improved if 11056 performance critical */ 11057 IRTemp sHi, sLo, dHi, dLo; 11058 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11059 breakupV256toV128s( dV, &dHi, &dLo); 11060 breakupV256toV128s( sV, &sHi, &sLo); 11061 IRTemp res = newTemp(Ity_V256); 11062 assign(res, binop(Iop_V128HLtoV256, 11063 mkexpr(math_PMULUDQ_128(sHi, dHi)), 11064 mkexpr(math_PMULUDQ_128(sLo, dLo)))); 11065 return res; 11066 } 11067 11068 11069 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV ) 11070 { 11071 /* This is a really poor translation -- could be improved if 11072 performance critical */ 11073 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 11074 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 11075 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 11076 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11077 IRTemp res = newTemp(Ity_V128); 11078 assign(res, binop(Iop_64HLtoV128, 11079 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)), 11080 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) )); 11081 return res; 11082 } 11083 11084 11085 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV ) 11086 { 11087 /* This is a really poor translation -- could be improved if 11088 performance critical */ 11089 IRTemp sHi, sLo, dHi, dLo; 11090 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11091 breakupV256toV128s( dV, &dHi, &dLo); 11092 breakupV256toV128s( sV, &sHi, &sLo); 11093 IRTemp res = newTemp(Ity_V256); 11094 assign(res, binop(Iop_V128HLtoV256, 11095 mkexpr(math_PMULDQ_128(sHi, dHi)), 11096 mkexpr(math_PMULDQ_128(sLo, dLo)))); 11097 return res; 11098 } 11099 11100 11101 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV ) 11102 { 11103 IRTemp sVhi, sVlo, dVhi, dVlo; 11104 IRTemp resHi = newTemp(Ity_I64); 11105 IRTemp resLo = newTemp(Ity_I64); 11106 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID; 11107 breakupV128to64s( sV, &sVhi, &sVlo ); 11108 breakupV128to64s( dV, &dVhi, &dVlo ); 11109 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/, 11110 "amd64g_calculate_mmx_pmaddwd", 11111 &amd64g_calculate_mmx_pmaddwd, 11112 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi)))); 11113 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/, 11114 "amd64g_calculate_mmx_pmaddwd", 11115 &amd64g_calculate_mmx_pmaddwd, 11116 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo)))); 11117 IRTemp res = newTemp(Ity_V128); 11118 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), 
mkexpr(resLo))) ; 11119 return res; 11120 } 11121 11122 11123 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV ) 11124 { 11125 IRTemp sHi, sLo, dHi, dLo; 11126 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11127 breakupV256toV128s( dV, &dHi, &dLo); 11128 breakupV256toV128s( sV, &sHi, &sLo); 11129 IRTemp res = newTemp(Ity_V256); 11130 assign(res, binop(Iop_V128HLtoV256, 11131 mkexpr(math_PMADDWD_128(dHi, sHi)), 11132 mkexpr(math_PMADDWD_128(dLo, sLo)))); 11133 return res; 11134 } 11135 11136 11137 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV ) 11138 { 11139 IRTemp addV = newTemp(Ity_V128); 11140 IRTemp subV = newTemp(Ity_V128); 11141 IRTemp a1 = newTemp(Ity_I64); 11142 IRTemp s0 = newTemp(Ity_I64); 11143 IRTemp rm = newTemp(Ity_I32); 11144 11145 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11146 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11147 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11148 11149 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) )); 11150 assign( s0, unop(Iop_V128to64, mkexpr(subV) )); 11151 11152 IRTemp res = newTemp(Ity_V128); 11153 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) ); 11154 return res; 11155 } 11156 11157 11158 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV ) 11159 { 11160 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11161 IRTemp addV = newTemp(Ity_V256); 11162 IRTemp subV = newTemp(Ity_V256); 11163 IRTemp rm = newTemp(Ity_I32); 11164 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11165 11166 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11167 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11168 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11169 11170 breakupV256to64s( addV, &a3, &a2, &a1, &a0 ); 11171 breakupV256to64s( subV, &s3, &s2, &s1, &s0 ); 11172 11173 IRTemp res = newTemp(Ity_V256); 11174 assign( res, mkV256from64s( a3, s2, a1, s0 ) ); 11175 return res; 11176 } 11177 11178 11179 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV ) 11180 { 11181 IRTemp a3, a2, a1, a0, s3, s2, s1, s0; 11182 IRTemp addV = newTemp(Ity_V128); 11183 IRTemp subV = newTemp(Ity_V128); 11184 IRTemp rm = newTemp(Ity_I32); 11185 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11186 11187 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11188 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11189 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11190 11191 breakupV128to32s( addV, &a3, &a2, &a1, &a0 ); 11192 breakupV128to32s( subV, &s3, &s2, &s1, &s0 ); 11193 11194 IRTemp res = newTemp(Ity_V128); 11195 assign( res, mkV128from32s( a3, s2, a1, s0 ) ); 11196 return res; 11197 } 11198 11199 11200 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV ) 11201 { 11202 IRTemp a7, a6, a5, a4, a3, a2, a1, a0; 11203 IRTemp s7, s6, s5, s4, s3, s2, s1, s0; 11204 IRTemp addV = newTemp(Ity_V256); 11205 IRTemp subV = newTemp(Ity_V256); 11206 IRTemp rm = newTemp(Ity_I32); 11207 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID; 11208 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 11209 11210 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 11211 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11212 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) ); 11213 11214 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 ); 11215 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, 
&s2, &s1, &s0 ); 11216 11217 IRTemp res = newTemp(Ity_V256); 11218 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) ); 11219 return res; 11220 } 11221 11222 11223 /* Handle 128 bit PSHUFLW and PSHUFHW. */ 11224 static Long dis_PSHUFxW_128 ( VexAbiInfo* vbi, Prefix pfx, 11225 Long delta, Bool isAvx, Bool xIsH ) 11226 { 11227 IRTemp addr = IRTemp_INVALID; 11228 Int alen = 0; 11229 HChar dis_buf[50]; 11230 UChar modrm = getUChar(delta); 11231 UInt rG = gregOfRexRM(pfx,modrm); 11232 UInt imm8; 11233 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0; 11234 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11235 sV = newTemp(Ity_V128); 11236 dV = newTemp(Ity_V128); 11237 sVmut = newTemp(Ity_I64); 11238 dVmut = newTemp(Ity_I64); 11239 sVcon = newTemp(Ity_I64); 11240 if (epartIsReg(modrm)) { 11241 UInt rE = eregOfRexRM(pfx,modrm); 11242 assign( sV, getXMMReg(rE) ); 11243 imm8 = (UInt)getUChar(delta+1); 11244 delta += 1+1; 11245 DIP("%spshuf%cw $%u,%s,%s\n", 11246 isAvx ? "v" : "", xIsH ? 'h' : 'l', 11247 imm8, nameXMMReg(rE), nameXMMReg(rG)); 11248 } else { 11249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 11250 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 11251 imm8 = (UInt)getUChar(delta+alen); 11252 delta += alen+1; 11253 DIP("%spshuf%cw $%u,%s,%s\n", 11254 isAvx ? "v" : "", xIsH ? 'h' : 'l', 11255 imm8, dis_buf, nameXMMReg(rG)); 11256 } 11257 11258 /* Get the to-be-changed (mut) and unchanging (con) bits of the 11259 source. */ 11260 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) ); 11261 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) ); 11262 11263 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 ); 11264 # define SEL(n) \ 11265 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 11266 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3), 11267 SEL((imm8>>2)&3), SEL((imm8>>0)&3) )); 11268 # undef SEL 11269 11270 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon)) 11271 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) ); 11272 11273 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV)); 11274 return delta; 11275 } 11276 11277 11278 /* Handle 256 bit PSHUFLW and PSHUFHW. */ 11279 static Long dis_PSHUFxW_256 ( VexAbiInfo* vbi, Prefix pfx, 11280 Long delta, Bool xIsH ) 11281 { 11282 IRTemp addr = IRTemp_INVALID; 11283 Int alen = 0; 11284 HChar dis_buf[50]; 11285 UChar modrm = getUChar(delta); 11286 UInt rG = gregOfRexRM(pfx,modrm); 11287 UInt imm8; 11288 IRTemp sV, s[8], sV64[4], dVhi, dVlo; 11289 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID; 11290 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 11291 sV = newTemp(Ity_V256); 11292 dVhi = newTemp(Ity_I64); 11293 dVlo = newTemp(Ity_I64); 11294 if (epartIsReg(modrm)) { 11295 UInt rE = eregOfRexRM(pfx,modrm); 11296 assign( sV, getYMMReg(rE) ); 11297 imm8 = (UInt)getUChar(delta+1); 11298 delta += 1+1; 11299 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l', 11300 imm8, nameYMMReg(rE), nameYMMReg(rG)); 11301 } else { 11302 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 11303 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 11304 imm8 = (UInt)getUChar(delta+alen); 11305 delta += alen+1; 11306 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l', 11307 imm8, dis_buf, nameYMMReg(rG)); 11308 } 11309 11310 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] ); 11311 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] ); 11312 breakup64to16s( sV64[xIsH ? 
1 : 0], &s[3], &s[2], &s[1], &s[0] ); 11313 11314 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)], 11315 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) ); 11316 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)], 11317 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) ); 11318 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3], 11319 xIsH ? sV64[2] : dVhi, 11320 xIsH ? dVlo : sV64[1], 11321 xIsH ? sV64[0] : dVlo ) ); 11322 return delta; 11323 } 11324 11325 11326 static Long dis_PEXTRW_128_EregOnly_toG ( VexAbiInfo* vbi, Prefix pfx, 11327 Long delta, Bool isAvx ) 11328 { 11329 Long deltaIN = delta; 11330 UChar modrm = getUChar(delta); 11331 UInt rG = gregOfRexRM(pfx,modrm); 11332 IRTemp sV = newTemp(Ity_V128); 11333 IRTemp d16 = newTemp(Ity_I16); 11334 UInt imm8; 11335 IRTemp s0, s1, s2, s3; 11336 if (epartIsReg(modrm)) { 11337 UInt rE = eregOfRexRM(pfx,modrm); 11338 assign(sV, getXMMReg(rE)); 11339 imm8 = getUChar(delta+1) & 7; 11340 delta += 1+1; 11341 DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "", 11342 (Int)imm8, nameXMMReg(rE), nameIReg32(rG)); 11343 } else { 11344 /* The memory case is disallowed, apparently. */ 11345 return deltaIN; /* FAIL */ 11346 } 11347 s3 = s2 = s1 = s0 = IRTemp_INVALID; 11348 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 11349 switch (imm8) { 11350 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break; 11351 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break; 11352 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break; 11353 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break; 11354 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break; 11355 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break; 11356 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break; 11357 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break; 11358 default: vassert(0); 11359 } 11360 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16))); 11361 return delta; 11362 } 11363 11364 11365 static Long dis_CVTDQ2PD_128 ( VexAbiInfo* vbi, Prefix pfx, 11366 Long delta, Bool isAvx ) 11367 { 11368 IRTemp addr = IRTemp_INVALID; 11369 Int alen = 0; 11370 HChar dis_buf[50]; 11371 UChar modrm = getUChar(delta); 11372 IRTemp arg64 = newTemp(Ity_I64); 11373 UInt rG = gregOfRexRM(pfx,modrm); 11374 const HChar* mbV = isAvx ? "v" : ""; 11375 if (epartIsReg(modrm)) { 11376 UInt rE = eregOfRexRM(pfx,modrm); 11377 assign( arg64, getXMMRegLane64(rE, 0) ); 11378 delta += 1; 11379 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 11380 } else { 11381 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11382 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11383 delta += alen; 11384 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 11385 } 11386 putXMMRegLane64F( 11387 rG, 0, 11388 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 11389 ); 11390 putXMMRegLane64F( 11391 rG, 1, 11392 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 11393 ); 11394 if (isAvx) 11395 putYMMRegLane128(rG, 1, mkV128(0)); 11396 return delta; 11397 } 11398 11399 11400 static Long dis_STMXCSR ( VexAbiInfo* vbi, Prefix pfx, 11401 Long delta, Bool isAvx ) 11402 { 11403 IRTemp addr = IRTemp_INVALID; 11404 Int alen = 0; 11405 HChar dis_buf[50]; 11406 UChar modrm = getUChar(delta); 11407 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11408 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */ 11409 11410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11411 delta += alen; 11412 11413 /* Fake up a native SSE mxcsr word. 
The only thing it depends on 11414 is SSEROUND[1:0], so call a clean helper to cook it up. 11415 */ 11416 /* ULong amd64h_create_mxcsr ( ULong sseround ) */ 11417 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11418 storeLE( 11419 mkexpr(addr), 11420 unop(Iop_64to32, 11421 mkIRExprCCall( 11422 Ity_I64, 0/*regp*/, 11423 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 11424 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 11425 ) 11426 ) 11427 ); 11428 return delta; 11429 } 11430 11431 11432 static Long dis_LDMXCSR ( VexAbiInfo* vbi, Prefix pfx, 11433 Long delta, Bool isAvx ) 11434 { 11435 IRTemp addr = IRTemp_INVALID; 11436 Int alen = 0; 11437 HChar dis_buf[50]; 11438 UChar modrm = getUChar(delta); 11439 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11440 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */ 11441 11442 IRTemp t64 = newTemp(Ity_I64); 11443 IRTemp ew = newTemp(Ity_I32); 11444 11445 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11446 delta += alen; 11447 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11448 11449 /* The only thing we observe in %mxcsr is the rounding mode. 11450 Therefore, pass the 32-bit value (SSE native-format control 11451 word) to a clean helper, getting back a 64-bit value, the 11452 lower half of which is the SSEROUND value to store, and the 11453 upper half of which is the emulation-warning token which may 11454 be generated. 11455 */ 11456 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 11457 assign( t64, mkIRExprCCall( 11458 Ity_I64, 0/*regparms*/, 11459 "amd64g_check_ldmxcsr", 11460 &amd64g_check_ldmxcsr, 11461 mkIRExprVec_1( 11462 unop(Iop_32Uto64, 11463 loadLE(Ity_I32, mkexpr(addr)) 11464 ) 11465 ) 11466 ) 11467 ); 11468 11469 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 11470 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 11471 put_emwarn( mkexpr(ew) ); 11472 /* Finally, if an emulation warning was reported, side-exit to 11473 the next insn, reporting the warning, so that Valgrind's 11474 dispatcher sees the warning. */ 11475 stmt( 11476 IRStmt_Exit( 11477 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)), 11478 Ijk_EmWarn, 11479 IRConst_U64(guest_RIP_bbstart+delta), 11480 OFFB_RIP 11481 ) 11482 ); 11483 return delta; 11484 } 11485 11486 11487 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 ) 11488 { 11489 vassert(imm8 >= 0 && imm8 <= 7); 11490 11491 // Create a V128 value which has the selected word in the 11492 // specified lane, and zeroes everywhere else. 
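   // For example, with imm8 == 5 the 16-bit value is shifted to bits
   // [16:31] of the upper 64-bit half (lane 5 of the vector), and the
   // mask below becomes ~(3 << 10) == 0xF3FF, so only the two bytes of
   // lane 5 are replaced in v128.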
11493 IRTemp tmp128 = newTemp(Ity_V128); 11494 IRTemp halfshift = newTemp(Ity_I64); 11495 assign(halfshift, binop(Iop_Shl64, 11496 unop(Iop_16Uto64, mkexpr(u16)), 11497 mkU8(16 * (imm8 & 3)))); 11498 if (imm8 < 4) { 11499 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 11500 } else { 11501 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 11502 } 11503 11504 UShort mask = ~(3 << (imm8 * 2)); 11505 IRTemp res = newTemp(Ity_V128); 11506 assign( res, binop(Iop_OrV128, 11507 mkexpr(tmp128), 11508 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) ); 11509 return res; 11510 } 11511 11512 11513 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV ) 11514 { 11515 IRTemp s1, s0, d1, d0; 11516 s1 = s0 = d1 = d0 = IRTemp_INVALID; 11517 11518 breakupV128to64s( sV, &s1, &s0 ); 11519 breakupV128to64s( dV, &d1, &d0 ); 11520 11521 IRTemp res = newTemp(Ity_V128); 11522 assign( res, 11523 binop(Iop_64HLtoV128, 11524 mkIRExprCCall(Ity_I64, 0/*regparms*/, 11525 "amd64g_calculate_mmx_psadbw", 11526 &amd64g_calculate_mmx_psadbw, 11527 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))), 11528 mkIRExprCCall(Ity_I64, 0/*regparms*/, 11529 "amd64g_calculate_mmx_psadbw", 11530 &amd64g_calculate_mmx_psadbw, 11531 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) ); 11532 return res; 11533 } 11534 11535 11536 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV ) 11537 { 11538 IRTemp sHi, sLo, dHi, dLo; 11539 sHi = sLo = dHi = dLo = IRTemp_INVALID; 11540 breakupV256toV128s( dV, &dHi, &dLo); 11541 breakupV256toV128s( sV, &sHi, &sLo); 11542 IRTemp res = newTemp(Ity_V256); 11543 assign(res, binop(Iop_V128HLtoV256, 11544 mkexpr(math_PSADBW_128(dHi, sHi)), 11545 mkexpr(math_PSADBW_128(dLo, sLo)))); 11546 return res; 11547 } 11548 11549 11550 static Long dis_MASKMOVDQU ( VexAbiInfo* vbi, Prefix pfx, 11551 Long delta, Bool isAvx ) 11552 { 11553 IRTemp regD = newTemp(Ity_V128); 11554 IRTemp mask = newTemp(Ity_V128); 11555 IRTemp olddata = newTemp(Ity_V128); 11556 IRTemp newdata = newTemp(Ity_V128); 11557 IRTemp addr = newTemp(Ity_I64); 11558 UChar modrm = getUChar(delta); 11559 UInt rG = gregOfRexRM(pfx,modrm); 11560 UInt rE = eregOfRexRM(pfx,modrm); 11561 11562 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 11563 assign( regD, getXMMReg( rG )); 11564 11565 /* Unfortunately can't do the obvious thing with SarN8x16 11566 here since that can't be re-emitted as SSE2 code - no such 11567 insn. */ 11568 assign( mask, 11569 binop(Iop_64HLtoV128, 11570 binop(Iop_SarN8x8, 11571 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ), 11572 mkU8(7) ), 11573 binop(Iop_SarN8x8, 11574 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ), 11575 mkU8(7) ) )); 11576 assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); 11577 assign( newdata, binop(Iop_OrV128, 11578 binop(Iop_AndV128, 11579 mkexpr(regD), 11580 mkexpr(mask) ), 11581 binop(Iop_AndV128, 11582 mkexpr(olddata), 11583 unop(Iop_NotV128, mkexpr(mask)))) ); 11584 storeLE( mkexpr(addr), mkexpr(newdata) ); 11585 11586 delta += 1; 11587 DIP("%smaskmovdqu %s,%s\n", isAvx ? 
"v" : "", 11588 nameXMMReg(rE), nameXMMReg(rG) ); 11589 return delta; 11590 } 11591 11592 11593 static Long dis_MOVMSKPS_128 ( VexAbiInfo* vbi, Prefix pfx, 11594 Long delta, Bool isAvx ) 11595 { 11596 UChar modrm = getUChar(delta); 11597 UInt rG = gregOfRexRM(pfx,modrm); 11598 UInt rE = eregOfRexRM(pfx,modrm); 11599 IRTemp t0 = newTemp(Ity_I32); 11600 IRTemp t1 = newTemp(Ity_I32); 11601 IRTemp t2 = newTemp(Ity_I32); 11602 IRTemp t3 = newTemp(Ity_I32); 11603 delta += 1; 11604 assign( t0, binop( Iop_And32, 11605 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)), 11606 mkU32(1) )); 11607 assign( t1, binop( Iop_And32, 11608 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)), 11609 mkU32(2) )); 11610 assign( t2, binop( Iop_And32, 11611 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)), 11612 mkU32(4) )); 11613 assign( t3, binop( Iop_And32, 11614 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)), 11615 mkU32(8) )); 11616 putIReg32( rG, binop(Iop_Or32, 11617 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11618 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 11619 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "", 11620 nameXMMReg(rE), nameIReg32(rG)); 11621 return delta; 11622 } 11623 11624 11625 static Long dis_MOVMSKPS_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta ) 11626 { 11627 UChar modrm = getUChar(delta); 11628 UInt rG = gregOfRexRM(pfx,modrm); 11629 UInt rE = eregOfRexRM(pfx,modrm); 11630 IRTemp t0 = newTemp(Ity_I32); 11631 IRTemp t1 = newTemp(Ity_I32); 11632 IRTemp t2 = newTemp(Ity_I32); 11633 IRTemp t3 = newTemp(Ity_I32); 11634 IRTemp t4 = newTemp(Ity_I32); 11635 IRTemp t5 = newTemp(Ity_I32); 11636 IRTemp t6 = newTemp(Ity_I32); 11637 IRTemp t7 = newTemp(Ity_I32); 11638 delta += 1; 11639 assign( t0, binop( Iop_And32, 11640 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)), 11641 mkU32(1) )); 11642 assign( t1, binop( Iop_And32, 11643 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)), 11644 mkU32(2) )); 11645 assign( t2, binop( Iop_And32, 11646 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)), 11647 mkU32(4) )); 11648 assign( t3, binop( Iop_And32, 11649 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)), 11650 mkU32(8) )); 11651 assign( t4, binop( Iop_And32, 11652 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)), 11653 mkU32(16) )); 11654 assign( t5, binop( Iop_And32, 11655 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)), 11656 mkU32(32) )); 11657 assign( t6, binop( Iop_And32, 11658 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)), 11659 mkU32(64) )); 11660 assign( t7, binop( Iop_And32, 11661 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)), 11662 mkU32(128) )); 11663 putIReg32( rG, binop(Iop_Or32, 11664 binop(Iop_Or32, 11665 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 11666 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ), 11667 binop(Iop_Or32, 11668 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)), 11669 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) ); 11670 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 11671 return delta; 11672 } 11673 11674 11675 static Long dis_MOVMSKPD_128 ( VexAbiInfo* vbi, Prefix pfx, 11676 Long delta, Bool isAvx ) 11677 { 11678 UChar modrm = getUChar(delta); 11679 UInt rG = gregOfRexRM(pfx,modrm); 11680 UInt rE = eregOfRexRM(pfx,modrm); 11681 IRTemp t0 = newTemp(Ity_I32); 11682 IRTemp t1 = newTemp(Ity_I32); 11683 delta += 1; 11684 assign( t0, binop( Iop_And32, 11685 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 11686 mkU32(1) )); 11687 assign( t1, binop( Iop_And32, 11688 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 11689 mkU32(2) )); 11690 putIReg32( rG, binop(Iop_Or32, 
mkexpr(t0), mkexpr(t1) ) );
11691 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
11692 nameXMMReg(rE), nameIReg32(rG));
11693 return delta;
11694 }
11695
11696
11697 static Long dis_MOVMSKPD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
11698 {
11699 UChar modrm = getUChar(delta);
11700 UInt rG = gregOfRexRM(pfx,modrm);
11701 UInt rE = eregOfRexRM(pfx,modrm);
11702 IRTemp t0 = newTemp(Ity_I32);
11703 IRTemp t1 = newTemp(Ity_I32);
11704 IRTemp t2 = newTemp(Ity_I32);
11705 IRTemp t3 = newTemp(Ity_I32);
11706 delta += 1;
11707 assign( t0, binop( Iop_And32,
11708 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
11709 mkU32(1) ));
11710 assign( t1, binop( Iop_And32,
11711 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
11712 mkU32(2) ));
11713 assign( t2, binop( Iop_And32,
11714 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
11715 mkU32(4) ));
11716 assign( t3, binop( Iop_And32,
11717 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
11718 mkU32(8) ));
11719 putIReg32( rG, binop(Iop_Or32,
11720 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11721 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11722 DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11723 return delta;
11724 }
11725
11726
11727 /* Note, this also handles SSE(1) insns. */
11728 __attribute__((noinline))
11729 static
11730 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
11731 VexAbiInfo* vbi,
11732 Prefix pfx, Int sz, Long deltaIN,
11733 DisResult* dres )
11734 {
11735 IRTemp addr = IRTemp_INVALID;
11736 IRTemp t0 = IRTemp_INVALID;
11737 IRTemp t1 = IRTemp_INVALID;
11738 IRTemp t2 = IRTemp_INVALID;
11739 IRTemp t3 = IRTemp_INVALID;
11740 IRTemp t4 = IRTemp_INVALID;
11741 IRTemp t5 = IRTemp_INVALID;
11742 IRTemp t6 = IRTemp_INVALID;
11743 UChar modrm = 0;
11744 Int alen = 0;
11745 HChar dis_buf[50];
11746
11747 *decode_OK = False;
11748
11749 Long delta = deltaIN;
11750 UChar opc = getUChar(delta);
11751 delta++;
11752 switch (opc) {
11753
11754 case 0x10:
11755 if (have66noF2noF3(pfx)
11756 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11757 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
11758 modrm = getUChar(delta);
11759 if (epartIsReg(modrm)) {
11760 putXMMReg( gregOfRexRM(pfx,modrm),
11761 getXMMReg( eregOfRexRM(pfx,modrm) ));
11762 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11763 nameXMMReg(gregOfRexRM(pfx,modrm)));
11764 delta += 1;
11765 } else {
11766 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11767 putXMMReg( gregOfRexRM(pfx,modrm),
11768 loadLE(Ity_V128, mkexpr(addr)) );
11769 DIP("movupd %s,%s\n", dis_buf,
11770 nameXMMReg(gregOfRexRM(pfx,modrm)));
11771 delta += alen;
11772 }
11773 goto decode_success;
11774 }
11775 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11776 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11777 If E is reg, upper half of G is unchanged.
*/ 11778 if (haveF2no66noF3(pfx) 11779 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) { 11780 modrm = getUChar(delta); 11781 if (epartIsReg(modrm)) { 11782 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11783 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 11784 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11785 nameXMMReg(gregOfRexRM(pfx,modrm))); 11786 delta += 1; 11787 } else { 11788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11789 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 11790 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 11791 loadLE(Ity_I64, mkexpr(addr)) ); 11792 DIP("movsd %s,%s\n", dis_buf, 11793 nameXMMReg(gregOfRexRM(pfx,modrm))); 11794 delta += alen; 11795 } 11796 goto decode_success; 11797 } 11798 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 11799 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 11800 if (haveF3no66noF2(pfx) 11801 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11802 modrm = getUChar(delta); 11803 if (epartIsReg(modrm)) { 11804 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 11805 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 11806 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11807 nameXMMReg(gregOfRexRM(pfx,modrm))); 11808 delta += 1; 11809 } else { 11810 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11811 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 11812 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 11813 loadLE(Ity_I32, mkexpr(addr)) ); 11814 DIP("movss %s,%s\n", dis_buf, 11815 nameXMMReg(gregOfRexRM(pfx,modrm))); 11816 delta += alen; 11817 } 11818 goto decode_success; 11819 } 11820 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ 11821 if (haveNo66noF2noF3(pfx) 11822 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11823 modrm = getUChar(delta); 11824 if (epartIsReg(modrm)) { 11825 putXMMReg( gregOfRexRM(pfx,modrm), 11826 getXMMReg( eregOfRexRM(pfx,modrm) )); 11827 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 11828 nameXMMReg(gregOfRexRM(pfx,modrm))); 11829 delta += 1; 11830 } else { 11831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11832 putXMMReg( gregOfRexRM(pfx,modrm), 11833 loadLE(Ity_V128, mkexpr(addr)) ); 11834 DIP("movups %s,%s\n", dis_buf, 11835 nameXMMReg(gregOfRexRM(pfx,modrm))); 11836 delta += alen; 11837 } 11838 goto decode_success; 11839 } 11840 break; 11841 11842 case 0x11: 11843 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 11844 or lo half xmm). */ 11845 if (haveF2no66noF3(pfx) 11846 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11847 modrm = getUChar(delta); 11848 if (epartIsReg(modrm)) { 11849 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, 11850 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 11851 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11852 nameXMMReg(eregOfRexRM(pfx,modrm))); 11853 delta += 1; 11854 } else { 11855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11856 storeLE( mkexpr(addr), 11857 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 11858 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 11859 dis_buf); 11860 delta += alen; 11861 } 11862 goto decode_success; 11863 } 11864 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 11865 or lo 1/4 xmm). 
*/
11866 if (haveF3no66noF2(pfx) && sz == 4) {
11867 modrm = getUChar(delta);
11868 if (epartIsReg(modrm)) {
11869 /* fall through, we don't yet have a test case */
11870 } else {
11871 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11872 storeLE( mkexpr(addr),
11873 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
11874 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11875 dis_buf);
11876 delta += alen;
11877 goto decode_success;
11878 }
11879 }
11880 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
11881 if (have66noF2noF3(pfx)
11882 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11883 modrm = getUChar(delta);
11884 if (epartIsReg(modrm)) {
11885 putXMMReg( eregOfRexRM(pfx,modrm),
11886 getXMMReg( gregOfRexRM(pfx,modrm) ) );
11887 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11888 nameXMMReg(eregOfRexRM(pfx,modrm)));
11889 delta += 1;
11890 } else {
11891 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11892 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11893 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11894 dis_buf );
11895 delta += alen;
11896 }
11897 goto decode_success;
11898 }
11899 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
11900 if (haveNo66noF2noF3(pfx)
11901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11902 modrm = getUChar(delta);
11903 if (epartIsReg(modrm)) {
11904 /* fall through; awaiting test case */
11905 } else {
11906 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11907 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11908 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11909 dis_buf );
11910 delta += alen;
11911 goto decode_success;
11912 }
11913 }
11914 break;
11915
11916 case 0x12:
11917 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11918 /* Identical to MOVLPS ? */
11919 if (have66noF2noF3(pfx)
11920 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11921 modrm = getUChar(delta);
11922 if (epartIsReg(modrm)) {
11923 /* fall through; apparently reg-reg is not possible */
11924 } else {
11925 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11926 delta += alen;
11927 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11928 0/*lower lane*/,
11929 loadLE(Ity_I64, mkexpr(addr)) );
11930 DIP("movlpd %s, %s\n",
11931 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11932 goto decode_success;
11933 }
11934 }
11935 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
11936 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
11937 if (haveNo66noF2noF3(pfx)
11938 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11939 modrm = getUChar(delta);
11940 if (epartIsReg(modrm)) {
11941 delta += 1;
11942 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11943 0/*lower lane*/,
11944 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
11945 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11946 nameXMMReg(gregOfRexRM(pfx,modrm)));
11947 } else {
11948 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11949 delta += alen;
11950 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
11951 loadLE(Ity_I64, mkexpr(addr)) );
11952 DIP("movlps %s, %s\n",
11953 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11954 }
11955 goto decode_success;
11956 }
11957 break;
11958
11959 case 0x13:
11960 /* 0F 13 = MOVLPS -- move from low half of XMM to mem.
*/ 11961 if (haveNo66noF2noF3(pfx) 11962 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 11963 modrm = getUChar(delta); 11964 if (!epartIsReg(modrm)) { 11965 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11966 delta += alen; 11967 storeLE( mkexpr(addr), 11968 getXMMRegLane64( gregOfRexRM(pfx,modrm), 11969 0/*lower lane*/ ) ); 11970 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 11971 dis_buf); 11972 goto decode_success; 11973 } 11974 /* else fall through */ 11975 } 11976 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 11977 /* Identical to MOVLPS ? */ 11978 if (have66noF2noF3(pfx) 11979 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 11980 modrm = getUChar(delta); 11981 if (!epartIsReg(modrm)) { 11982 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11983 delta += alen; 11984 storeLE( mkexpr(addr), 11985 getXMMRegLane64( gregOfRexRM(pfx,modrm), 11986 0/*lower lane*/ ) ); 11987 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 11988 dis_buf); 11989 goto decode_success; 11990 } 11991 /* else fall through */ 11992 } 11993 break; 11994 11995 case 0x14: 11996 case 0x15: 11997 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 11998 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 11999 /* These just appear to be special cases of SHUFPS */ 12000 if (haveNo66noF2noF3(pfx) && sz == 4) { 12001 Bool hi = toBool(opc == 0x15); 12002 IRTemp sV = newTemp(Ity_V128); 12003 IRTemp dV = newTemp(Ity_V128); 12004 modrm = getUChar(delta); 12005 UInt rG = gregOfRexRM(pfx,modrm); 12006 assign( dV, getXMMReg(rG) ); 12007 if (epartIsReg(modrm)) { 12008 UInt rE = eregOfRexRM(pfx,modrm); 12009 assign( sV, getXMMReg(rE) ); 12010 delta += 1; 12011 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12012 nameXMMReg(rE), nameXMMReg(rG)); 12013 } else { 12014 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12015 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12016 delta += alen; 12017 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12018 dis_buf, nameXMMReg(rG)); 12019 } 12020 IRTemp res = math_UNPCKxPS_128( sV, dV, hi ); 12021 putXMMReg( rG, mkexpr(res) ); 12022 goto decode_success; 12023 } 12024 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 12025 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 12026 /* These just appear to be special cases of SHUFPS */ 12027 if (have66noF2noF3(pfx) 12028 && sz == 2 /* could be 8 if rex also present */) { 12029 Bool hi = toBool(opc == 0x15); 12030 IRTemp sV = newTemp(Ity_V128); 12031 IRTemp dV = newTemp(Ity_V128); 12032 modrm = getUChar(delta); 12033 UInt rG = gregOfRexRM(pfx,modrm); 12034 assign( dV, getXMMReg(rG) ); 12035 if (epartIsReg(modrm)) { 12036 UInt rE = eregOfRexRM(pfx,modrm); 12037 assign( sV, getXMMReg(rE) ); 12038 delta += 1; 12039 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12040 nameXMMReg(rE), nameXMMReg(rG)); 12041 } else { 12042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12043 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12044 delta += alen; 12045 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12046 dis_buf, nameXMMReg(rG)); 12047 } 12048 IRTemp res = math_UNPCKxPD_128( sV, dV, hi ); 12049 putXMMReg( rG, mkexpr(res) ); 12050 goto decode_success; 12051 } 12052 break; 12053 12054 case 0x16: 12055 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 12056 /* These seems identical to MOVHPS. This instruction encoding is 12057 completely crazy. 
*/ 12058 if (have66noF2noF3(pfx) 12059 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12060 modrm = getUChar(delta); 12061 if (epartIsReg(modrm)) { 12062 /* fall through; apparently reg-reg is not possible */ 12063 } else { 12064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12065 delta += alen; 12066 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12067 loadLE(Ity_I64, mkexpr(addr)) ); 12068 DIP("movhpd %s,%s\n", dis_buf, 12069 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12070 goto decode_success; 12071 } 12072 } 12073 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 12074 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 12075 if (haveNo66noF2noF3(pfx) 12076 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12077 modrm = getUChar(delta); 12078 if (epartIsReg(modrm)) { 12079 delta += 1; 12080 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12081 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 12082 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12083 nameXMMReg(gregOfRexRM(pfx,modrm))); 12084 } else { 12085 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12086 delta += alen; 12087 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12088 loadLE(Ity_I64, mkexpr(addr)) ); 12089 DIP("movhps %s,%s\n", dis_buf, 12090 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12091 } 12092 goto decode_success; 12093 } 12094 break; 12095 12096 case 0x17: 12097 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ 12098 if (haveNo66noF2noF3(pfx) 12099 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12100 modrm = getUChar(delta); 12101 if (!epartIsReg(modrm)) { 12102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12103 delta += alen; 12104 storeLE( mkexpr(addr), 12105 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12106 1/*upper lane*/ ) ); 12107 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12108 dis_buf); 12109 goto decode_success; 12110 } 12111 /* else fall through */ 12112 } 12113 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 12114 /* Again, this seems identical to MOVHPS. 
*/ 12115 if (have66noF2noF3(pfx) 12116 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12117 modrm = getUChar(delta); 12118 if (!epartIsReg(modrm)) { 12119 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12120 delta += alen; 12121 storeLE( mkexpr(addr), 12122 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12123 1/*upper lane*/ ) ); 12124 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12125 dis_buf); 12126 goto decode_success; 12127 } 12128 /* else fall through */ 12129 } 12130 break; 12131 12132 case 0x18: 12133 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 12134 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 12135 /* 0F 18 /2 = PREFETCH1 */ 12136 /* 0F 18 /3 = PREFETCH2 */ 12137 if (haveNo66noF2noF3(pfx) 12138 && !epartIsReg(getUChar(delta)) 12139 && gregLO3ofRM(getUChar(delta)) >= 0 12140 && gregLO3ofRM(getUChar(delta)) <= 3) { 12141 const HChar* hintstr = "??"; 12142 12143 modrm = getUChar(delta); 12144 vassert(!epartIsReg(modrm)); 12145 12146 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12147 delta += alen; 12148 12149 switch (gregLO3ofRM(modrm)) { 12150 case 0: hintstr = "nta"; break; 12151 case 1: hintstr = "t0"; break; 12152 case 2: hintstr = "t1"; break; 12153 case 3: hintstr = "t2"; break; 12154 default: vassert(0); 12155 } 12156 12157 DIP("prefetch%s %s\n", hintstr, dis_buf); 12158 goto decode_success; 12159 } 12160 break; 12161 12162 case 0x28: 12163 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 12164 if (have66noF2noF3(pfx) 12165 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12166 modrm = getUChar(delta); 12167 if (epartIsReg(modrm)) { 12168 putXMMReg( gregOfRexRM(pfx,modrm), 12169 getXMMReg( eregOfRexRM(pfx,modrm) )); 12170 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12171 nameXMMReg(gregOfRexRM(pfx,modrm))); 12172 delta += 1; 12173 } else { 12174 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12175 gen_SEGV_if_not_16_aligned( addr ); 12176 putXMMReg( gregOfRexRM(pfx,modrm), 12177 loadLE(Ity_V128, mkexpr(addr)) ); 12178 DIP("movapd %s,%s\n", dis_buf, 12179 nameXMMReg(gregOfRexRM(pfx,modrm))); 12180 delta += alen; 12181 } 12182 goto decode_success; 12183 } 12184 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 12185 if (haveNo66noF2noF3(pfx) 12186 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12187 modrm = getUChar(delta); 12188 if (epartIsReg(modrm)) { 12189 putXMMReg( gregOfRexRM(pfx,modrm), 12190 getXMMReg( eregOfRexRM(pfx,modrm) )); 12191 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12192 nameXMMReg(gregOfRexRM(pfx,modrm))); 12193 delta += 1; 12194 } else { 12195 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12196 gen_SEGV_if_not_16_aligned( addr ); 12197 putXMMReg( gregOfRexRM(pfx,modrm), 12198 loadLE(Ity_V128, mkexpr(addr)) ); 12199 DIP("movaps %s,%s\n", dis_buf, 12200 nameXMMReg(gregOfRexRM(pfx,modrm))); 12201 delta += alen; 12202 } 12203 goto decode_success; 12204 } 12205 break; 12206 12207 case 0x29: 12208 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). 
*/ 12209 if (haveNo66noF2noF3(pfx) 12210 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12211 modrm = getUChar(delta); 12212 if (epartIsReg(modrm)) { 12213 putXMMReg( eregOfRexRM(pfx,modrm), 12214 getXMMReg( gregOfRexRM(pfx,modrm) )); 12215 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12216 nameXMMReg(eregOfRexRM(pfx,modrm))); 12217 delta += 1; 12218 } else { 12219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12220 gen_SEGV_if_not_16_aligned( addr ); 12221 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12222 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12223 dis_buf ); 12224 delta += alen; 12225 } 12226 goto decode_success; 12227 } 12228 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 12229 if (have66noF2noF3(pfx) 12230 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12231 modrm = getUChar(delta); 12232 if (epartIsReg(modrm)) { 12233 putXMMReg( eregOfRexRM(pfx,modrm), 12234 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12235 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12236 nameXMMReg(eregOfRexRM(pfx,modrm))); 12237 delta += 1; 12238 } else { 12239 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12240 gen_SEGV_if_not_16_aligned( addr ); 12241 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12242 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12243 dis_buf ); 12244 delta += alen; 12245 } 12246 goto decode_success; 12247 } 12248 break; 12249 12250 case 0x2A: 12251 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 12252 half xmm */ 12253 if (haveNo66noF2noF3(pfx) && sz == 4) { 12254 IRTemp arg64 = newTemp(Ity_I64); 12255 IRTemp rmode = newTemp(Ity_I32); 12256 12257 modrm = getUChar(delta); 12258 do_MMX_preamble(); 12259 if (epartIsReg(modrm)) { 12260 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12261 delta += 1; 12262 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12263 nameXMMReg(gregOfRexRM(pfx,modrm))); 12264 } else { 12265 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12266 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12267 delta += alen; 12268 DIP("cvtpi2ps %s,%s\n", dis_buf, 12269 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12270 } 12271 12272 assign( rmode, get_sse_roundingmode() ); 12273 12274 putXMMRegLane32F( 12275 gregOfRexRM(pfx,modrm), 0, 12276 binop(Iop_F64toF32, 12277 mkexpr(rmode), 12278 unop(Iop_I32StoF64, 12279 unop(Iop_64to32, mkexpr(arg64)) )) ); 12280 12281 putXMMRegLane32F( 12282 gregOfRexRM(pfx,modrm), 1, 12283 binop(Iop_F64toF32, 12284 mkexpr(rmode), 12285 unop(Iop_I32StoF64, 12286 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 12287 12288 goto decode_success; 12289 } 12290 /* F3 0F 2A = CVTSI2SS 12291 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 12292 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 12293 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12294 IRTemp rmode = newTemp(Ity_I32); 12295 assign( rmode, get_sse_roundingmode() ); 12296 modrm = getUChar(delta); 12297 if (sz == 4) { 12298 IRTemp arg32 = newTemp(Ity_I32); 12299 if (epartIsReg(modrm)) { 12300 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12301 delta += 1; 12302 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12303 nameXMMReg(gregOfRexRM(pfx,modrm))); 12304 } else { 12305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12306 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12307 delta += alen; 12308 DIP("cvtsi2ss %s,%s\n", dis_buf, 12309 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12310 } 12311 
putXMMRegLane32F( 12312 gregOfRexRM(pfx,modrm), 0, 12313 binop(Iop_F64toF32, 12314 mkexpr(rmode), 12315 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 12316 } else { 12317 /* sz == 8 */ 12318 IRTemp arg64 = newTemp(Ity_I64); 12319 if (epartIsReg(modrm)) { 12320 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12321 delta += 1; 12322 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12323 nameXMMReg(gregOfRexRM(pfx,modrm))); 12324 } else { 12325 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12326 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12327 delta += alen; 12328 DIP("cvtsi2ssq %s,%s\n", dis_buf, 12329 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12330 } 12331 putXMMRegLane32F( 12332 gregOfRexRM(pfx,modrm), 0, 12333 binop(Iop_F64toF32, 12334 mkexpr(rmode), 12335 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 12336 } 12337 goto decode_success; 12338 } 12339 /* F2 0F 2A = CVTSI2SD 12340 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 12341 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 12342 */ 12343 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12344 modrm = getUChar(delta); 12345 if (sz == 4) { 12346 IRTemp arg32 = newTemp(Ity_I32); 12347 if (epartIsReg(modrm)) { 12348 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12349 delta += 1; 12350 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12351 nameXMMReg(gregOfRexRM(pfx,modrm))); 12352 } else { 12353 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12354 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12355 delta += alen; 12356 DIP("cvtsi2sdl %s,%s\n", dis_buf, 12357 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12358 } 12359 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12360 unop(Iop_I32StoF64, mkexpr(arg32)) 12361 ); 12362 } else { 12363 /* sz == 8 */ 12364 IRTemp arg64 = newTemp(Ity_I64); 12365 if (epartIsReg(modrm)) { 12366 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12367 delta += 1; 12368 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12369 nameXMMReg(gregOfRexRM(pfx,modrm))); 12370 } else { 12371 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12372 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12373 delta += alen; 12374 DIP("cvtsi2sdq %s,%s\n", dis_buf, 12375 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12376 } 12377 putXMMRegLane64F( 12378 gregOfRexRM(pfx,modrm), 12379 0, 12380 binop( Iop_I64StoF64, 12381 get_sse_roundingmode(), 12382 mkexpr(arg64) 12383 ) 12384 ); 12385 } 12386 goto decode_success; 12387 } 12388 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 12389 xmm(G) */ 12390 if (have66noF2noF3(pfx) && sz == 2) { 12391 IRTemp arg64 = newTemp(Ity_I64); 12392 12393 modrm = getUChar(delta); 12394 if (epartIsReg(modrm)) { 12395 /* Only switch to MMX mode if the source is a MMX register. 12396 This is inconsistent with all other instructions which 12397 convert between XMM and (M64 or MMX), which always switch 12398 to MMX mode even if 64-bit operand is M64 and not MMX. At 12399 least, that's what the Intel docs seem to me to say. 12400 Fixes #210264. 
*/ 12401 do_MMX_preamble(); 12402 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12403 delta += 1; 12404 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12405 nameXMMReg(gregOfRexRM(pfx,modrm))); 12406 } else { 12407 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12408 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12409 delta += alen; 12410 DIP("cvtpi2pd %s,%s\n", dis_buf, 12411 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12412 } 12413 12414 putXMMRegLane64F( 12415 gregOfRexRM(pfx,modrm), 0, 12416 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 12417 ); 12418 12419 putXMMRegLane64F( 12420 gregOfRexRM(pfx,modrm), 1, 12421 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 12422 ); 12423 12424 goto decode_success; 12425 } 12426 break; 12427 12428 case 0x2B: 12429 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 12430 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 12431 if ( (haveNo66noF2noF3(pfx) && sz == 4) 12432 || (have66noF2noF3(pfx) && sz == 2) ) { 12433 modrm = getUChar(delta); 12434 if (!epartIsReg(modrm)) { 12435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12436 gen_SEGV_if_not_16_aligned( addr ); 12437 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12438 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", 12439 dis_buf, 12440 nameXMMReg(gregOfRexRM(pfx,modrm))); 12441 delta += alen; 12442 goto decode_success; 12443 } 12444 /* else fall through */ 12445 } 12446 break; 12447 12448 case 0x2C: 12449 case 0x2D: 12450 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 12451 I32 in mmx, according to prevailing SSE rounding mode */ 12452 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 12453 I32 in mmx, rounding towards zero */ 12454 if (haveNo66noF2noF3(pfx) && sz == 4) { 12455 IRTemp dst64 = newTemp(Ity_I64); 12456 IRTemp rmode = newTemp(Ity_I32); 12457 IRTemp f32lo = newTemp(Ity_F32); 12458 IRTemp f32hi = newTemp(Ity_F32); 12459 Bool r2zero = toBool(opc == 0x2C); 12460 12461 do_MMX_preamble(); 12462 modrm = getUChar(delta); 12463 12464 if (epartIsReg(modrm)) { 12465 delta += 1; 12466 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 12467 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 12468 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 12469 nameXMMReg(eregOfRexRM(pfx,modrm)), 12470 nameMMXReg(gregLO3ofRM(modrm))); 12471 } else { 12472 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12473 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 12474 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 12475 mkexpr(addr), 12476 mkU64(4) ))); 12477 delta += alen; 12478 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 12479 dis_buf, 12480 nameMMXReg(gregLO3ofRM(modrm))); 12481 } 12482 12483 if (r2zero) { 12484 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 12485 } else { 12486 assign( rmode, get_sse_roundingmode() ); 12487 } 12488 12489 assign( 12490 dst64, 12491 binop( Iop_32HLto64, 12492 binop( Iop_F64toI32S, 12493 mkexpr(rmode), 12494 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 12495 binop( Iop_F64toI32S, 12496 mkexpr(rmode), 12497 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 12498 ) 12499 ); 12500 12501 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 12502 goto decode_success; 12503 } 12504 /* F3 0F 2D = CVTSS2SI 12505 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 12506 according to prevailing SSE rounding mode 12507 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 12508 according to prevailing SSE rounding mode 12509 */ 12510 /* F3 0F 2C = CVTTSS2SI 12511 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 12512 truncating towards zero 12513 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 12514 truncating towards zero 12515 */ 12516 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12517 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 12518 goto decode_success; 12519 } 12520 /* F2 0F 2D = CVTSD2SI 12521 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 12522 according to prevailing SSE rounding mode 12523 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 12524 according to prevailing SSE rounding mode 12525 */ 12526 /* F2 0F 2C = CVTTSD2SI 12527 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 12528 truncating towards zero 12529 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 12530 truncating towards zero 12531 */ 12532 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12533 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 12534 goto decode_success; 12535 } 12536 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 12537 I32 in mmx, according to prevailing SSE rounding mode */ 12538 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 12539 I32 in mmx, rounding towards zero */ 12540 if (have66noF2noF3(pfx) && sz == 2) { 12541 IRTemp dst64 = newTemp(Ity_I64); 12542 IRTemp rmode = newTemp(Ity_I32); 12543 IRTemp f64lo = newTemp(Ity_F64); 12544 IRTemp f64hi = newTemp(Ity_F64); 12545 Bool r2zero = toBool(opc == 0x2C); 12546 12547 do_MMX_preamble(); 12548 modrm = getUChar(delta); 12549 12550 if (epartIsReg(modrm)) { 12551 delta += 1; 12552 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 12553 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 12554 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 12555 nameXMMReg(eregOfRexRM(pfx,modrm)), 12556 nameMMXReg(gregLO3ofRM(modrm))); 12557 } else { 12558 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12559 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 12560 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 12561 mkexpr(addr), 12562 mkU64(8) ))); 12563 delta += alen; 12564 DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", 12565 dis_buf, 12566 nameMMXReg(gregLO3ofRM(modrm))); 12567 } 12568 12569 if (r2zero) { 12570 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 12571 } else { 12572 assign( rmode, get_sse_roundingmode() ); 12573 } 12574 12575 assign( 12576 dst64, 12577 binop( Iop_32HLto64, 12578 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 12579 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 12580 ) 12581 ); 12582 12583 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 12584 goto decode_success; 12585 } 12586 break; 12587 12588 case 0x2E: 12589 case 0x2F: 12590 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 12591 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 12592 if (have66noF2noF3(pfx) && sz == 2) { 12593 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 12594 goto decode_success; 12595 } 12596 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 12597 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 12598 if (haveNo66noF2noF3(pfx) && sz == 4) { 12599 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 12600 goto decode_success; 12601 } 12602 break; 12603 12604 case 0x50: 12605 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 12606 to 4 lowest bits of ireg(G) */ 12607 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 12608 && epartIsReg(getUChar(delta))) { 12609 /* sz == 8 is a kludge to handle insns with REX.W redundantly 12610 set to 1, which has been known to happen: 12611 12612 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 12613 12614 20071106: Intel docs say that REX.W isn't redundant: when 12615 present, a 64-bit register is written; when not present, only 12616 the 32-bit half is written. However, testing on a Core2 12617 machine suggests the entire 64 bit register is written 12618 irrespective of the status of REX.W. That could be because 12619 of the default rule that says "if the lower half of a 32-bit 12620 register is written, the upper half is zeroed". By using 12621 putIReg32 here we inadvertantly produce the same behaviour as 12622 the Core2, for the same reason -- putIReg32 implements said 12623 rule. 12624 12625 AMD docs give no indication that REX.W is even valid for this 12626 insn. */ 12627 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12628 goto decode_success; 12629 } 12630 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 12631 2 lowest bits of ireg(G) */ 12632 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 12633 /* sz == 8 is a kludge to handle insns with REX.W redundantly 12634 set to 1, which has been known to happen: 12635 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 12636 20071106: see further comments on MOVMSKPS implementation above. 
12637 */ 12638 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ ); 12639 goto decode_success; 12640 } 12641 break; 12642 12643 case 0x51: 12644 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 12645 if (haveF3no66noF2(pfx) && sz == 4) { 12646 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12647 "sqrtss", Iop_Sqrt32F0x4 ); 12648 goto decode_success; 12649 } 12650 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 12651 if (haveNo66noF2noF3(pfx) && sz == 4) { 12652 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12653 "sqrtps", Iop_Sqrt32Fx4 ); 12654 goto decode_success; 12655 } 12656 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 12657 if (haveF2no66noF3(pfx) && sz == 4) { 12658 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta, 12659 "sqrtsd", Iop_Sqrt64F0x2 ); 12660 goto decode_success; 12661 } 12662 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 12663 if (have66noF2noF3(pfx) && sz == 2) { 12664 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12665 "sqrtpd", Iop_Sqrt64Fx2 ); 12666 goto decode_success; 12667 } 12668 break; 12669 12670 case 0x52: 12671 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 12672 if (haveF3no66noF2(pfx) && sz == 4) { 12673 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12674 "rsqrtss", Iop_RSqrt32F0x4 ); 12675 goto decode_success; 12676 } 12677 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 12678 if (haveNo66noF2noF3(pfx) && sz == 4) { 12679 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12680 "rsqrtps", Iop_RSqrt32Fx4 ); 12681 goto decode_success; 12682 } 12683 break; 12684 12685 case 0x53: 12686 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 12687 if (haveF3no66noF2(pfx) && sz == 4) { 12688 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 12689 "rcpss", Iop_Recip32F0x4 ); 12690 goto decode_success; 12691 } 12692 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 12693 if (haveNo66noF2noF3(pfx) && sz == 4) { 12694 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 12695 "rcpps", Iop_Recip32Fx4 ); 12696 goto decode_success; 12697 } 12698 break; 12699 12700 case 0x54: 12701 /* 0F 54 = ANDPS -- G = G and E */ 12702 if (haveNo66noF2noF3(pfx) && sz == 4) { 12703 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 ); 12704 goto decode_success; 12705 } 12706 /* 66 0F 54 = ANDPD -- G = G and E */ 12707 if (have66noF2noF3(pfx) && sz == 2) { 12708 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 ); 12709 goto decode_success; 12710 } 12711 break; 12712 12713 case 0x55: 12714 /* 0F 55 = ANDNPS -- G = (not G) and E */ 12715 if (haveNo66noF2noF3(pfx) && sz == 4) { 12716 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps", 12717 Iop_AndV128 ); 12718 goto decode_success; 12719 } 12720 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 12721 if (have66noF2noF3(pfx) && sz == 2) { 12722 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd", 12723 Iop_AndV128 ); 12724 goto decode_success; 12725 } 12726 break; 12727 12728 case 0x56: 12729 /* 0F 56 = ORPS -- G = G and E */ 12730 if (haveNo66noF2noF3(pfx) && sz == 4) { 12731 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 ); 12732 goto decode_success; 12733 } 12734 /* 66 0F 56 = ORPD -- G = G and E */ 12735 if (have66noF2noF3(pfx) && sz == 2) { 12736 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 ); 12737 goto decode_success; 12738 } 12739 break; 12740 12741 case 0x57: 12742 /* 66 0F 57 = XORPD -- G = G xor E */ 12743 if 
(have66noF2noF3(pfx) && sz == 2) {
12744 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
12745 goto decode_success;
12746 }
12747 /* 0F 57 = XORPS -- G = G xor E */
12748 if (haveNo66noF2noF3(pfx) && sz == 4) {
12749 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
12750 goto decode_success;
12751 }
12752 break;
12753
12754 case 0x58:
12755 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
12756 if (haveNo66noF2noF3(pfx) && sz == 4) {
12757 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
12758 goto decode_success;
12759 }
12760 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
12761 if (haveF3no66noF2(pfx) && sz == 4) {
12762 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
12763 goto decode_success;
12764 }
12765 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
12766 if (haveF2no66noF3(pfx)
12767 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12768 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
12769 goto decode_success;
12770 }
12771 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
12772 if (have66noF2noF3(pfx)
12773 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12774 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
12775 goto decode_success;
12776 }
12777 break;
12778
12779 case 0x59:
12780 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
12781 if (haveF2no66noF3(pfx)
12782 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12783 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
12784 goto decode_success;
12785 }
12786 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
12787 if (haveF3no66noF2(pfx) && sz == 4) {
12788 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
12789 goto decode_success;
12790 }
12791 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
12792 if (haveNo66noF2noF3(pfx) && sz == 4) {
12793 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
12794 goto decode_success;
12795 }
12796 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
12797 if (have66noF2noF3(pfx)
12798 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12799 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
12800 goto decode_success;
12801 }
12802 break;
12803
12804 case 0x5A:
12805 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
12806 F64 in xmm(G).
*/ 12807 if (haveNo66noF2noF3(pfx) && sz == 4) { 12808 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ ); 12809 goto decode_success; 12810 } 12811 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 12812 low half xmm(G) */ 12813 if (haveF3no66noF2(pfx) && sz == 4) { 12814 IRTemp f32lo = newTemp(Ity_F32); 12815 12816 modrm = getUChar(delta); 12817 if (epartIsReg(modrm)) { 12818 delta += 1; 12819 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 12820 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12821 nameXMMReg(gregOfRexRM(pfx,modrm))); 12822 } else { 12823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12824 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 12825 delta += alen; 12826 DIP("cvtss2sd %s,%s\n", dis_buf, 12827 nameXMMReg(gregOfRexRM(pfx,modrm))); 12828 } 12829 12830 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12831 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 12832 12833 goto decode_success; 12834 } 12835 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 12836 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 12837 if (haveF2no66noF3(pfx) && sz == 4) { 12838 IRTemp rmode = newTemp(Ity_I32); 12839 IRTemp f64lo = newTemp(Ity_F64); 12840 12841 modrm = getUChar(delta); 12842 if (epartIsReg(modrm)) { 12843 delta += 1; 12844 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 12845 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12846 nameXMMReg(gregOfRexRM(pfx,modrm))); 12847 } else { 12848 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12849 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 12850 delta += alen; 12851 DIP("cvtsd2ss %s,%s\n", dis_buf, 12852 nameXMMReg(gregOfRexRM(pfx,modrm))); 12853 } 12854 12855 assign( rmode, get_sse_roundingmode() ); 12856 putXMMRegLane32F( 12857 gregOfRexRM(pfx,modrm), 0, 12858 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 12859 ); 12860 12861 goto decode_success; 12862 } 12863 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 12864 lo half xmm(G), rounding according to prevailing SSE rounding 12865 mode, and zero upper half */ 12866 /* Note, this is practically identical to CVTPD2DQ. It would have 12867 be nice to merge them together. */ 12868 if (have66noF2noF3(pfx) && sz == 2) { 12869 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12870 goto decode_success; 12871 } 12872 break; 12873 12874 case 0x5B: 12875 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 12876 xmm(G), rounding towards zero */ 12877 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 12878 xmm(G), as per the prevailing rounding mode */ 12879 if ( (have66noF2noF3(pfx) && sz == 2) 12880 || (haveF3no66noF2(pfx) && sz == 4) ) { 12881 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???) 
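// Given the guard above, sz == 4 can only have come from the F3
// (CVTTPS2DQ) arm and sz == 2 from the 66 (CVTPS2DQ) arm, so the test
// selects truncation; the FIXME presumably concerns whether this
// distinction stays valid under other prefix/size combinations.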
12882 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero ); 12883 goto decode_success; 12884 } 12885 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 12886 xmm(G) */ 12887 if (haveNo66noF2noF3(pfx) && sz == 4) { 12888 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 12889 goto decode_success; 12890 } 12891 break; 12892 12893 case 0x5C: 12894 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 12895 if (haveF3no66noF2(pfx) && sz == 4) { 12896 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 ); 12897 goto decode_success; 12898 } 12899 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 12900 if (haveF2no66noF3(pfx) 12901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12902 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 ); 12903 goto decode_success; 12904 } 12905 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 12906 if (haveNo66noF2noF3(pfx) && sz == 4) { 12907 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 ); 12908 goto decode_success; 12909 } 12910 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 12911 if (have66noF2noF3(pfx) && sz == 2) { 12912 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 ); 12913 goto decode_success; 12914 } 12915 break; 12916 12917 case 0x5D: 12918 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 12919 if (haveNo66noF2noF3(pfx) && sz == 4) { 12920 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 ); 12921 goto decode_success; 12922 } 12923 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 12924 if (haveF3no66noF2(pfx) && sz == 4) { 12925 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 ); 12926 goto decode_success; 12927 } 12928 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 12929 if (haveF2no66noF3(pfx) && sz == 4) { 12930 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 ); 12931 goto decode_success; 12932 } 12933 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */ 12934 if (have66noF2noF3(pfx) && sz == 2) { 12935 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 ); 12936 goto decode_success; 12937 } 12938 break; 12939 12940 case 0x5E: 12941 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 12942 if (haveF2no66noF3(pfx) && sz == 4) { 12943 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 ); 12944 goto decode_success; 12945 } 12946 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 12947 if (haveNo66noF2noF3(pfx) && sz == 4) { 12948 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 ); 12949 goto decode_success; 12950 } 12951 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 12952 if (haveF3no66noF2(pfx) && sz == 4) { 12953 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 ); 12954 goto decode_success; 12955 } 12956 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 12957 if (have66noF2noF3(pfx) && sz == 2) { 12958 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 ); 12959 goto decode_success; 12960 } 12961 break; 12962 12963 case 0x5F: 12964 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 12965 if (haveNo66noF2noF3(pfx) && sz == 4) { 12966 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 ); 12967 goto decode_success; 12968 } 12969 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 12970 if (haveF3no66noF2(pfx) && sz == 4) { 12971 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 ); 12972 goto decode_success; 12973 } 12974 /* F2 0F 5F = MAXSD -- max 64F0x2 from 
R/M to R */ 12975 if (haveF2no66noF3(pfx) && sz == 4) { 12976 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 ); 12977 goto decode_success; 12978 } 12979 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 12980 if (have66noF2noF3(pfx) && sz == 2) { 12981 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 ); 12982 goto decode_success; 12983 } 12984 break; 12985 12986 case 0x60: 12987 /* 66 0F 60 = PUNPCKLBW */ 12988 if (have66noF2noF3(pfx) && sz == 2) { 12989 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 12990 "punpcklbw", 12991 Iop_InterleaveLO8x16, True ); 12992 goto decode_success; 12993 } 12994 break; 12995 12996 case 0x61: 12997 /* 66 0F 61 = PUNPCKLWD */ 12998 if (have66noF2noF3(pfx) && sz == 2) { 12999 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13000 "punpcklwd", 13001 Iop_InterleaveLO16x8, True ); 13002 goto decode_success; 13003 } 13004 break; 13005 13006 case 0x62: 13007 /* 66 0F 62 = PUNPCKLDQ */ 13008 if (have66noF2noF3(pfx) && sz == 2) { 13009 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13010 "punpckldq", 13011 Iop_InterleaveLO32x4, True ); 13012 goto decode_success; 13013 } 13014 break; 13015 13016 case 0x63: 13017 /* 66 0F 63 = PACKSSWB */ 13018 if (have66noF2noF3(pfx) && sz == 2) { 13019 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13020 "packsswb", 13021 Iop_QNarrowBin16Sto8Sx16, True ); 13022 goto decode_success; 13023 } 13024 break; 13025 13026 case 0x64: 13027 /* 66 0F 64 = PCMPGTB */ 13028 if (have66noF2noF3(pfx) && sz == 2) { 13029 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13030 "pcmpgtb", Iop_CmpGT8Sx16, False ); 13031 goto decode_success; 13032 } 13033 break; 13034 13035 case 0x65: 13036 /* 66 0F 65 = PCMPGTW */ 13037 if (have66noF2noF3(pfx) && sz == 2) { 13038 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13039 "pcmpgtw", Iop_CmpGT16Sx8, False ); 13040 goto decode_success; 13041 } 13042 break; 13043 13044 case 0x66: 13045 /* 66 0F 66 = PCMPGTD */ 13046 if (have66noF2noF3(pfx) && sz == 2) { 13047 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13048 "pcmpgtd", Iop_CmpGT32Sx4, False ); 13049 goto decode_success; 13050 } 13051 break; 13052 13053 case 0x67: 13054 /* 66 0F 67 = PACKUSWB */ 13055 if (have66noF2noF3(pfx) && sz == 2) { 13056 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13057 "packuswb", 13058 Iop_QNarrowBin16Sto8Ux16, True ); 13059 goto decode_success; 13060 } 13061 break; 13062 13063 case 0x68: 13064 /* 66 0F 68 = PUNPCKHBW */ 13065 if (have66noF2noF3(pfx) && sz == 2) { 13066 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13067 "punpckhbw", 13068 Iop_InterleaveHI8x16, True ); 13069 goto decode_success; 13070 } 13071 break; 13072 13073 case 0x69: 13074 /* 66 0F 69 = PUNPCKHWD */ 13075 if (have66noF2noF3(pfx) && sz == 2) { 13076 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13077 "punpckhwd", 13078 Iop_InterleaveHI16x8, True ); 13079 goto decode_success; 13080 } 13081 break; 13082 13083 case 0x6A: 13084 /* 66 0F 6A = PUNPCKHDQ */ 13085 if (have66noF2noF3(pfx) && sz == 2) { 13086 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13087 "punpckhdq", 13088 Iop_InterleaveHI32x4, True ); 13089 goto decode_success; 13090 } 13091 break; 13092 13093 case 0x6B: 13094 /* 66 0F 6B = PACKSSDW */ 13095 if (have66noF2noF3(pfx) && sz == 2) { 13096 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13097 "packssdw", 13098 Iop_QNarrowBin32Sto16Sx8, True ); 13099 goto decode_success; 13100 } 13101 break; 13102 13103 case 0x6C: 13104 /* 66 0F 6C = PUNPCKLQDQ */ 13105 if (have66noF2noF3(pfx) && sz == 2) { 13106 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13107 
"punpcklqdq", 13108 Iop_InterleaveLO64x2, True ); 13109 goto decode_success; 13110 } 13111 break; 13112 13113 case 0x6D: 13114 /* 66 0F 6D = PUNPCKHQDQ */ 13115 if (have66noF2noF3(pfx) && sz == 2) { 13116 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13117 "punpckhqdq", 13118 Iop_InterleaveHI64x2, True ); 13119 goto decode_success; 13120 } 13121 break; 13122 13123 case 0x6E: 13124 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 13125 zeroing high 3/4 of xmm. */ 13126 /* or from ireg64/m64 to xmm lo 1/2, 13127 zeroing high 1/2 of xmm. */ 13128 if (have66noF2noF3(pfx)) { 13129 vassert(sz == 2 || sz == 8); 13130 if (sz == 2) sz = 4; 13131 modrm = getUChar(delta); 13132 if (epartIsReg(modrm)) { 13133 delta += 1; 13134 if (sz == 4) { 13135 putXMMReg( 13136 gregOfRexRM(pfx,modrm), 13137 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 13138 ); 13139 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13140 nameXMMReg(gregOfRexRM(pfx,modrm))); 13141 } else { 13142 putXMMReg( 13143 gregOfRexRM(pfx,modrm), 13144 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 13145 ); 13146 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13147 nameXMMReg(gregOfRexRM(pfx,modrm))); 13148 } 13149 } else { 13150 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13151 delta += alen; 13152 putXMMReg( 13153 gregOfRexRM(pfx,modrm), 13154 sz == 4 13155 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 13156 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 13157 ); 13158 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 13159 nameXMMReg(gregOfRexRM(pfx,modrm))); 13160 } 13161 goto decode_success; 13162 } 13163 break; 13164 13165 case 0x6F: 13166 if (have66noF2noF3(pfx) 13167 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13168 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */ 13169 modrm = getUChar(delta); 13170 if (epartIsReg(modrm)) { 13171 putXMMReg( gregOfRexRM(pfx,modrm), 13172 getXMMReg( eregOfRexRM(pfx,modrm) )); 13173 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13174 nameXMMReg(gregOfRexRM(pfx,modrm))); 13175 delta += 1; 13176 } else { 13177 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13178 gen_SEGV_if_not_16_aligned( addr ); 13179 putXMMReg( gregOfRexRM(pfx,modrm), 13180 loadLE(Ity_V128, mkexpr(addr)) ); 13181 DIP("movdqa %s,%s\n", dis_buf, 13182 nameXMMReg(gregOfRexRM(pfx,modrm))); 13183 delta += alen; 13184 } 13185 goto decode_success; 13186 } 13187 if (haveF3no66noF2(pfx) && sz == 4) { 13188 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). 
*/ 13189 modrm = getUChar(delta); 13190 if (epartIsReg(modrm)) { 13191 putXMMReg( gregOfRexRM(pfx,modrm), 13192 getXMMReg( eregOfRexRM(pfx,modrm) )); 13193 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13194 nameXMMReg(gregOfRexRM(pfx,modrm))); 13195 delta += 1; 13196 } else { 13197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13198 putXMMReg( gregOfRexRM(pfx,modrm), 13199 loadLE(Ity_V128, mkexpr(addr)) ); 13200 DIP("movdqu %s,%s\n", dis_buf, 13201 nameXMMReg(gregOfRexRM(pfx,modrm))); 13202 delta += alen; 13203 } 13204 goto decode_success; 13205 } 13206 break; 13207 13208 case 0x70: 13209 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 13210 if (have66noF2noF3(pfx) && sz == 2) { 13211 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/); 13212 goto decode_success; 13213 } 13214 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13215 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 13216 if (haveNo66noF2noF3(pfx) && sz == 4) { 13217 Int order; 13218 IRTemp sV, dV, s3, s2, s1, s0; 13219 s3 = s2 = s1 = s0 = IRTemp_INVALID; 13220 sV = newTemp(Ity_I64); 13221 dV = newTemp(Ity_I64); 13222 do_MMX_preamble(); 13223 modrm = getUChar(delta); 13224 if (epartIsReg(modrm)) { 13225 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13226 order = (Int)getUChar(delta+1); 13227 delta += 1+1; 13228 DIP("pshufw $%d,%s,%s\n", order, 13229 nameMMXReg(eregLO3ofRM(modrm)), 13230 nameMMXReg(gregLO3ofRM(modrm))); 13231 } else { 13232 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13233 1/*extra byte after amode*/ ); 13234 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13235 order = (Int)getUChar(delta+alen); 13236 delta += 1+alen; 13237 DIP("pshufw $%d,%s,%s\n", order, 13238 dis_buf, 13239 nameMMXReg(gregLO3ofRM(modrm))); 13240 } 13241 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 13242 # define SEL(n) \ 13243 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? 
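/* e.g. an order byte of 0x1B selects s3,s2,s1,s0 into result
   lanes 0..3 respectively, i.e. it reverses the four 16-bit lanes */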
s2 : s3))) 13244 assign(dV, 13245 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 13246 SEL((order>>2)&3), SEL((order>>0)&3) ) 13247 ); 13248 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 13249 # undef SEL 13250 goto decode_success; 13251 } 13252 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 13253 mem) to G(xmm), and copy upper half */ 13254 if (haveF2no66noF3(pfx) && sz == 4) { 13255 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13256 False/*!isAvx*/, False/*!xIsH*/ ); 13257 goto decode_success; 13258 } 13259 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 13260 mem) to G(xmm), and copy lower half */ 13261 if (haveF3no66noF2(pfx) && sz == 4) { 13262 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13263 False/*!isAvx*/, True/*xIsH*/ ); 13264 goto decode_success; 13265 } 13266 break; 13267 13268 case 0x71: 13269 /* 66 0F 71 /2 ib = PSRLW by immediate */ 13270 if (have66noF2noF3(pfx) && sz == 2 13271 && epartIsReg(getUChar(delta)) 13272 && gregLO3ofRM(getUChar(delta)) == 2) { 13273 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 ); 13274 goto decode_success; 13275 } 13276 /* 66 0F 71 /4 ib = PSRAW by immediate */ 13277 if (have66noF2noF3(pfx) && sz == 2 13278 && epartIsReg(getUChar(delta)) 13279 && gregLO3ofRM(getUChar(delta)) == 4) { 13280 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 ); 13281 goto decode_success; 13282 } 13283 /* 66 0F 71 /6 ib = PSLLW by immediate */ 13284 if (have66noF2noF3(pfx) && sz == 2 13285 && epartIsReg(getUChar(delta)) 13286 && gregLO3ofRM(getUChar(delta)) == 6) { 13287 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 ); 13288 goto decode_success; 13289 } 13290 break; 13291 13292 case 0x72: 13293 /* 66 0F 72 /2 ib = PSRLD by immediate */ 13294 if (have66noF2noF3(pfx) && sz == 2 13295 && epartIsReg(getUChar(delta)) 13296 && gregLO3ofRM(getUChar(delta)) == 2) { 13297 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 ); 13298 goto decode_success; 13299 } 13300 /* 66 0F 72 /4 ib = PSRAD by immediate */ 13301 if (have66noF2noF3(pfx) && sz == 2 13302 && epartIsReg(getUChar(delta)) 13303 && gregLO3ofRM(getUChar(delta)) == 4) { 13304 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 ); 13305 goto decode_success; 13306 } 13307 /* 66 0F 72 /6 ib = PSLLD by immediate */ 13308 if (have66noF2noF3(pfx) && sz == 2 13309 && epartIsReg(getUChar(delta)) 13310 && gregLO3ofRM(getUChar(delta)) == 6) { 13311 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 ); 13312 goto decode_success; 13313 } 13314 break; 13315 13316 case 0x73: 13317 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 13318 /* note, if mem case ever filled in, 1 byte after amode */ 13319 if (have66noF2noF3(pfx) && sz == 2 13320 && epartIsReg(getUChar(delta)) 13321 && gregLO3ofRM(getUChar(delta)) == 3) { 13322 Int imm = (Int)getUChar(delta+1); 13323 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13324 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 13325 delta += 2; 13326 IRTemp sV = newTemp(Ity_V128); 13327 assign( sV, getXMMReg(reg) ); 13328 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm ))); 13329 goto decode_success; 13330 } 13331 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 13332 /* note, if mem case ever filled in, 1 byte after amode */ 13333 if (have66noF2noF3(pfx) && sz == 2 13334 && epartIsReg(getUChar(delta)) 13335 && gregLO3ofRM(getUChar(delta)) == 7) { 13336 Int imm = (Int)getUChar(delta+1); 13337 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13338 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 13339 vassert(imm >= 0 && imm 
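/* e.g. pslldq $4,%xmm2 moves the whole 128-bit value up by 4 bytes,
   zero-filling the bottom 32 bits; architecturally any imm >= 16
   produces all zeroes */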
<= 255); 13340 delta += 2; 13341 IRTemp sV = newTemp(Ity_V128); 13342 assign( sV, getXMMReg(reg) ); 13343 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm ))); 13344 goto decode_success; 13345 } 13346 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 13347 if (have66noF2noF3(pfx) && sz == 2 13348 && epartIsReg(getUChar(delta)) 13349 && gregLO3ofRM(getUChar(delta)) == 2) { 13350 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 ); 13351 goto decode_success; 13352 } 13353 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 13354 if (have66noF2noF3(pfx) && sz == 2 13355 && epartIsReg(getUChar(delta)) 13356 && gregLO3ofRM(getUChar(delta)) == 6) { 13357 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 ); 13358 goto decode_success; 13359 } 13360 break; 13361 13362 case 0x74: 13363 /* 66 0F 74 = PCMPEQB */ 13364 if (have66noF2noF3(pfx) && sz == 2) { 13365 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13366 "pcmpeqb", Iop_CmpEQ8x16, False ); 13367 goto decode_success; 13368 } 13369 break; 13370 13371 case 0x75: 13372 /* 66 0F 75 = PCMPEQW */ 13373 if (have66noF2noF3(pfx) && sz == 2) { 13374 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13375 "pcmpeqw", Iop_CmpEQ16x8, False ); 13376 goto decode_success; 13377 } 13378 break; 13379 13380 case 0x76: 13381 /* 66 0F 76 = PCMPEQD */ 13382 if (have66noF2noF3(pfx) && sz == 2) { 13383 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13384 "pcmpeqd", Iop_CmpEQ32x4, False ); 13385 goto decode_success; 13386 } 13387 break; 13388 13389 case 0x7E: 13390 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 13391 G (lo half xmm). Upper half of G is zeroed out. */ 13392 if (haveF3no66noF2(pfx) 13393 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13394 modrm = getUChar(delta); 13395 if (epartIsReg(modrm)) { 13396 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13397 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 13398 /* zero bits 127:64 */ 13399 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 13400 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13401 nameXMMReg(gregOfRexRM(pfx,modrm))); 13402 delta += 1; 13403 } else { 13404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13405 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 13406 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13407 loadLE(Ity_I64, mkexpr(addr)) ); 13408 DIP("movsd %s,%s\n", dis_buf, 13409 nameXMMReg(gregOfRexRM(pfx,modrm))); 13410 delta += alen; 13411 } 13412 goto decode_success; 13413 } 13414 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 13415 /* or from xmm low 1/2 to ireg64 or m64. */ 13416 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 13417 if (sz == 2) sz = 4; 13418 modrm = getUChar(delta); 13419 if (epartIsReg(modrm)) { 13420 delta += 1; 13421 if (sz == 4) { 13422 putIReg32( eregOfRexRM(pfx,modrm), 13423 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 13424 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13425 nameIReg32(eregOfRexRM(pfx,modrm))); 13426 } else { 13427 putIReg64( eregOfRexRM(pfx,modrm), 13428 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 13429 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13430 nameIReg64(eregOfRexRM(pfx,modrm))); 13431 } 13432 } else { 13433 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13434 delta += alen; 13435 storeLE( mkexpr(addr), 13436 sz == 4 13437 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 13438 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 13439 DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', 13440 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13441 } 13442 goto decode_success; 13443 } 13444 break; 13445 13446 case 0x7F: 13447 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 13448 if (haveF3no66noF2(pfx) && sz == 4) { 13449 modrm = getUChar(delta); 13450 if (epartIsReg(modrm)) { 13451 goto decode_failure; /* awaiting test case */ 13452 delta += 1; 13453 putXMMReg( eregOfRexRM(pfx,modrm), 13454 getXMMReg(gregOfRexRM(pfx,modrm)) ); 13455 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13456 nameXMMReg(eregOfRexRM(pfx,modrm))); 13457 } else { 13458 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13459 delta += alen; 13460 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13461 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13462 } 13463 goto decode_success; 13464 } 13465 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 13466 if (have66noF2noF3(pfx) && sz == 2) { 13467 modrm = getUChar(delta); 13468 if (epartIsReg(modrm)) { 13469 delta += 1; 13470 putXMMReg( eregOfRexRM(pfx,modrm), 13471 getXMMReg(gregOfRexRM(pfx,modrm)) ); 13472 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 13473 nameXMMReg(eregOfRexRM(pfx,modrm))); 13474 } else { 13475 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13476 gen_SEGV_if_not_16_aligned( addr ); 13477 delta += alen; 13478 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13479 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 13480 } 13481 goto decode_success; 13482 } 13483 break; 13484 13485 case 0xAE: 13486 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 13487 if (haveNo66noF2noF3(pfx) 13488 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 13489 && sz == 4) { 13490 delta += 1; 13491 /* Insert a memory fence. It's sometimes important that these 13492 are carried through to the generated code. */ 13493 stmt( IRStmt_MBE(Imbe_Fence) ); 13494 DIP("sfence\n"); 13495 goto decode_success; 13496 } 13497 /* mindless duplication follows .. */ 13498 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 13499 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 13500 if (haveNo66noF2noF3(pfx) 13501 && epartIsReg(getUChar(delta)) 13502 && (gregLO3ofRM(getUChar(delta)) == 5 13503 || gregLO3ofRM(getUChar(delta)) == 6) 13504 && sz == 4) { 13505 delta += 1; 13506 /* Insert a memory fence. It's sometimes important that these 13507 are carried through to the generated code. */ 13508 stmt( IRStmt_MBE(Imbe_Fence) ); 13509 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m"); 13510 goto decode_success; 13511 } 13512 13513 /* 0F AE /7 = CLFLUSH -- flush cache line */ 13514 if (haveNo66noF2noF3(pfx) 13515 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 13516 && sz == 4) { 13517 13518 /* This is something of a hack. We need to know the size of 13519 the cache line containing addr. Since we don't (easily), 13520 assume 256 on the basis that no real cache would have a 13521 line that big. It's safe to invalidate more stuff than we 13522 need, just inefficient. */ 13523 ULong lineszB = 256ULL; 13524 13525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13526 delta += alen; 13527 13528 /* Round addr down to the start of the containing block. 
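With lineszB == 256 the mask below is ~0xFFULL, so an addr of,
say, 0x401234567F9A rounds down to 0x401234567F00 and CMLEN then
covers the whole 256-byte block.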
*/ 13529 stmt( IRStmt_Put( 13530 OFFB_CMSTART, 13531 binop( Iop_And64, 13532 mkexpr(addr), 13533 mkU64( ~(lineszB-1) ))) ); 13534 13535 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); 13536 13537 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); 13538 13539 DIP("clflush %s\n", dis_buf); 13540 goto decode_success; 13541 } 13542 13543 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 13544 if (haveNo66noF2noF3(pfx) 13545 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 13546 && sz == 4) { 13547 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/); 13548 goto decode_success; 13549 } 13550 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 13551 if (haveNo66noF2noF3(pfx) 13552 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 13553 && sz == 4) { 13554 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/); 13555 goto decode_success; 13556 } 13557 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. 13558 Note that the presence or absence of REX.W slightly affects the 13559 written format: whether the saved FPU IP and DP pointers are 64 13560 or 32 bits. But the helper function we call simply writes zero 13561 bits in the relevant fields (which are 64 bits regardless of 13562 what REX.W is) and so it's good enough (iow, equally broken) in 13563 both cases. */ 13564 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13565 && !epartIsReg(getUChar(delta)) 13566 && gregOfRexRM(pfx,getUChar(delta)) == 0) { 13567 IRDirty* d; 13568 modrm = getUChar(delta); 13569 vassert(!epartIsReg(modrm)); 13570 13571 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13572 delta += alen; 13573 gen_SEGV_if_not_16_aligned(addr); 13574 13575 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); 13576 13577 /* Uses dirty helper: 13578 void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */ 13579 d = unsafeIRDirty_0_N ( 13580 0/*regparms*/, 13581 "amd64g_dirtyhelper_FXSAVE", 13582 &amd64g_dirtyhelper_FXSAVE, 13583 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 13584 ); 13585 13586 /* declare we're writing memory */ 13587 d->mFx = Ifx_Write; 13588 d->mAddr = mkexpr(addr); 13589 d->mSize = 464; /* according to recent Intel docs */ 13590 13591 /* declare we're reading guest state */ 13592 d->nFxState = 7; 13593 vex_bzero(&d->fxState, sizeof(d->fxState)); 13594 13595 d->fxState[0].fx = Ifx_Read; 13596 d->fxState[0].offset = OFFB_FTOP; 13597 d->fxState[0].size = sizeof(UInt); 13598 13599 d->fxState[1].fx = Ifx_Read; 13600 d->fxState[1].offset = OFFB_FPREGS; 13601 d->fxState[1].size = 8 * sizeof(ULong); 13602 13603 d->fxState[2].fx = Ifx_Read; 13604 d->fxState[2].offset = OFFB_FPTAGS; 13605 d->fxState[2].size = 8 * sizeof(UChar); 13606 13607 d->fxState[3].fx = Ifx_Read; 13608 d->fxState[3].offset = OFFB_FPROUND; 13609 d->fxState[3].size = sizeof(ULong); 13610 13611 d->fxState[4].fx = Ifx_Read; 13612 d->fxState[4].offset = OFFB_FC3210; 13613 d->fxState[4].size = sizeof(ULong); 13614 13615 d->fxState[5].fx = Ifx_Read; 13616 d->fxState[5].offset = OFFB_YMM0; 13617 d->fxState[5].size = sizeof(U128); 13618 /* plus 15 more of the above, spaced out in YMM sized steps */ 13619 d->fxState[5].nRepeats = 15; 13620 d->fxState[5].repeatLen = sizeof(U256); 13621 13622 d->fxState[6].fx = Ifx_Read; 13623 d->fxState[6].offset = OFFB_SSEROUND; 13624 d->fxState[6].size = sizeof(ULong); 13625 13626 /* Be paranoid ... this assertion tries to ensure the 16 %ymm 13627 images are packed back-to-back. If not, the settings for 13628 d->fxState[5] are wrong. 
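They would be wrong because nRepeats/repeatLen describe the guest
state correctly only if successive YMM slots really are
sizeof(U256) bytes apart, which is what the two vasserts below
check.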
*/ 13629 vassert(32 == sizeof(U256)); 13630 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32)); 13631 13632 stmt( IRStmt_Dirty(d) ); 13633 13634 goto decode_success; 13635 } 13636 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. 13637 As with FXSAVE above we ignore the value of REX.W since we're 13638 not bothering with the FPU DP and IP fields. */ 13639 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13640 && !epartIsReg(getUChar(delta)) 13641 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 13642 IRDirty* d; 13643 modrm = getUChar(delta); 13644 vassert(!epartIsReg(modrm)); 13645 13646 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13647 delta += alen; 13648 gen_SEGV_if_not_16_aligned(addr); 13649 13650 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf); 13651 13652 /* Uses dirty helper: 13653 VexEmNote amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong ) 13654 NOTE: 13655 the VexEmNote value is simply ignored 13656 */ 13657 d = unsafeIRDirty_0_N ( 13658 0/*regparms*/, 13659 "amd64g_dirtyhelper_FXRSTOR", 13660 &amd64g_dirtyhelper_FXRSTOR, 13661 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 13662 ); 13663 13664 /* declare we're reading memory */ 13665 d->mFx = Ifx_Read; 13666 d->mAddr = mkexpr(addr); 13667 d->mSize = 464; /* according to recent Intel docs */ 13668 13669 /* declare we're writing guest state */ 13670 d->nFxState = 7; 13671 vex_bzero(&d->fxState, sizeof(d->fxState)); 13672 13673 d->fxState[0].fx = Ifx_Write; 13674 d->fxState[0].offset = OFFB_FTOP; 13675 d->fxState[0].size = sizeof(UInt); 13676 13677 d->fxState[1].fx = Ifx_Write; 13678 d->fxState[1].offset = OFFB_FPREGS; 13679 d->fxState[1].size = 8 * sizeof(ULong); 13680 13681 d->fxState[2].fx = Ifx_Write; 13682 d->fxState[2].offset = OFFB_FPTAGS; 13683 d->fxState[2].size = 8 * sizeof(UChar); 13684 13685 d->fxState[3].fx = Ifx_Write; 13686 d->fxState[3].offset = OFFB_FPROUND; 13687 d->fxState[3].size = sizeof(ULong); 13688 13689 d->fxState[4].fx = Ifx_Write; 13690 d->fxState[4].offset = OFFB_FC3210; 13691 d->fxState[4].size = sizeof(ULong); 13692 13693 d->fxState[5].fx = Ifx_Write; 13694 d->fxState[5].offset = OFFB_YMM0; 13695 d->fxState[5].size = sizeof(U128); 13696 /* plus 15 more of the above, spaced out in YMM sized steps */ 13697 d->fxState[5].nRepeats = 15; 13698 d->fxState[5].repeatLen = sizeof(U256); 13699 13700 d->fxState[6].fx = Ifx_Write; 13701 d->fxState[6].offset = OFFB_SSEROUND; 13702 d->fxState[6].size = sizeof(ULong); 13703 13704 /* Be paranoid ... this assertion tries to ensure the 16 %ymm 13705 images are packed back-to-back. If not, the settings for 13706 d->fxState[5] are wrong. 
*/ 13707 vassert(32 == sizeof(U256)); 13708 vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32)); 13709 13710 stmt( IRStmt_Dirty(d) ); 13711 13712 goto decode_success; 13713 } 13714 break; 13715 13716 case 0xC2: 13717 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 13718 if (haveNo66noF2noF3(pfx) && sz == 4) { 13719 Long delta0 = delta; 13720 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 13721 if (delta > delta0) goto decode_success; 13722 } 13723 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 13724 if (haveF3no66noF2(pfx) && sz == 4) { 13725 Long delta0 = delta; 13726 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 13727 if (delta > delta0) goto decode_success; 13728 } 13729 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 13730 if (haveF2no66noF3(pfx) && sz == 4) { 13731 Long delta0 = delta; 13732 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 13733 if (delta > delta0) goto decode_success; 13734 } 13735 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 13736 if (have66noF2noF3(pfx) && sz == 2) { 13737 Long delta0 = delta; 13738 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 13739 if (delta > delta0) goto decode_success; 13740 } 13741 break; 13742 13743 case 0xC3: 13744 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */ 13745 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13746 modrm = getUChar(delta); 13747 if (!epartIsReg(modrm)) { 13748 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13749 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 13750 DIP("movnti %s,%s\n", dis_buf, 13751 nameIRegG(sz, pfx, modrm)); 13752 delta += alen; 13753 goto decode_success; 13754 } 13755 /* else fall through */ 13756 } 13757 break; 13758 13759 case 0xC4: 13760 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13761 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13762 put it into the specified lane of mmx(G). */ 13763 if (haveNo66noF2noF3(pfx) 13764 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13765 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 13766 mmx reg. t4 is the new lane value. t5 is the original 13767 mmx value. t6 is the new mmx value. 
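For example, with lane == 2 the switch below builds
t6 = (t3,t4,t1,t0), i.e. only bits 47:32 of the mmx register are
replaced by t4.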
*/ 13768 Int lane; 13769 t4 = newTemp(Ity_I16); 13770 t5 = newTemp(Ity_I64); 13771 t6 = newTemp(Ity_I64); 13772 modrm = getUChar(delta); 13773 do_MMX_preamble(); 13774 13775 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 13776 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 13777 13778 if (epartIsReg(modrm)) { 13779 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 13780 delta += 1+1; 13781 lane = getUChar(delta-1); 13782 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13783 nameIReg16(eregOfRexRM(pfx,modrm)), 13784 nameMMXReg(gregLO3ofRM(modrm))); 13785 } else { 13786 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13787 delta += 1+alen; 13788 lane = getUChar(delta-1); 13789 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13790 DIP("pinsrw $%d,%s,%s\n", (Int)lane, 13791 dis_buf, 13792 nameMMXReg(gregLO3ofRM(modrm))); 13793 } 13794 13795 switch (lane & 3) { 13796 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 13797 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 13798 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 13799 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 13800 default: vassert(0); 13801 } 13802 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 13803 goto decode_success; 13804 } 13805 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 13806 put it into the specified lane of xmm(G). */ 13807 if (have66noF2noF3(pfx) 13808 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13809 Int lane; 13810 t4 = newTemp(Ity_I16); 13811 modrm = getUChar(delta); 13812 UInt rG = gregOfRexRM(pfx,modrm); 13813 if (epartIsReg(modrm)) { 13814 UInt rE = eregOfRexRM(pfx,modrm); 13815 assign(t4, getIReg16(rE)); 13816 delta += 1+1; 13817 lane = getUChar(delta-1); 13818 DIP("pinsrw $%d,%s,%s\n", 13819 (Int)lane, nameIReg16(rE), nameXMMReg(rG)); 13820 } else { 13821 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13822 1/*byte after the amode*/ ); 13823 delta += 1+alen; 13824 lane = getUChar(delta-1); 13825 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 13826 DIP("pinsrw $%d,%s,%s\n", 13827 (Int)lane, dis_buf, nameXMMReg(rG)); 13828 } 13829 IRTemp src_vec = newTemp(Ity_V128); 13830 assign(src_vec, getXMMReg(rG)); 13831 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 13832 putXMMReg(rG, mkexpr(res_vec)); 13833 goto decode_success; 13834 } 13835 break; 13836 13837 case 0xC5: 13838 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13839 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 13840 zero-extend of it in ireg(G). */ 13841 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 13842 modrm = getUChar(delta); 13843 if (epartIsReg(modrm)) { 13844 IRTemp sV = newTemp(Ity_I64); 13845 t5 = newTemp(Ity_I16); 13846 do_MMX_preamble(); 13847 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 13848 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 13849 switch (getUChar(delta+1) & 3) { 13850 case 0: assign(t5, mkexpr(t0)); break; 13851 case 1: assign(t5, mkexpr(t1)); break; 13852 case 2: assign(t5, mkexpr(t2)); break; 13853 case 3: assign(t5, mkexpr(t3)); break; 13854 default: vassert(0); 13855 } 13856 if (sz == 8) 13857 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 13858 else 13859 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 13860 DIP("pextrw $%d,%s,%s\n", 13861 (Int)getUChar(delta+1), 13862 nameMMXReg(eregLO3ofRM(modrm)), 13863 sz==8 ? 
nameIReg64(gregOfRexRM(pfx,modrm)) 13864 : nameIReg32(gregOfRexRM(pfx,modrm)) 13865 ); 13866 delta += 2; 13867 goto decode_success; 13868 } 13869 /* else fall through */ 13870 /* note, for anyone filling in the mem case: this insn has one 13871 byte after the amode and therefore you must pass 1 as the 13872 last arg to disAMode */ 13873 } 13874 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 13875 zero-extend of it in ireg(G). */ 13876 if (have66noF2noF3(pfx) 13877 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13878 Long delta0 = delta; 13879 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 13880 False/*!isAvx*/ ); 13881 if (delta > delta0) goto decode_success; 13882 /* else fall through -- decoding has failed */ 13883 } 13884 break; 13885 13886 case 0xC6: 13887 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 13888 if (haveNo66noF2noF3(pfx) && sz == 4) { 13889 Int imm8 = 0; 13890 IRTemp sV = newTemp(Ity_V128); 13891 IRTemp dV = newTemp(Ity_V128); 13892 modrm = getUChar(delta); 13893 UInt rG = gregOfRexRM(pfx,modrm); 13894 assign( dV, getXMMReg(rG) ); 13895 if (epartIsReg(modrm)) { 13896 UInt rE = eregOfRexRM(pfx,modrm); 13897 assign( sV, getXMMReg(rE) ); 13898 imm8 = (Int)getUChar(delta+1); 13899 delta += 1+1; 13900 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 13901 } else { 13902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13903 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13904 imm8 = (Int)getUChar(delta+alen); 13905 delta += 1+alen; 13906 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 13907 } 13908 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 13909 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13910 goto decode_success; 13911 } 13912 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 13913 if (have66noF2noF3(pfx) && sz == 2) { 13914 Int select; 13915 IRTemp sV = newTemp(Ity_V128); 13916 IRTemp dV = newTemp(Ity_V128); 13917 13918 modrm = getUChar(delta); 13919 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 13920 13921 if (epartIsReg(modrm)) { 13922 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 13923 select = (Int)getUChar(delta+1); 13924 delta += 1+1; 13925 DIP("shufpd $%d,%s,%s\n", select, 13926 nameXMMReg(eregOfRexRM(pfx,modrm)), 13927 nameXMMReg(gregOfRexRM(pfx,modrm))); 13928 } else { 13929 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 13930 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 13931 select = getUChar(delta+alen); 13932 delta += 1+alen; 13933 DIP("shufpd $%d,%s,%s\n", select, 13934 dis_buf, 13935 nameXMMReg(gregOfRexRM(pfx,modrm))); 13936 } 13937 13938 IRTemp res = math_SHUFPD_128( sV, dV, select ); 13939 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 13940 goto decode_success; 13941 } 13942 break; 13943 13944 case 0xD1: 13945 /* 66 0F D1 = PSRLW by E */ 13946 if (have66noF2noF3(pfx) && sz == 2) { 13947 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 13948 goto decode_success; 13949 } 13950 break; 13951 13952 case 0xD2: 13953 /* 66 0F D2 = PSRLD by E */ 13954 if (have66noF2noF3(pfx) && sz == 2) { 13955 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 13956 goto decode_success; 13957 } 13958 break; 13959 13960 case 0xD3: 13961 /* 66 0F D3 = PSRLQ by E */ 13962 if (have66noF2noF3(pfx) && sz == 2) { 13963 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 13964 goto decode_success; 13965 } 13966 break; 13967 13968 case 0xD4: 13969 /* 66 0F D4 = PADDQ */ 13970 if (have66noF2noF3(pfx) && sz == 2) { 13971 delta = 
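/* Iop_Add64x2 does two independent modular 64-bit adds (no
   saturation), matching PADDQ */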
dis_SSEint_E_to_G( vbi, pfx, delta, 13972 "paddq", Iop_Add64x2, False ); 13973 goto decode_success; 13974 } 13975 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 13976 /* 0F D4 = PADDQ -- add 64x1 */ 13977 if (haveNo66noF2noF3(pfx) && sz == 4) { 13978 do_MMX_preamble(); 13979 delta = dis_MMXop_regmem_to_reg ( 13980 vbi, pfx, delta, opc, "paddq", False ); 13981 goto decode_success; 13982 } 13983 break; 13984 13985 case 0xD5: 13986 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 13987 if (have66noF2noF3(pfx) && sz == 2) { 13988 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13989 "pmullw", Iop_Mul16x8, False ); 13990 goto decode_success; 13991 } 13992 break; 13993 13994 case 0xD6: 13995 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 13996 hi half). */ 13997 if (haveF3no66noF2(pfx) && sz == 4) { 13998 modrm = getUChar(delta); 13999 if (epartIsReg(modrm)) { 14000 do_MMX_preamble(); 14001 putXMMReg( gregOfRexRM(pfx,modrm), 14002 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 14003 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14004 nameXMMReg(gregOfRexRM(pfx,modrm))); 14005 delta += 1; 14006 goto decode_success; 14007 } 14008 /* apparently no mem case for this insn */ 14009 } 14010 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 14011 or lo half xmm). */ 14012 if (have66noF2noF3(pfx) 14013 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14014 modrm = getUChar(delta); 14015 if (epartIsReg(modrm)) { 14016 /* fall through, awaiting test case */ 14017 /* dst: lo half copied, hi half zeroed */ 14018 } else { 14019 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14020 storeLE( mkexpr(addr), 14021 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 14022 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 14023 delta += alen; 14024 goto decode_success; 14025 } 14026 } 14027 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 14028 if (haveF2no66noF3(pfx) && sz == 4) { 14029 modrm = getUChar(delta); 14030 if (epartIsReg(modrm)) { 14031 do_MMX_preamble(); 14032 putMMXReg( gregLO3ofRM(modrm), 14033 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14034 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14035 nameMMXReg(gregLO3ofRM(modrm))); 14036 delta += 1; 14037 goto decode_success; 14038 } 14039 /* apparently no mem case for this insn */ 14040 } 14041 break; 14042 14043 case 0xD7: 14044 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 14045 lanes in xmm(E), turn them into a byte, and put 14046 zero-extend of it in ireg(G). Doing this directly is just 14047 too cumbersome; give up therefore and call a helper. */ 14048 if (have66noF2noF3(pfx) 14049 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14050 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 14051 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 14052 goto decode_success; 14053 } 14054 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14055 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 14056 mmx(E), turn them into a byte, and put zero-extend of it in 14057 ireg(G). 
*/ 14058 if (haveNo66noF2noF3(pfx) 14059 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14060 modrm = getUChar(delta); 14061 if (epartIsReg(modrm)) { 14062 do_MMX_preamble(); 14063 t0 = newTemp(Ity_I64); 14064 t1 = newTemp(Ity_I32); 14065 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 14066 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); 14067 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); 14068 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14069 nameIReg32(gregOfRexRM(pfx,modrm))); 14070 delta += 1; 14071 goto decode_success; 14072 } 14073 /* else fall through */ 14074 } 14075 break; 14076 14077 case 0xD8: 14078 /* 66 0F D8 = PSUBUSB */ 14079 if (have66noF2noF3(pfx) && sz == 2) { 14080 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14081 "psubusb", Iop_QSub8Ux16, False ); 14082 goto decode_success; 14083 } 14084 break; 14085 14086 case 0xD9: 14087 /* 66 0F D9 = PSUBUSW */ 14088 if (have66noF2noF3(pfx) && sz == 2) { 14089 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14090 "psubusw", Iop_QSub16Ux8, False ); 14091 goto decode_success; 14092 } 14093 break; 14094 14095 case 0xDA: 14096 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14097 /* 0F DA = PMINUB -- 8x8 unsigned min */ 14098 if (haveNo66noF2noF3(pfx) && sz == 4) { 14099 do_MMX_preamble(); 14100 delta = dis_MMXop_regmem_to_reg ( 14101 vbi, pfx, delta, opc, "pminub", False ); 14102 goto decode_success; 14103 } 14104 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 14105 if (have66noF2noF3(pfx) && sz == 2) { 14106 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14107 "pminub", Iop_Min8Ux16, False ); 14108 goto decode_success; 14109 } 14110 break; 14111 14112 case 0xDB: 14113 /* 66 0F DB = PAND */ 14114 if (have66noF2noF3(pfx) && sz == 2) { 14115 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 14116 goto decode_success; 14117 } 14118 break; 14119 14120 case 0xDC: 14121 /* 66 0F DC = PADDUSB */ 14122 if (have66noF2noF3(pfx) && sz == 2) { 14123 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14124 "paddusb", Iop_QAdd8Ux16, False ); 14125 goto decode_success; 14126 } 14127 break; 14128 14129 case 0xDD: 14130 /* 66 0F DD = PADDUSW */ 14131 if (have66noF2noF3(pfx) && sz == 2) { 14132 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14133 "paddusw", Iop_QAdd16Ux8, False ); 14134 goto decode_success; 14135 } 14136 break; 14137 14138 case 0xDE: 14139 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14140 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 14141 if (haveNo66noF2noF3(pfx) && sz == 4) { 14142 do_MMX_preamble(); 14143 delta = dis_MMXop_regmem_to_reg ( 14144 vbi, pfx, delta, opc, "pmaxub", False ); 14145 goto decode_success; 14146 } 14147 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 14148 if (have66noF2noF3(pfx) && sz == 2) { 14149 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14150 "pmaxub", Iop_Max8Ux16, False ); 14151 goto decode_success; 14152 } 14153 break; 14154 14155 case 0xDF: 14156 /* 66 0F DF = PANDN */ 14157 if (have66noF2noF3(pfx) && sz == 2) { 14158 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 ); 14159 goto decode_success; 14160 } 14161 break; 14162 14163 case 0xE0: 14164 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14165 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 14166 if (haveNo66noF2noF3(pfx) && sz == 4) { 14167 do_MMX_preamble(); 14168 delta = dis_MMXop_regmem_to_reg ( 14169 vbi, pfx, delta, opc, "pavgb", False ); 14170 goto decode_success; 14171 } 14172 /* 66 0F E0 = PAVGB */ 14173 if 
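/* pavgb computes (a + b + 1) >> 1 in each unsigned byte lane,
   i.e. the average rounded up on ties */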
(have66noF2noF3(pfx) && sz == 2) { 14174 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14175 "pavgb", Iop_Avg8Ux16, False ); 14176 goto decode_success; 14177 } 14178 break; 14179 14180 case 0xE1: 14181 /* 66 0F E1 = PSRAW by E */ 14182 if (have66noF2noF3(pfx) && sz == 2) { 14183 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 14184 goto decode_success; 14185 } 14186 break; 14187 14188 case 0xE2: 14189 /* 66 0F E2 = PSRAD by E */ 14190 if (have66noF2noF3(pfx) && sz == 2) { 14191 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 14192 goto decode_success; 14193 } 14194 break; 14195 14196 case 0xE3: 14197 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14198 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 14199 if (haveNo66noF2noF3(pfx) && sz == 4) { 14200 do_MMX_preamble(); 14201 delta = dis_MMXop_regmem_to_reg ( 14202 vbi, pfx, delta, opc, "pavgw", False ); 14203 goto decode_success; 14204 } 14205 /* 66 0F E3 = PAVGW */ 14206 if (have66noF2noF3(pfx) && sz == 2) { 14207 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14208 "pavgw", Iop_Avg16Ux8, False ); 14209 goto decode_success; 14210 } 14211 break; 14212 14213 case 0xE4: 14214 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14215 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 14216 if (haveNo66noF2noF3(pfx) && sz == 4) { 14217 do_MMX_preamble(); 14218 delta = dis_MMXop_regmem_to_reg ( 14219 vbi, pfx, delta, opc, "pmuluh", False ); 14220 goto decode_success; 14221 } 14222 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 14223 if (have66noF2noF3(pfx) && sz == 2) { 14224 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14225 "pmulhuw", Iop_MulHi16Ux8, False ); 14226 goto decode_success; 14227 } 14228 break; 14229 14230 case 0xE5: 14231 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 14232 if (have66noF2noF3(pfx) && sz == 2) { 14233 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14234 "pmulhw", Iop_MulHi16Sx8, False ); 14235 goto decode_success; 14236 } 14237 break; 14238 14239 case 0xE6: 14240 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14241 lo half xmm(G), and zero upper half, rounding towards zero */ 14242 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14243 lo half xmm(G), according to prevailing rounding mode, and zero 14244 upper half */ 14245 if ( (haveF2no66noF3(pfx) && sz == 4) 14246 || (have66noF2noF3(pfx) && sz == 2) ) { 14247 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 14248 toBool(sz == 2)/*r2zero*/); 14249 goto decode_success; 14250 } 14251 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 14252 F64 in xmm(G) */ 14253 if (haveF3no66noF2(pfx) && sz == 4) { 14254 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 14255 goto decode_success; 14256 } 14257 break; 14258 14259 case 0xE7: 14260 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14261 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 14262 Intel manual does not say anything about the usual business of 14263 the FP reg tags getting trashed whenever an MMX insn happens. 14264 So we just leave them alone. 
14265 */ 14266 if (haveNo66noF2noF3(pfx) && sz == 4) { 14267 modrm = getUChar(delta); 14268 if (!epartIsReg(modrm)) { 14269 /* do_MMX_preamble(); Intel docs don't specify this */ 14270 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14271 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 14272 DIP("movntq %s,%s\n", dis_buf, 14273 nameMMXReg(gregLO3ofRM(modrm))); 14274 delta += alen; 14275 goto decode_success; 14276 } 14277 /* else fall through */ 14278 } 14279 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ 14280 if (have66noF2noF3(pfx) && sz == 2) { 14281 modrm = getUChar(delta); 14282 if (!epartIsReg(modrm)) { 14283 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14284 gen_SEGV_if_not_16_aligned( addr ); 14285 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14286 DIP("movntdq %s,%s\n", dis_buf, 14287 nameXMMReg(gregOfRexRM(pfx,modrm))); 14288 delta += alen; 14289 goto decode_success; 14290 } 14291 /* else fall through */ 14292 } 14293 break; 14294 14295 case 0xE8: 14296 /* 66 0F E8 = PSUBSB */ 14297 if (have66noF2noF3(pfx) && sz == 2) { 14298 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14299 "psubsb", Iop_QSub8Sx16, False ); 14300 goto decode_success; 14301 } 14302 break; 14303 14304 case 0xE9: 14305 /* 66 0F E9 = PSUBSW */ 14306 if (have66noF2noF3(pfx) && sz == 2) { 14307 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14308 "psubsw", Iop_QSub16Sx8, False ); 14309 goto decode_success; 14310 } 14311 break; 14312 14313 case 0xEA: 14314 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14315 /* 0F EA = PMINSW -- 16x4 signed min */ 14316 if (haveNo66noF2noF3(pfx) && sz == 4) { 14317 do_MMX_preamble(); 14318 delta = dis_MMXop_regmem_to_reg ( 14319 vbi, pfx, delta, opc, "pminsw", False ); 14320 goto decode_success; 14321 } 14322 /* 66 0F EA = PMINSW -- 16x8 signed min */ 14323 if (have66noF2noF3(pfx) && sz == 2) { 14324 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14325 "pminsw", Iop_Min16Sx8, False ); 14326 goto decode_success; 14327 } 14328 break; 14329 14330 case 0xEB: 14331 /* 66 0F EB = POR */ 14332 if (have66noF2noF3(pfx) && sz == 2) { 14333 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 14334 goto decode_success; 14335 } 14336 break; 14337 14338 case 0xEC: 14339 /* 66 0F EC = PADDSB */ 14340 if (have66noF2noF3(pfx) && sz == 2) { 14341 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14342 "paddsb", Iop_QAdd8Sx16, False ); 14343 goto decode_success; 14344 } 14345 break; 14346 14347 case 0xED: 14348 /* 66 0F ED = PADDSW */ 14349 if (have66noF2noF3(pfx) && sz == 2) { 14350 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14351 "paddsw", Iop_QAdd16Sx8, False ); 14352 goto decode_success; 14353 } 14354 break; 14355 14356 case 0xEE: 14357 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14358 /* 0F EE = PMAXSW -- 16x4 signed max */ 14359 if (haveNo66noF2noF3(pfx) && sz == 4) { 14360 do_MMX_preamble(); 14361 delta = dis_MMXop_regmem_to_reg ( 14362 vbi, pfx, delta, opc, "pmaxsw", False ); 14363 goto decode_success; 14364 } 14365 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 14366 if (have66noF2noF3(pfx) && sz == 2) { 14367 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14368 "pmaxsw", Iop_Max16Sx8, False ); 14369 goto decode_success; 14370 } 14371 break; 14372 14373 case 0xEF: 14374 /* 66 0F EF = PXOR */ 14375 if (have66noF2noF3(pfx) && sz == 2) { 14376 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 ); 14377 goto decode_success; 14378 } 14379 break; 14380 14381 case 0xF1: 14382 /* 66 0F F1 = PSLLW by E 
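-- the shift count is the low 64 bits of E; counts greater than 15
   clear every lane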
*/ 14383 if (have66noF2noF3(pfx) && sz == 2) { 14384 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 ); 14385 goto decode_success; 14386 } 14387 break; 14388 14389 case 0xF2: 14390 /* 66 0F F2 = PSLLD by E */ 14391 if (have66noF2noF3(pfx) && sz == 2) { 14392 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 ); 14393 goto decode_success; 14394 } 14395 break; 14396 14397 case 0xF3: 14398 /* 66 0F F3 = PSLLQ by E */ 14399 if (have66noF2noF3(pfx) && sz == 2) { 14400 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 ); 14401 goto decode_success; 14402 } 14403 break; 14404 14405 case 0xF4: 14406 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14407 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 14408 half */ 14409 if (have66noF2noF3(pfx) && sz == 2) { 14410 IRTemp sV = newTemp(Ity_V128); 14411 IRTemp dV = newTemp(Ity_V128); 14412 modrm = getUChar(delta); 14413 UInt rG = gregOfRexRM(pfx,modrm); 14414 assign( dV, getXMMReg(rG) ); 14415 if (epartIsReg(modrm)) { 14416 UInt rE = eregOfRexRM(pfx,modrm); 14417 assign( sV, getXMMReg(rE) ); 14418 delta += 1; 14419 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14420 } else { 14421 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14422 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14423 delta += alen; 14424 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG)); 14425 } 14426 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) ); 14427 goto decode_success; 14428 } 14429 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14430 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14431 0 to form 64-bit result */ 14432 if (haveNo66noF2noF3(pfx) && sz == 4) { 14433 IRTemp sV = newTemp(Ity_I64); 14434 IRTemp dV = newTemp(Ity_I64); 14435 t1 = newTemp(Ity_I32); 14436 t0 = newTemp(Ity_I32); 14437 modrm = getUChar(delta); 14438 14439 do_MMX_preamble(); 14440 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14441 14442 if (epartIsReg(modrm)) { 14443 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14444 delta += 1; 14445 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14446 nameMMXReg(gregLO3ofRM(modrm))); 14447 } else { 14448 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14449 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14450 delta += alen; 14451 DIP("pmuludq %s,%s\n", dis_buf, 14452 nameMMXReg(gregLO3ofRM(modrm))); 14453 } 14454 14455 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 14456 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 14457 putMMXReg( gregLO3ofRM(modrm), 14458 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 14459 goto decode_success; 14460 } 14461 break; 14462 14463 case 0xF5: 14464 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 14465 E(xmm or mem) to G(xmm) */ 14466 if (have66noF2noF3(pfx) && sz == 2) { 14467 IRTemp sV = newTemp(Ity_V128); 14468 IRTemp dV = newTemp(Ity_V128); 14469 modrm = getUChar(delta); 14470 UInt rG = gregOfRexRM(pfx,modrm); 14471 if (epartIsReg(modrm)) { 14472 UInt rE = eregOfRexRM(pfx,modrm); 14473 assign( sV, getXMMReg(rE) ); 14474 delta += 1; 14475 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14476 } else { 14477 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14478 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14479 delta += alen; 14480 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG)); 14481 } 14482 assign( dV, getXMMReg(rG) ); 14483 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) ); 14484 goto decode_success; 14485 } 14486 break; 14487 14488 case 0xF6: 14489 /* 
***--- this is an MMX class insn introduced in SSE1 ---*** */ 14490 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 14491 if (haveNo66noF2noF3(pfx) && sz == 4) { 14492 do_MMX_preamble(); 14493 delta = dis_MMXop_regmem_to_reg ( 14494 vbi, pfx, delta, opc, "psadbw", False ); 14495 goto decode_success; 14496 } 14497 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 14498 from E(xmm or mem) to G(xmm) */ 14499 if (have66noF2noF3(pfx) && sz == 2) { 14500 IRTemp sV = newTemp(Ity_V128); 14501 IRTemp dV = newTemp(Ity_V128); 14502 modrm = getUChar(delta); 14503 UInt rG = gregOfRexRM(pfx,modrm); 14504 if (epartIsReg(modrm)) { 14505 UInt rE = eregOfRexRM(pfx,modrm); 14506 assign( sV, getXMMReg(rE) ); 14507 delta += 1; 14508 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14509 } else { 14510 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14511 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14512 delta += alen; 14513 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG)); 14514 } 14515 assign( dV, getXMMReg(rG) ); 14516 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) ); 14517 14518 goto decode_success; 14519 } 14520 break; 14521 14522 case 0xF7: 14523 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14524 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 14525 if (haveNo66noF2noF3(pfx) && sz == 4) { 14526 Bool ok = False; 14527 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 ); 14528 if (ok) goto decode_success; 14529 } 14530 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 14531 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) { 14532 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ ); 14533 goto decode_success; 14534 } 14535 break; 14536 14537 case 0xF8: 14538 /* 66 0F F8 = PSUBB */ 14539 if (have66noF2noF3(pfx) && sz == 2) { 14540 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14541 "psubb", Iop_Sub8x16, False ); 14542 goto decode_success; 14543 } 14544 break; 14545 14546 case 0xF9: 14547 /* 66 0F F9 = PSUBW */ 14548 if (have66noF2noF3(pfx) && sz == 2) { 14549 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14550 "psubw", Iop_Sub16x8, False ); 14551 goto decode_success; 14552 } 14553 break; 14554 14555 case 0xFA: 14556 /* 66 0F FA = PSUBD */ 14557 if (have66noF2noF3(pfx) && sz == 2) { 14558 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14559 "psubd", Iop_Sub32x4, False ); 14560 goto decode_success; 14561 } 14562 break; 14563 14564 case 0xFB: 14565 /* 66 0F FB = PSUBQ */ 14566 if (have66noF2noF3(pfx) && sz == 2) { 14567 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14568 "psubq", Iop_Sub64x2, False ); 14569 goto decode_success; 14570 } 14571 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14572 /* 0F FB = PSUBQ -- sub 64x1 */ 14573 if (haveNo66noF2noF3(pfx) && sz == 4) { 14574 do_MMX_preamble(); 14575 delta = dis_MMXop_regmem_to_reg ( 14576 vbi, pfx, delta, opc, "psubq", False ); 14577 goto decode_success; 14578 } 14579 break; 14580 14581 case 0xFC: 14582 /* 66 0F FC = PADDB */ 14583 if (have66noF2noF3(pfx) && sz == 2) { 14584 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14585 "paddb", Iop_Add8x16, False ); 14586 goto decode_success; 14587 } 14588 break; 14589 14590 case 0xFD: 14591 /* 66 0F FD = PADDW */ 14592 if (have66noF2noF3(pfx) && sz == 2) { 14593 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14594 "paddw", Iop_Add16x8, False ); 14595 goto decode_success; 14596 } 14597 break; 14598 14599 case 0xFE: 14600 /* 66 0F FE = PADDD */ 14601 if (have66noF2noF3(pfx) && sz == 2) { 14602 delta = 
dis_SSEint_E_to_G( vbi, pfx, delta, 14603 "paddd", Iop_Add32x4, False ); 14604 goto decode_success; 14605 } 14606 break; 14607 14608 default: 14609 goto decode_failure; 14610 14611 } 14612 14613 decode_failure: 14614 *decode_OK = False; 14615 return deltaIN; 14616 14617 decode_success: 14618 *decode_OK = True; 14619 return delta; 14620 } 14621 14622 14623 /*------------------------------------------------------------*/ 14624 /*--- ---*/ 14625 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/ 14626 /*--- ---*/ 14627 /*------------------------------------------------------------*/ 14628 14629 static Long dis_MOVDDUP_128 ( VexAbiInfo* vbi, Prefix pfx, 14630 Long delta, Bool isAvx ) 14631 { 14632 IRTemp addr = IRTemp_INVALID; 14633 Int alen = 0; 14634 HChar dis_buf[50]; 14635 IRTemp sV = newTemp(Ity_V128); 14636 IRTemp d0 = newTemp(Ity_I64); 14637 UChar modrm = getUChar(delta); 14638 UInt rG = gregOfRexRM(pfx,modrm); 14639 if (epartIsReg(modrm)) { 14640 UInt rE = eregOfRexRM(pfx,modrm); 14641 assign( sV, getXMMReg(rE) ); 14642 DIP("%smovddup %s,%s\n", 14643 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 14644 delta += 1; 14645 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 14646 } else { 14647 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14648 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 14649 DIP("%smovddup %s,%s\n", 14650 isAvx ? "v" : "", dis_buf, nameXMMReg(rG)); 14651 delta += alen; 14652 } 14653 (isAvx ? putYMMRegLoAndZU : putXMMReg) 14654 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 14655 return delta; 14656 } 14657 14658 14659 static Long dis_MOVDDUP_256 ( VexAbiInfo* vbi, Prefix pfx, 14660 Long delta ) 14661 { 14662 IRTemp addr = IRTemp_INVALID; 14663 Int alen = 0; 14664 HChar dis_buf[50]; 14665 IRTemp d0 = newTemp(Ity_I64); 14666 IRTemp d1 = newTemp(Ity_I64); 14667 UChar modrm = getUChar(delta); 14668 UInt rG = gregOfRexRM(pfx,modrm); 14669 if (epartIsReg(modrm)) { 14670 UInt rE = eregOfRexRM(pfx,modrm); 14671 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 14672 delta += 1; 14673 assign ( d0, getYMMRegLane64(rE, 0) ); 14674 assign ( d1, getYMMRegLane64(rE, 2) ); 14675 } else { 14676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14677 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 14678 assign( d1, loadLE(Ity_I64, binop(Iop_Add64, 14679 mkexpr(addr), mkU64(16))) ); 14680 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG)); 14681 delta += alen; 14682 } 14683 putYMMRegLane64( rG, 0, mkexpr(d0) ); 14684 putYMMRegLane64( rG, 1, mkexpr(d0) ); 14685 putYMMRegLane64( rG, 2, mkexpr(d1) ); 14686 putYMMRegLane64( rG, 3, mkexpr(d1) ); 14687 return delta; 14688 } 14689 14690 14691 static Long dis_MOVSxDUP_128 ( VexAbiInfo* vbi, Prefix pfx, 14692 Long delta, Bool isAvx, Bool isL ) 14693 { 14694 IRTemp addr = IRTemp_INVALID; 14695 Int alen = 0; 14696 HChar dis_buf[50]; 14697 IRTemp sV = newTemp(Ity_V128); 14698 UChar modrm = getUChar(delta); 14699 UInt rG = gregOfRexRM(pfx,modrm); 14700 IRTemp s3, s2, s1, s0; 14701 s3 = s2 = s1 = s0 = IRTemp_INVALID; 14702 if (epartIsReg(modrm)) { 14703 UInt rE = eregOfRexRM(pfx,modrm); 14704 assign( sV, getXMMReg(rE) ); 14705 DIP("%smovs%cdup %s,%s\n", 14706 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG)); 14707 delta += 1; 14708 } else { 14709 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14710 if (!isAvx) 14711 gen_SEGV_if_not_16_aligned( addr ); 14712 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14713 DIP("%smovs%cdup %s,%s\n", 14714 isAvx ? "v" : "", isL ? 
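/* movsldup duplicates source lanes 0 and 2 (result 2:2:0:0);
   movshdup duplicates lanes 1 and 3 (result 3:3:1:1), as the
   mkV128from32s calls below spell out */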
'l' : 'h', dis_buf, nameXMMReg(rG)); 14715 delta += alen; 14716 } 14717 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 14718 (isAvx ? putYMMRegLoAndZU : putXMMReg) 14719 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 ) 14720 : mkV128from32s( s3, s3, s1, s1 ) ); 14721 return delta; 14722 } 14723 14724 14725 static Long dis_MOVSxDUP_256 ( VexAbiInfo* vbi, Prefix pfx, 14726 Long delta, Bool isL ) 14727 { 14728 IRTemp addr = IRTemp_INVALID; 14729 Int alen = 0; 14730 HChar dis_buf[50]; 14731 IRTemp sV = newTemp(Ity_V256); 14732 UChar modrm = getUChar(delta); 14733 UInt rG = gregOfRexRM(pfx,modrm); 14734 IRTemp s7, s6, s5, s4, s3, s2, s1, s0; 14735 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 14736 if (epartIsReg(modrm)) { 14737 UInt rE = eregOfRexRM(pfx,modrm); 14738 assign( sV, getYMMReg(rE) ); 14739 DIP("vmovs%cdup %s,%s\n", 14740 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG)); 14741 delta += 1; 14742 } else { 14743 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14744 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 14745 DIP("vmovs%cdup %s,%s\n", 14746 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG)); 14747 delta += alen; 14748 } 14749 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 14750 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 ) 14751 : mkV128from32s( s7, s7, s5, s5 ) ); 14752 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 ) 14753 : mkV128from32s( s3, s3, s1, s1 ) ); 14754 return delta; 14755 } 14756 14757 14758 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 14759 { 14760 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 14761 IRTemp leftV = newTemp(Ity_V128); 14762 IRTemp rightV = newTemp(Ity_V128); 14763 IRTemp rm = newTemp(Ity_I32); 14764 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 14765 14766 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 14767 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 14768 14769 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) ); 14770 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) ); 14771 14772 IRTemp res = newTemp(Ity_V128); 14773 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 14774 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 14775 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 14776 return res; 14777 } 14778 14779 14780 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 14781 { 14782 IRTemp s1, s0, d1, d0; 14783 IRTemp leftV = newTemp(Ity_V128); 14784 IRTemp rightV = newTemp(Ity_V128); 14785 IRTemp rm = newTemp(Ity_I32); 14786 s1 = s0 = d1 = d0 = IRTemp_INVALID; 14787 14788 breakupV128to64s( sV, &s1, &s0 ); 14789 breakupV128to64s( dV, &d1, &d0 ); 14790 14791 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 14792 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 14793 14794 IRTemp res = newTemp(Ity_V128); 14795 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 14796 assign( res, triop(isAdd ? 
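/* so the low result lane is d0 op d1 (the two halves of the dst
   operand G) and the high lane is s0 op s1 (the two halves of the
   src operand E), as HADDPD/HSUBPD require */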
Iop_Add64Fx2 : Iop_Sub64Fx2, 14797 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 14798 return res; 14799 } 14800 14801 14802 __attribute__((noinline)) 14803 static 14804 Long dis_ESC_0F__SSE3 ( Bool* decode_OK, 14805 VexAbiInfo* vbi, 14806 Prefix pfx, Int sz, Long deltaIN ) 14807 { 14808 IRTemp addr = IRTemp_INVALID; 14809 UChar modrm = 0; 14810 Int alen = 0; 14811 HChar dis_buf[50]; 14812 14813 *decode_OK = False; 14814 14815 Long delta = deltaIN; 14816 UChar opc = getUChar(delta); 14817 delta++; 14818 switch (opc) { 14819 14820 case 0x12: 14821 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 14822 duplicating some lanes (2:2:0:0). */ 14823 if (haveF3no66noF2(pfx) && sz == 4) { 14824 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14825 True/*isL*/ ); 14826 goto decode_success; 14827 } 14828 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 14829 duplicating some lanes (0:1:0:1). */ 14830 if (haveF2no66noF3(pfx) 14831 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14832 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ ); 14833 goto decode_success; 14834 } 14835 break; 14836 14837 case 0x16: 14838 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 14839 duplicating some lanes (3:3:1:1). */ 14840 if (haveF3no66noF2(pfx) && sz == 4) { 14841 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 14842 False/*!isL*/ ); 14843 goto decode_success; 14844 } 14845 break; 14846 14847 case 0x7C: 14848 case 0x7D: 14849 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 14850 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 14851 if (haveF2no66noF3(pfx) && sz == 4) { 14852 IRTemp eV = newTemp(Ity_V128); 14853 IRTemp gV = newTemp(Ity_V128); 14854 Bool isAdd = opc == 0x7C; 14855 const HChar* str = isAdd ? "add" : "sub"; 14856 modrm = getUChar(delta); 14857 UInt rG = gregOfRexRM(pfx,modrm); 14858 if (epartIsReg(modrm)) { 14859 UInt rE = eregOfRexRM(pfx,modrm); 14860 assign( eV, getXMMReg(rE) ); 14861 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14862 delta += 1; 14863 } else { 14864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14865 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14866 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14867 delta += alen; 14868 } 14869 14870 assign( gV, getXMMReg(rG) ); 14871 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) ); 14872 goto decode_success; 14873 } 14874 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 14875 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 14876 if (have66noF2noF3(pfx) && sz == 2) { 14877 IRTemp eV = newTemp(Ity_V128); 14878 IRTemp gV = newTemp(Ity_V128); 14879 Bool isAdd = opc == 0x7C; 14880 const HChar* str = isAdd ? 
"add" : "sub"; 14881 modrm = getUChar(delta); 14882 UInt rG = gregOfRexRM(pfx,modrm); 14883 if (epartIsReg(modrm)) { 14884 UInt rE = eregOfRexRM(pfx,modrm); 14885 assign( eV, getXMMReg(rE) ); 14886 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 14887 delta += 1; 14888 } else { 14889 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14890 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14891 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG)); 14892 delta += alen; 14893 } 14894 14895 assign( gV, getXMMReg(rG) ); 14896 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) ); 14897 goto decode_success; 14898 } 14899 break; 14900 14901 case 0xD0: 14902 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */ 14903 if (have66noF2noF3(pfx) && sz == 2) { 14904 IRTemp eV = newTemp(Ity_V128); 14905 IRTemp gV = newTemp(Ity_V128); 14906 modrm = getUChar(delta); 14907 UInt rG = gregOfRexRM(pfx,modrm); 14908 if (epartIsReg(modrm)) { 14909 UInt rE = eregOfRexRM(pfx,modrm); 14910 assign( eV, getXMMReg(rE) ); 14911 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14912 delta += 1; 14913 } else { 14914 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14915 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14916 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG)); 14917 delta += alen; 14918 } 14919 14920 assign( gV, getXMMReg(rG) ); 14921 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) ); 14922 goto decode_success; 14923 } 14924 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 14925 if (haveF2no66noF3(pfx) && sz == 4) { 14926 IRTemp eV = newTemp(Ity_V128); 14927 IRTemp gV = newTemp(Ity_V128); 14928 modrm = getUChar(delta); 14929 UInt rG = gregOfRexRM(pfx,modrm); 14930 14931 modrm = getUChar(delta); 14932 if (epartIsReg(modrm)) { 14933 UInt rE = eregOfRexRM(pfx,modrm); 14934 assign( eV, getXMMReg(rE) ); 14935 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14936 delta += 1; 14937 } else { 14938 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14939 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 14940 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG)); 14941 delta += alen; 14942 } 14943 14944 assign( gV, getXMMReg(rG) ); 14945 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) ); 14946 goto decode_success; 14947 } 14948 break; 14949 14950 case 0xF0: 14951 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). 
*/ 14952 if (haveF2no66noF3(pfx) && sz == 4) { 14953 modrm = getUChar(delta); 14954 if (epartIsReg(modrm)) { 14955 goto decode_failure; 14956 } else { 14957 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14958 putXMMReg( gregOfRexRM(pfx,modrm), 14959 loadLE(Ity_V128, mkexpr(addr)) ); 14960 DIP("lddqu %s,%s\n", dis_buf, 14961 nameXMMReg(gregOfRexRM(pfx,modrm))); 14962 delta += alen; 14963 } 14964 goto decode_success; 14965 } 14966 break; 14967 14968 default: 14969 goto decode_failure; 14970 14971 } 14972 14973 decode_failure: 14974 *decode_OK = False; 14975 return deltaIN; 14976 14977 decode_success: 14978 *decode_OK = True; 14979 return delta; 14980 } 14981 14982 14983 /*------------------------------------------------------------*/ 14984 /*--- ---*/ 14985 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/ 14986 /*--- ---*/ 14987 /*------------------------------------------------------------*/ 14988 14989 static 14990 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ ) 14991 { 14992 IRTemp sHi = newTemp(Ity_I64); 14993 IRTemp sLo = newTemp(Ity_I64); 14994 IRTemp dHi = newTemp(Ity_I64); 14995 IRTemp dLo = newTemp(Ity_I64); 14996 IRTemp rHi = newTemp(Ity_I64); 14997 IRTemp rLo = newTemp(Ity_I64); 14998 IRTemp sevens = newTemp(Ity_I64); 14999 IRTemp mask0x80hi = newTemp(Ity_I64); 15000 IRTemp mask0x80lo = newTemp(Ity_I64); 15001 IRTemp maskBit3hi = newTemp(Ity_I64); 15002 IRTemp maskBit3lo = newTemp(Ity_I64); 15003 IRTemp sAnd7hi = newTemp(Ity_I64); 15004 IRTemp sAnd7lo = newTemp(Ity_I64); 15005 IRTemp permdHi = newTemp(Ity_I64); 15006 IRTemp permdLo = newTemp(Ity_I64); 15007 IRTemp res = newTemp(Ity_V128); 15008 15009 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15010 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15011 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15012 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15013 15014 assign( sevens, mkU64(0x0707070707070707ULL) ); 15015 15016 /* mask0x80hi = Not(SarN8x8(sHi,7)) 15017 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) 15018 sAnd7hi = And(sHi,sevens) 15019 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), 15020 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) 15021 rHi = And(permdHi,mask0x80hi) 15022 */ 15023 assign( 15024 mask0x80hi, 15025 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); 15026 15027 assign( 15028 maskBit3hi, 15029 binop(Iop_SarN8x8, 15030 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), 15031 mkU8(7))); 15032 15033 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); 15034 15035 assign( 15036 permdHi, 15037 binop( 15038 Iop_Or64, 15039 binop(Iop_And64, 15040 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), 15041 mkexpr(maskBit3hi)), 15042 binop(Iop_And64, 15043 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), 15044 unop(Iop_Not64,mkexpr(maskBit3hi))) )); 15045 15046 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); 15047 15048 /* And the same for the lower half of the result. What fun. 
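      As an aside, and purely for illustration (the names below do not
      appear in the code): the per-byte behaviour being modelled is
         res[i] = (ctl[i] & 0x80) ? 0 : data[ ctl[i] & 0xF ]
      Since Perm8x8 can only index within one 64-bit half, bit 3 of each
      control byte (maskBit3) selects between the dHi and dLo
      permutations, and mask0x80 then zeroes every lane whose control
      byte has bit 7 set.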
*/ 15049 15050 assign( 15051 mask0x80lo, 15052 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); 15053 15054 assign( 15055 maskBit3lo, 15056 binop(Iop_SarN8x8, 15057 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), 15058 mkU8(7))); 15059 15060 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); 15061 15062 assign( 15063 permdLo, 15064 binop( 15065 Iop_Or64, 15066 binop(Iop_And64, 15067 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), 15068 mkexpr(maskBit3lo)), 15069 binop(Iop_And64, 15070 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), 15071 unop(Iop_Not64,mkexpr(maskBit3lo))) )); 15072 15073 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); 15074 15075 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))); 15076 return res; 15077 } 15078 15079 15080 static 15081 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ ) 15082 { 15083 IRTemp sHi, sLo, dHi, dLo; 15084 sHi = sLo = dHi = dLo = IRTemp_INVALID; 15085 breakupV256toV128s( dV, &dHi, &dLo); 15086 breakupV256toV128s( sV, &sHi, &sLo); 15087 IRTemp res = newTemp(Ity_V256); 15088 assign(res, binop(Iop_V128HLtoV256, 15089 mkexpr(math_PSHUFB_XMM(dHi, sHi)), 15090 mkexpr(math_PSHUFB_XMM(dLo, sLo)))); 15091 return res; 15092 } 15093 15094 15095 static Long dis_PHADD_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta, 15096 Bool isAvx, UChar opc ) 15097 { 15098 IRTemp addr = IRTemp_INVALID; 15099 Int alen = 0; 15100 HChar dis_buf[50]; 15101 const HChar* str = "???"; 15102 IROp opV64 = Iop_INVALID; 15103 IROp opCatO = Iop_CatOddLanes16x4; 15104 IROp opCatE = Iop_CatEvenLanes16x4; 15105 IRTemp sV = newTemp(Ity_V128); 15106 IRTemp dV = newTemp(Ity_V128); 15107 IRTemp sHi = newTemp(Ity_I64); 15108 IRTemp sLo = newTemp(Ity_I64); 15109 IRTemp dHi = newTemp(Ity_I64); 15110 IRTemp dLo = newTemp(Ity_I64); 15111 UChar modrm = getUChar(delta); 15112 UInt rG = gregOfRexRM(pfx,modrm); 15113 UInt rV = isAvx ? getVexNvvvv(pfx) : rG; 15114 15115 switch (opc) { 15116 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 15117 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 15118 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 15119 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 15120 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 15121 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 15122 default: vassert(0); 15123 } 15124 if (opc == 0x02 || opc == 0x06) { 15125 opCatO = Iop_InterleaveHI32x2; 15126 opCatE = Iop_InterleaveLO32x2; 15127 } 15128 15129 assign( dV, getXMMReg(rV) ); 15130 15131 if (epartIsReg(modrm)) { 15132 UInt rE = eregOfRexRM(pfx,modrm); 15133 assign( sV, getXMMReg(rE) ); 15134 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str, 15135 nameXMMReg(rE), nameXMMReg(rG)); 15136 delta += 1; 15137 } else { 15138 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15139 if (!isAvx) 15140 gen_SEGV_if_not_16_aligned( addr ); 15141 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15142 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str, 15143 dis_buf, nameXMMReg(rG)); 15144 delta += alen; 15145 } 15146 15147 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15148 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15149 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15150 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15151 15152 /* This isn't a particularly efficient way to compute the 15153 result, but at least it avoids a proliferation of IROps, 15154 hence avoids complication all the backends. */ 15155 15156 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 15157 ( rG, 15158 binop(Iop_64HLtoV128, 15159 binop(opV64, 15160 binop(opCatE,mkexpr(sHi),mkexpr(sLo)), 15161 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ), 15162 binop(opV64, 15163 binop(opCatE,mkexpr(dHi),mkexpr(dLo)), 15164 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) ); 15165 return delta; 15166 } 15167 15168 15169 static Long dis_PHADD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc ) 15170 { 15171 IRTemp addr = IRTemp_INVALID; 15172 Int alen = 0; 15173 HChar dis_buf[50]; 15174 const HChar* str = "???"; 15175 IROp opV64 = Iop_INVALID; 15176 IROp opCatO = Iop_CatOddLanes16x4; 15177 IROp opCatE = Iop_CatEvenLanes16x4; 15178 IRTemp sV = newTemp(Ity_V256); 15179 IRTemp dV = newTemp(Ity_V256); 15180 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 15181 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 15182 UChar modrm = getUChar(delta); 15183 UInt rG = gregOfRexRM(pfx,modrm); 15184 UInt rV = getVexNvvvv(pfx); 15185 15186 switch (opc) { 15187 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 15188 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 15189 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 15190 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 15191 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 15192 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 15193 default: vassert(0); 15194 } 15195 if (opc == 0x02 || opc == 0x06) { 15196 opCatO = Iop_InterleaveHI32x2; 15197 opCatE = Iop_InterleaveLO32x2; 15198 } 15199 15200 assign( dV, getYMMReg(rV) ); 15201 15202 if (epartIsReg(modrm)) { 15203 UInt rE = eregOfRexRM(pfx,modrm); 15204 assign( sV, getYMMReg(rE) ); 15205 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG)); 15206 delta += 1; 15207 } else { 15208 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15209 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 15210 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG)); 15211 delta += alen; 15212 } 15213 15214 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 15215 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 15216 15217 /* This isn't a particularly efficient way to compute the 15218 result, but at least it avoids a proliferation of IROps, 15219 hence avoids complicating all the backends.
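      As an illustration for the addw case: given two adjacent 64-bit
      groups hi:lo holding 16-bit lanes h3..h0 and l3..l0,
         Add16x4( CatEvenLanes16x4(hi,lo), CatOddLanes16x4(hi,lo) )
      produces the pairwise sums [h3+h2, h1+h0, l3+l2, l1+l0], which is
      exactly the horizontal add required for that half of the result.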
*/ 15220 15221 putYMMReg( rG, 15222 binop(Iop_V128HLtoV256, 15223 binop(Iop_64HLtoV128, 15224 binop(opV64, 15225 binop(opCatE,mkexpr(s3),mkexpr(s2)), 15226 binop(opCatO,mkexpr(s3),mkexpr(s2)) ), 15227 binop(opV64, 15228 binop(opCatE,mkexpr(d3),mkexpr(d2)), 15229 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ), 15230 binop(Iop_64HLtoV128, 15231 binop(opV64, 15232 binop(opCatE,mkexpr(s1),mkexpr(s0)), 15233 binop(opCatO,mkexpr(s1),mkexpr(s0)) ), 15234 binop(opV64, 15235 binop(opCatE,mkexpr(d1),mkexpr(d0)), 15236 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) ); 15237 return delta; 15238 } 15239 15240 15241 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV ) 15242 { 15243 IRTemp sVoddsSX = newTemp(Ity_V128); 15244 IRTemp sVevensSX = newTemp(Ity_V128); 15245 IRTemp dVoddsZX = newTemp(Ity_V128); 15246 IRTemp dVevensZX = newTemp(Ity_V128); 15247 /* compute dV unsigned x sV signed */ 15248 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); 15249 assign( sVevensSX, binop(Iop_SarN16x8, 15250 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 15251 mkU8(8)) ); 15252 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); 15253 assign( dVevensZX, binop(Iop_ShrN16x8, 15254 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), 15255 mkU8(8)) ); 15256 15257 IRTemp res = newTemp(Ity_V128); 15258 assign( res, binop(Iop_QAdd16Sx8, 15259 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 15260 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) 15261 ) 15262 ); 15263 return res; 15264 } 15265 15266 15267 static 15268 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV ) 15269 { 15270 IRTemp sHi, sLo, dHi, dLo; 15271 sHi = sLo = dHi = dLo = IRTemp_INVALID; 15272 breakupV256toV128s( dV, &dHi, &dLo); 15273 breakupV256toV128s( sV, &sHi, &sLo); 15274 IRTemp res = newTemp(Ity_V256); 15275 assign(res, binop(Iop_V128HLtoV256, 15276 mkexpr(math_PMADDUBSW_128(dHi, sHi)), 15277 mkexpr(math_PMADDUBSW_128(dLo, sLo)))); 15278 return res; 15279 } 15280 15281 15282 __attribute__((noinline)) 15283 static 15284 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK, 15285 VexAbiInfo* vbi, 15286 Prefix pfx, Int sz, Long deltaIN ) 15287 { 15288 IRTemp addr = IRTemp_INVALID; 15289 UChar modrm = 0; 15290 Int alen = 0; 15291 HChar dis_buf[50]; 15292 15293 *decode_OK = False; 15294 15295 Long delta = deltaIN; 15296 UChar opc = getUChar(delta); 15297 delta++; 15298 switch (opc) { 15299 15300 case 0x00: 15301 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ 15302 if (have66noF2noF3(pfx) 15303 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15304 IRTemp sV = newTemp(Ity_V128); 15305 IRTemp dV = newTemp(Ity_V128); 15306 15307 modrm = getUChar(delta); 15308 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15309 15310 if (epartIsReg(modrm)) { 15311 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15312 delta += 1; 15313 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 15314 nameXMMReg(gregOfRexRM(pfx,modrm))); 15315 } else { 15316 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15317 gen_SEGV_if_not_16_aligned( addr ); 15318 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15319 delta += alen; 15320 DIP("pshufb %s,%s\n", dis_buf, 15321 nameXMMReg(gregOfRexRM(pfx,modrm))); 15322 } 15323 15324 IRTemp res = math_PSHUFB_XMM( dV, sV ); 15325 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res)); 15326 goto decode_success; 15327 } 15328 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ 15329 if (haveNo66noF2noF3(pfx) && sz == 4) { 15330 IRTemp sV = newTemp(Ity_I64); 15331 IRTemp dV = newTemp(Ity_I64); 15332 15333 modrm = 
getUChar(delta); 15334 do_MMX_preamble(); 15335 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15336 15337 if (epartIsReg(modrm)) { 15338 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15339 delta += 1; 15340 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15341 nameMMXReg(gregLO3ofRM(modrm))); 15342 } else { 15343 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15344 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15345 delta += alen; 15346 DIP("pshufb %s,%s\n", dis_buf, 15347 nameMMXReg(gregLO3ofRM(modrm))); 15348 } 15349 15350 putMMXReg( 15351 gregLO3ofRM(modrm), 15352 binop( 15353 Iop_And64, 15354 /* permute the lanes */ 15355 binop( 15356 Iop_Perm8x8, 15357 mkexpr(dV), 15358 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) 15359 ), 15360 /* mask off lanes which have (index & 0x80) == 0x80 */ 15361 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) 15362 ) 15363 ); 15364 goto decode_success; 15365 } 15366 break; 15367 15368 case 0x01: 15369 case 0x02: 15370 case 0x03: 15371 case 0x05: 15372 case 0x06: 15373 case 0x07: 15374 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and 15375 G to G (xmm). */ 15376 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and 15377 G to G (xmm). */ 15378 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or 15379 xmm) and G to G (xmm). */ 15380 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and 15381 G to G (xmm). */ 15382 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and 15383 G to G (xmm). */ 15384 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or 15385 xmm) and G to G (xmm). */ 15386 if (have66noF2noF3(pfx) 15387 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15388 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc ); 15389 goto decode_success; 15390 } 15391 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ 15392 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G 15393 to G (mmx). */ 15394 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G 15395 to G (mmx). */ 15396 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or 15397 mmx) and G to G (mmx). */ 15398 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G 15399 to G (mmx). */ 15400 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G 15401 to G (mmx). */ 15402 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or 15403 mmx) and G to G (mmx). 
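      These MMX forms reuse the same CatOddLanes16x4/CatEvenLanes16x4
      pairing that dis_PHADD_128 uses for the XMM case above, only
      applied to a single 64-bit group rather than to each half of a
      V128.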
*/ 15404 if (haveNo66noF2noF3(pfx) && sz == 4) { 15405 const HChar* str = "???"; 15406 IROp opV64 = Iop_INVALID; 15407 IROp opCatO = Iop_CatOddLanes16x4; 15408 IROp opCatE = Iop_CatEvenLanes16x4; 15409 IRTemp sV = newTemp(Ity_I64); 15410 IRTemp dV = newTemp(Ity_I64); 15411 15412 modrm = getUChar(delta); 15413 15414 switch (opc) { 15415 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; 15416 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; 15417 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; 15418 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; 15419 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; 15420 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; 15421 default: vassert(0); 15422 } 15423 if (opc == 0x02 || opc == 0x06) { 15424 opCatO = Iop_InterleaveHI32x2; 15425 opCatE = Iop_InterleaveLO32x2; 15426 } 15427 15428 do_MMX_preamble(); 15429 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15430 15431 if (epartIsReg(modrm)) { 15432 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15433 delta += 1; 15434 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15435 nameMMXReg(gregLO3ofRM(modrm))); 15436 } else { 15437 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15438 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15439 delta += alen; 15440 DIP("ph%s %s,%s\n", str, dis_buf, 15441 nameMMXReg(gregLO3ofRM(modrm))); 15442 } 15443 15444 putMMXReg( 15445 gregLO3ofRM(modrm), 15446 binop(opV64, 15447 binop(opCatE,mkexpr(sV),mkexpr(dV)), 15448 binop(opCatO,mkexpr(sV),mkexpr(dV)) 15449 ) 15450 ); 15451 goto decode_success; 15452 } 15453 break; 15454 15455 case 0x04: 15456 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 15457 Unsigned Bytes (XMM) */ 15458 if (have66noF2noF3(pfx) 15459 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15460 IRTemp sV = newTemp(Ity_V128); 15461 IRTemp dV = newTemp(Ity_V128); 15462 modrm = getUChar(delta); 15463 UInt rG = gregOfRexRM(pfx,modrm); 15464 15465 assign( dV, getXMMReg(rG) ); 15466 15467 if (epartIsReg(modrm)) { 15468 UInt rE = eregOfRexRM(pfx,modrm); 15469 assign( sV, getXMMReg(rE) ); 15470 delta += 1; 15471 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15472 } else { 15473 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15474 gen_SEGV_if_not_16_aligned( addr ); 15475 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15476 delta += alen; 15477 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG)); 15478 } 15479 15480 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) ); 15481 goto decode_success; 15482 } 15483 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and 15484 Unsigned Bytes (MMX) */ 15485 if (haveNo66noF2noF3(pfx) && sz == 4) { 15486 IRTemp sV = newTemp(Ity_I64); 15487 IRTemp dV = newTemp(Ity_I64); 15488 IRTemp sVoddsSX = newTemp(Ity_I64); 15489 IRTemp sVevensSX = newTemp(Ity_I64); 15490 IRTemp dVoddsZX = newTemp(Ity_I64); 15491 IRTemp dVevensZX = newTemp(Ity_I64); 15492 15493 modrm = getUChar(delta); 15494 do_MMX_preamble(); 15495 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15496 15497 if (epartIsReg(modrm)) { 15498 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15499 delta += 1; 15500 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15501 nameMMXReg(gregLO3ofRM(modrm))); 15502 } else { 15503 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15504 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15505 delta += alen; 15506 DIP("pmaddubsw %s,%s\n", dis_buf, 15507 nameMMXReg(gregLO3ofRM(modrm))); 15508 } 15509 15510 /* compute dV unsigned x sV signed */ 15511 assign( 
sVoddsSX, 15512 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); 15513 assign( sVevensSX, 15514 binop(Iop_SarN16x4, 15515 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 15516 mkU8(8)) ); 15517 assign( dVoddsZX, 15518 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); 15519 assign( dVevensZX, 15520 binop(Iop_ShrN16x4, 15521 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), 15522 mkU8(8)) ); 15523 15524 putMMXReg( 15525 gregLO3ofRM(modrm), 15526 binop(Iop_QAdd16Sx4, 15527 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), 15528 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) 15529 ) 15530 ); 15531 goto decode_success; 15532 } 15533 break; 15534 15535 case 0x08: 15536 case 0x09: 15537 case 0x0A: 15538 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ 15539 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ 15540 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */ 15541 if (have66noF2noF3(pfx) 15542 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15543 IRTemp sV = newTemp(Ity_V128); 15544 IRTemp dV = newTemp(Ity_V128); 15545 IRTemp sHi = newTemp(Ity_I64); 15546 IRTemp sLo = newTemp(Ity_I64); 15547 IRTemp dHi = newTemp(Ity_I64); 15548 IRTemp dLo = newTemp(Ity_I64); 15549 const HChar* str = "???"; 15550 Int laneszB = 0; 15551 15552 switch (opc) { 15553 case 0x08: laneszB = 1; str = "b"; break; 15554 case 0x09: laneszB = 2; str = "w"; break; 15555 case 0x0A: laneszB = 4; str = "d"; break; 15556 default: vassert(0); 15557 } 15558 15559 modrm = getUChar(delta); 15560 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15561 15562 if (epartIsReg(modrm)) { 15563 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15564 delta += 1; 15565 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 15566 nameXMMReg(gregOfRexRM(pfx,modrm))); 15567 } else { 15568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15569 gen_SEGV_if_not_16_aligned( addr ); 15570 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15571 delta += alen; 15572 DIP("psign%s %s,%s\n", str, dis_buf, 15573 nameXMMReg(gregOfRexRM(pfx,modrm))); 15574 } 15575 15576 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15577 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15578 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15579 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15580 15581 putXMMReg( 15582 gregOfRexRM(pfx,modrm), 15583 binop(Iop_64HLtoV128, 15584 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 15585 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 15586 ) 15587 ); 15588 goto decode_success; 15589 } 15590 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ 15591 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ 15592 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */ 15593 if (haveNo66noF2noF3(pfx) && sz == 4) { 15594 IRTemp sV = newTemp(Ity_I64); 15595 IRTemp dV = newTemp(Ity_I64); 15596 const HChar* str = "???"; 15597 Int laneszB = 0; 15598 15599 switch (opc) { 15600 case 0x08: laneszB = 1; str = "b"; break; 15601 case 0x09: laneszB = 2; str = "w"; break; 15602 case 0x0A: laneszB = 4; str = "d"; break; 15603 default: vassert(0); 15604 } 15605 15606 modrm = getUChar(delta); 15607 do_MMX_preamble(); 15608 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15609 15610 if (epartIsReg(modrm)) { 15611 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15612 delta += 1; 15613 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15614 nameMMXReg(gregLO3ofRM(modrm))); 15615 } else { 15616 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15617 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15618 delta += alen; 15619 DIP("psign%s 
%s,%s\n", str, dis_buf, 15620 nameMMXReg(gregLO3ofRM(modrm))); 15621 } 15622 15623 putMMXReg( 15624 gregLO3ofRM(modrm), 15625 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) 15626 ); 15627 goto decode_success; 15628 } 15629 break; 15630 15631 case 0x0B: 15632 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and 15633 Scale (XMM) */ 15634 if (have66noF2noF3(pfx) 15635 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15636 IRTemp sV = newTemp(Ity_V128); 15637 IRTemp dV = newTemp(Ity_V128); 15638 IRTemp sHi = newTemp(Ity_I64); 15639 IRTemp sLo = newTemp(Ity_I64); 15640 IRTemp dHi = newTemp(Ity_I64); 15641 IRTemp dLo = newTemp(Ity_I64); 15642 15643 modrm = getUChar(delta); 15644 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15645 15646 if (epartIsReg(modrm)) { 15647 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15648 delta += 1; 15649 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 15650 nameXMMReg(gregOfRexRM(pfx,modrm))); 15651 } else { 15652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15653 gen_SEGV_if_not_16_aligned( addr ); 15654 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15655 delta += alen; 15656 DIP("pmulhrsw %s,%s\n", dis_buf, 15657 nameXMMReg(gregOfRexRM(pfx,modrm))); 15658 } 15659 15660 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); 15661 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); 15662 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 15663 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 15664 15665 putXMMReg( 15666 gregOfRexRM(pfx,modrm), 15667 binop(Iop_64HLtoV128, 15668 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 15669 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 15670 ) 15671 ); 15672 goto decode_success; 15673 } 15674 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale 15675 (MMX) */ 15676 if (haveNo66noF2noF3(pfx) && sz == 4) { 15677 IRTemp sV = newTemp(Ity_I64); 15678 IRTemp dV = newTemp(Ity_I64); 15679 15680 modrm = getUChar(delta); 15681 do_MMX_preamble(); 15682 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15683 15684 if (epartIsReg(modrm)) { 15685 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15686 delta += 1; 15687 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 15688 nameMMXReg(gregLO3ofRM(modrm))); 15689 } else { 15690 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15691 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15692 delta += alen; 15693 DIP("pmulhrsw %s,%s\n", dis_buf, 15694 nameMMXReg(gregLO3ofRM(modrm))); 15695 } 15696 15697 putMMXReg( 15698 gregLO3ofRM(modrm), 15699 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) 15700 ); 15701 goto decode_success; 15702 } 15703 break; 15704 15705 case 0x1C: 15706 case 0x1D: 15707 case 0x1E: 15708 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ 15709 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ 15710 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ 15711 if (have66noF2noF3(pfx) 15712 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15713 IRTemp sV = newTemp(Ity_V128); 15714 const HChar* str = "???"; 15715 Int laneszB = 0; 15716 15717 switch (opc) { 15718 case 0x1C: laneszB = 1; str = "b"; break; 15719 case 0x1D: laneszB = 2; str = "w"; break; 15720 case 0x1E: laneszB = 4; str = "d"; break; 15721 default: vassert(0); 15722 } 15723 15724 modrm = getUChar(delta); 15725 if (epartIsReg(modrm)) { 15726 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15727 delta += 1; 15728 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)), 15729 nameXMMReg(gregOfRexRM(pfx,modrm))); 15730 } else { 15731 addr = 
disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15732 gen_SEGV_if_not_16_aligned( addr ); 15733 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15734 delta += alen; 15735 DIP("pabs%s %s,%s\n", str, dis_buf, 15736 nameXMMReg(gregOfRexRM(pfx,modrm))); 15737 } 15738 15739 putXMMReg( gregOfRexRM(pfx,modrm), 15740 mkexpr(math_PABS_XMM(sV, laneszB)) ); 15741 goto decode_success; 15742 } 15743 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ 15744 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ 15745 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ 15746 if (haveNo66noF2noF3(pfx) && sz == 4) { 15747 IRTemp sV = newTemp(Ity_I64); 15748 const HChar* str = "???"; 15749 Int laneszB = 0; 15750 15751 switch (opc) { 15752 case 0x1C: laneszB = 1; str = "b"; break; 15753 case 0x1D: laneszB = 2; str = "w"; break; 15754 case 0x1E: laneszB = 4; str = "d"; break; 15755 default: vassert(0); 15756 } 15757 15758 modrm = getUChar(delta); 15759 do_MMX_preamble(); 15760 15761 if (epartIsReg(modrm)) { 15762 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15763 delta += 1; 15764 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)), 15765 nameMMXReg(gregLO3ofRM(modrm))); 15766 } else { 15767 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15768 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15769 delta += alen; 15770 DIP("pabs%s %s,%s\n", str, dis_buf, 15771 nameMMXReg(gregLO3ofRM(modrm))); 15772 } 15773 15774 putMMXReg( gregLO3ofRM(modrm), 15775 mkexpr(math_PABS_MMX( sV, laneszB )) ); 15776 goto decode_success; 15777 } 15778 break; 15779 15780 default: 15781 break; 15782 15783 } 15784 15785 //decode_failure: 15786 *decode_OK = False; 15787 return deltaIN; 15788 15789 decode_success: 15790 *decode_OK = True; 15791 return delta; 15792 } 15793 15794 15795 /*------------------------------------------------------------*/ 15796 /*--- ---*/ 15797 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/ 15798 /*--- ---*/ 15799 /*------------------------------------------------------------*/ 15800 15801 __attribute__((noinline)) 15802 static 15803 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK, 15804 VexAbiInfo* vbi, 15805 Prefix pfx, Int sz, Long deltaIN ) 15806 { 15807 Long d64 = 0; 15808 IRTemp addr = IRTemp_INVALID; 15809 UChar modrm = 0; 15810 Int alen = 0; 15811 HChar dis_buf[50]; 15812 15813 *decode_OK = False; 15814 15815 Long delta = deltaIN; 15816 UChar opc = getUChar(delta); 15817 delta++; 15818 switch (opc) { 15819 15820 case 0x0F: 15821 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 15822 if (have66noF2noF3(pfx) 15823 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 15824 IRTemp sV = newTemp(Ity_V128); 15825 IRTemp dV = newTemp(Ity_V128); 15826 15827 modrm = getUChar(delta); 15828 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 15829 15830 if (epartIsReg(modrm)) { 15831 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 15832 d64 = (Long)getUChar(delta+1); 15833 delta += 1+1; 15834 DIP("palignr $%d,%s,%s\n", (Int)d64, 15835 nameXMMReg(eregOfRexRM(pfx,modrm)), 15836 nameXMMReg(gregOfRexRM(pfx,modrm))); 15837 } else { 15838 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 15839 gen_SEGV_if_not_16_aligned( addr ); 15840 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15841 d64 = (Long)getUChar(delta+alen); 15842 delta += alen+1; 15843 DIP("palignr $%d,%s,%s\n", (Int)d64, 15844 dis_buf, 15845 nameXMMReg(gregOfRexRM(pfx,modrm))); 15846 } 15847 15848 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 ); 15849 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 15850 goto decode_success; 
15851 } 15852 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 15853 if (haveNo66noF2noF3(pfx) && sz == 4) { 15854 IRTemp sV = newTemp(Ity_I64); 15855 IRTemp dV = newTemp(Ity_I64); 15856 IRTemp res = newTemp(Ity_I64); 15857 15858 modrm = getUChar(delta); 15859 do_MMX_preamble(); 15860 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 15861 15862 if (epartIsReg(modrm)) { 15863 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 15864 d64 = (Long)getUChar(delta+1); 15865 delta += 1+1; 15866 DIP("palignr $%d,%s,%s\n", (Int)d64, 15867 nameMMXReg(eregLO3ofRM(modrm)), 15868 nameMMXReg(gregLO3ofRM(modrm))); 15869 } else { 15870 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 15871 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 15872 d64 = (Long)getUChar(delta+alen); 15873 delta += alen+1; 15874 DIP("palignr $%d,%s,%s\n", (Int)d64, 15875 dis_buf, 15876 nameMMXReg(gregLO3ofRM(modrm))); 15877 } 15878 15879 if (d64 == 0) { 15880 assign( res, mkexpr(sV) ); 15881 } 15882 else if (d64 >= 1 && d64 <= 7) { 15883 assign(res, 15884 binop(Iop_Or64, 15885 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), 15886 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) 15887 ))); 15888 } 15889 else if (d64 == 8) { 15890 assign( res, mkexpr(dV) ); 15891 } 15892 else if (d64 >= 9 && d64 <= 15) { 15893 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); 15894 } 15895 else if (d64 >= 16 && d64 <= 255) { 15896 assign( res, mkU64(0) ); 15897 } 15898 else 15899 vassert(0); 15900 15901 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 15902 goto decode_success; 15903 } 15904 break; 15905 15906 default: 15907 break; 15908 15909 } 15910 15911 //decode_failure: 15912 *decode_OK = False; 15913 return deltaIN; 15914 15915 decode_success: 15916 *decode_OK = True; 15917 return delta; 15918 } 15919 15920 15921 /*------------------------------------------------------------*/ 15922 /*--- ---*/ 15923 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/ 15924 /*--- ---*/ 15925 /*------------------------------------------------------------*/ 15926 15927 __attribute__((noinline)) 15928 static 15929 Long dis_ESC_0F__SSE4 ( Bool* decode_OK, 15930 VexArchInfo* archinfo, 15931 VexAbiInfo* vbi, 15932 Prefix pfx, Int sz, Long deltaIN ) 15933 { 15934 IRTemp addr = IRTemp_INVALID; 15935 IRType ty = Ity_INVALID; 15936 UChar modrm = 0; 15937 Int alen = 0; 15938 HChar dis_buf[50]; 15939 15940 *decode_OK = False; 15941 15942 Long delta = deltaIN; 15943 UChar opc = getUChar(delta); 15944 delta++; 15945 switch (opc) { 15946 15947 case 0xB8: 15948 /* F3 0F B8 = POPCNT{W,L,Q} 15949 Count the number of 1 bits in a register 15950 */ 15951 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */ 15952 && (sz == 2 || sz == 4 || sz == 8)) { 15953 /*IRType*/ ty = szToITy(sz); 15954 IRTemp src = newTemp(ty); 15955 modrm = getUChar(delta); 15956 if (epartIsReg(modrm)) { 15957 assign(src, getIRegE(sz, pfx, modrm)); 15958 delta += 1; 15959 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 15960 nameIRegG(sz, pfx, modrm)); 15961 } else { 15962 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 15963 assign(src, loadLE(ty, mkexpr(addr))); 15964 delta += alen; 15965 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf, 15966 nameIRegG(sz, pfx, modrm)); 15967 } 15968 15969 IRTemp result = gen_POPCOUNT(ty, src); 15970 putIRegG(sz, pfx, modrm, mkexpr(result)); 15971 15972 // Update flags. This is pretty lame .. perhaps can do better 15973 // if this turns out to be performance critical. 15974 // O S A C P are cleared. Z is set if SRC == 0.
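      // Concretely (illustration only): with CC_OP set to
      // AMD64G_CC_OP_COPY the flags are read straight from the bit
      // layout of CC_DEP1, so depositing 1 << AMD64G_CC_SHIFT_Z there
      // when SRC == 0 yields ZF=1 with O/S/A/C/P all zero, and
      // depositing 0 when SRC != 0 clears every flag.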
15975 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 15976 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 15977 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 15978 stmt( IRStmt_Put( OFFB_CC_DEP1, 15979 binop(Iop_Shl64, 15980 unop(Iop_1Uto64, 15981 binop(Iop_CmpEQ64, 15982 widenUto64(mkexpr(src)), 15983 mkU64(0))), 15984 mkU8(AMD64G_CC_SHIFT_Z)))); 15985 15986 goto decode_success; 15987 } 15988 break; 15989 15990 case 0xBC: 15991 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension, 15992 which we can only decode if we're sure this is a BMI1 capable cpu 15993 that supports TZCNT, since otherwise it's BSF, which behaves 15994 differently on zero source. */ 15995 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 15996 && (sz == 2 || sz == 4 || sz == 8) 15997 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) { 15998 /*IRType*/ ty = szToITy(sz); 15999 IRTemp src = newTemp(ty); 16000 modrm = getUChar(delta); 16001 if (epartIsReg(modrm)) { 16002 assign(src, getIRegE(sz, pfx, modrm)); 16003 delta += 1; 16004 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 16005 nameIRegG(sz, pfx, modrm)); 16006 } else { 16007 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 16008 assign(src, loadLE(ty, mkexpr(addr))); 16009 delta += alen; 16010 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf, 16011 nameIRegG(sz, pfx, modrm)); 16012 } 16013 16014 IRTemp res = gen_TZCNT(ty, src); 16015 putIRegG(sz, pfx, modrm, mkexpr(res)); 16016 16017 // Update flags. This is pretty lame .. perhaps can do better 16018 // if this turns out to be performance critical. 16019 // O S A P are cleared. Z is set if RESULT == 0. 16020 // C is set if SRC is zero. 16021 IRTemp src64 = newTemp(Ity_I64); 16022 IRTemp res64 = newTemp(Ity_I64); 16023 assign(src64, widenUto64(mkexpr(src))); 16024 assign(res64, widenUto64(mkexpr(res))); 16025 16026 IRTemp oszacp = newTemp(Ity_I64); 16027 assign( 16028 oszacp, 16029 binop(Iop_Or64, 16030 binop(Iop_Shl64, 16031 unop(Iop_1Uto64, 16032 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 16033 mkU8(AMD64G_CC_SHIFT_Z)), 16034 binop(Iop_Shl64, 16035 unop(Iop_1Uto64, 16036 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 16037 mkU8(AMD64G_CC_SHIFT_C)) 16038 ) 16039 ); 16040 16041 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16042 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16043 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16044 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 16045 16046 goto decode_success; 16047 } 16048 break; 16049 16050 case 0xBD: 16051 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, 16052 which we can only decode if we're sure this is an AMD cpu 16053 that supports LZCNT, since otherwise it's BSR, which behaves 16054 differently. Bizarrely, my Sandy Bridge also accepts these 16055 instructions but produces different results. 
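      (The difference is expected: on a CPU without LZCNT the F3 prefix
      is ignored and the encoding executes as BSR, which returns the bit
      index of the highest set bit rather than the count of leading
      zeroes, and leaves the destination undefined for a zero source.)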
*/ 16056 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ 16057 && (sz == 2 || sz == 4 || sz == 8) 16058 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) { 16059 /*IRType*/ ty = szToITy(sz); 16060 IRTemp src = newTemp(ty); 16061 modrm = getUChar(delta); 16062 if (epartIsReg(modrm)) { 16063 assign(src, getIRegE(sz, pfx, modrm)); 16064 delta += 1; 16065 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm), 16066 nameIRegG(sz, pfx, modrm)); 16067 } else { 16068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0); 16069 assign(src, loadLE(ty, mkexpr(addr))); 16070 delta += alen; 16071 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf, 16072 nameIRegG(sz, pfx, modrm)); 16073 } 16074 16075 IRTemp res = gen_LZCNT(ty, src); 16076 putIRegG(sz, pfx, modrm, mkexpr(res)); 16077 16078 // Update flags. This is pretty lame .. perhaps can do better 16079 // if this turns out to be performance critical. 16080 // O S A P are cleared. Z is set if RESULT == 0. 16081 // C is set if SRC is zero. 16082 IRTemp src64 = newTemp(Ity_I64); 16083 IRTemp res64 = newTemp(Ity_I64); 16084 assign(src64, widenUto64(mkexpr(src))); 16085 assign(res64, widenUto64(mkexpr(res))); 16086 16087 IRTemp oszacp = newTemp(Ity_I64); 16088 assign( 16089 oszacp, 16090 binop(Iop_Or64, 16091 binop(Iop_Shl64, 16092 unop(Iop_1Uto64, 16093 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))), 16094 mkU8(AMD64G_CC_SHIFT_Z)), 16095 binop(Iop_Shl64, 16096 unop(Iop_1Uto64, 16097 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))), 16098 mkU8(AMD64G_CC_SHIFT_C)) 16099 ) 16100 ); 16101 16102 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16103 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16104 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16105 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) )); 16106 16107 goto decode_success; 16108 } 16109 break; 16110 16111 default: 16112 break; 16113 16114 } 16115 16116 //decode_failure: 16117 *decode_OK = False; 16118 return deltaIN; 16119 16120 decode_success: 16121 *decode_OK = True; 16122 return delta; 16123 } 16124 16125 16126 /*------------------------------------------------------------*/ 16127 /*--- ---*/ 16128 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/ 16129 /*--- ---*/ 16130 /*------------------------------------------------------------*/ 16131 16132 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG, 16133 IRTemp vec0/*controlling mask*/, 16134 UInt gran, IROp opSAR ) 16135 { 16136 /* The tricky bit is to convert vec0 into a suitable mask, by 16137 copying the most significant bit of each lane into all positions 16138 in the lane. */ 16139 IRTemp sh = newTemp(Ity_I8); 16140 assign(sh, mkU8(8 * gran - 1)); 16141 16142 IRTemp mask = newTemp(Ity_V128); 16143 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh))); 16144 16145 IRTemp notmask = newTemp(Ity_V128); 16146 assign(notmask, unop(Iop_NotV128, mkexpr(mask))); 16147 16148 IRTemp res = newTemp(Ity_V128); 16149 assign(res, binop(Iop_OrV128, 16150 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)), 16151 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask)))); 16152 return res; 16153 } 16154 16155 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG, 16156 IRTemp vec0/*controlling mask*/, 16157 UInt gran, IROp opSAR128 ) 16158 { 16159 /* The tricky bit is to convert vec0 into a suitable mask, by 16160 copying the most significant bit of each lane into all positions 16161 in the lane. 
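      For example, with gran == 1 the shift amount is 7, so the
      arithmetic right shift turns each control byte into 0xFF (MSB set)
      or 0x00 (MSB clear); the blend is then simply
         res = (vecE & mask) | (vecG & ~mask)
      so lanes whose control MSB is set come from E and the rest from G.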
*/ 16162 IRTemp sh = newTemp(Ity_I8); 16163 assign(sh, mkU8(8 * gran - 1)); 16164 16165 IRTemp vec0Hi = IRTemp_INVALID; 16166 IRTemp vec0Lo = IRTemp_INVALID; 16167 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo ); 16168 16169 IRTemp mask = newTemp(Ity_V256); 16170 assign(mask, binop(Iop_V128HLtoV256, 16171 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)), 16172 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh)))); 16173 16174 IRTemp notmask = newTemp(Ity_V256); 16175 assign(notmask, unop(Iop_NotV256, mkexpr(mask))); 16176 16177 IRTemp res = newTemp(Ity_V256); 16178 assign(res, binop(Iop_OrV256, 16179 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)), 16180 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask)))); 16181 return res; 16182 } 16183 16184 static Long dis_VBLENDV_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta, 16185 const HChar *name, UInt gran, IROp opSAR ) 16186 { 16187 IRTemp addr = IRTemp_INVALID; 16188 Int alen = 0; 16189 HChar dis_buf[50]; 16190 UChar modrm = getUChar(delta); 16191 UInt rG = gregOfRexRM(pfx, modrm); 16192 UInt rV = getVexNvvvv(pfx); 16193 UInt rIS4 = 0xFF; /* invalid */ 16194 IRTemp vecE = newTemp(Ity_V128); 16195 IRTemp vecV = newTemp(Ity_V128); 16196 IRTemp vecIS4 = newTemp(Ity_V128); 16197 if (epartIsReg(modrm)) { 16198 delta++; 16199 UInt rE = eregOfRexRM(pfx, modrm); 16200 assign(vecE, getXMMReg(rE)); 16201 UChar ib = getUChar(delta); 16202 rIS4 = (ib >> 4) & 0xF; 16203 DIP("%s %s,%s,%s,%s\n", 16204 name, nameXMMReg(rIS4), nameXMMReg(rE), 16205 nameXMMReg(rV), nameXMMReg(rG)); 16206 } else { 16207 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16208 delta += alen; 16209 assign(vecE, loadLE(Ity_V128, mkexpr(addr))); 16210 UChar ib = getUChar(delta); 16211 rIS4 = (ib >> 4) & 0xF; 16212 DIP("%s %s,%s,%s,%s\n", 16213 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 16214 } 16215 delta++; 16216 assign(vecV, getXMMReg(rV)); 16217 assign(vecIS4, getXMMReg(rIS4)); 16218 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR ); 16219 putYMMRegLoAndZU( rG, mkexpr(res) ); 16220 return delta; 16221 } 16222 16223 static Long dis_VBLENDV_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta, 16224 const HChar *name, UInt gran, IROp opSAR128 ) 16225 { 16226 IRTemp addr = IRTemp_INVALID; 16227 Int alen = 0; 16228 HChar dis_buf[50]; 16229 UChar modrm = getUChar(delta); 16230 UInt rG = gregOfRexRM(pfx, modrm); 16231 UInt rV = getVexNvvvv(pfx); 16232 UInt rIS4 = 0xFF; /* invalid */ 16233 IRTemp vecE = newTemp(Ity_V256); 16234 IRTemp vecV = newTemp(Ity_V256); 16235 IRTemp vecIS4 = newTemp(Ity_V256); 16236 if (epartIsReg(modrm)) { 16237 delta++; 16238 UInt rE = eregOfRexRM(pfx, modrm); 16239 assign(vecE, getYMMReg(rE)); 16240 UChar ib = getUChar(delta); 16241 rIS4 = (ib >> 4) & 0xF; 16242 DIP("%s %s,%s,%s,%s\n", 16243 name, nameYMMReg(rIS4), nameYMMReg(rE), 16244 nameYMMReg(rV), nameYMMReg(rG)); 16245 } else { 16246 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 16247 delta += alen; 16248 assign(vecE, loadLE(Ity_V256, mkexpr(addr))); 16249 UChar ib = getUChar(delta); 16250 rIS4 = (ib >> 4) & 0xF; 16251 DIP("%s %s,%s,%s,%s\n", 16252 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 16253 } 16254 delta++; 16255 assign(vecV, getYMMReg(rV)); 16256 assign(vecIS4, getYMMReg(rIS4)); 16257 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 ); 16258 putYMMReg( rG, mkexpr(res) ); 16259 return delta; 16260 } 16261 16262 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign ) 16263 { 16264 /* Set Z=1 iff (vecE & vecG) == 0 16265 Set 
C=1 iff (vecE & not vecG) == 0 16266 */ 16267 16268 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 16269 16270 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top 16271 and bottom 64-bits together. It relies on this trick: 16272 16273 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence 16274 16275 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly 16276 InterleaveHI64x2([a,b],[a,b]) == [a,a] 16277 16278 and so the OR of the above 2 exprs produces 16279 [a OR b, a OR b], from which we simply take the lower half. 16280 */ 16281 IRTemp and64 = newTemp(Ity_I64); 16282 IRTemp andn64 = newTemp(Ity_I64); 16283 16284 assign(and64, 16285 unop(Iop_V128to64, 16286 binop(Iop_OrV128, 16287 binop(Iop_InterleaveLO64x2, 16288 mkexpr(andV), mkexpr(andV)), 16289 binop(Iop_InterleaveHI64x2, 16290 mkexpr(andV), mkexpr(andV))))); 16291 16292 assign(andn64, 16293 unop(Iop_V128to64, 16294 binop(Iop_OrV128, 16295 binop(Iop_InterleaveLO64x2, 16296 mkexpr(andnV), mkexpr(andnV)), 16297 binop(Iop_InterleaveHI64x2, 16298 mkexpr(andnV), mkexpr(andnV))))); 16299 16300 IRTemp z64 = newTemp(Ity_I64); 16301 IRTemp c64 = newTemp(Ity_I64); 16302 if (sign == 64) { 16303 /* When only interested in the most significant bit, just shift 16304 arithmetically right and negate. */ 16305 assign(z64, 16306 unop(Iop_Not64, 16307 binop(Iop_Sar64, mkexpr(and64), mkU8(63)))); 16308 16309 assign(c64, 16310 unop(Iop_Not64, 16311 binop(Iop_Sar64, mkexpr(andn64), mkU8(63)))); 16312 } else { 16313 if (sign == 32) { 16314 /* When interested in bit 31 and bit 63, mask those bits and 16315 fallthrough into the PTEST handling. */ 16316 IRTemp t0 = newTemp(Ity_I64); 16317 IRTemp t1 = newTemp(Ity_I64); 16318 IRTemp t2 = newTemp(Ity_I64); 16319 assign(t0, mkU64(0x8000000080000000ULL)); 16320 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0))); 16321 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0))); 16322 and64 = t1; 16323 andn64 = t2; 16324 } 16325 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can 16326 slice out the Z and C bits conveniently. We use the standard 16327 trick all-zeroes -> all-zeroes, anything-else -> all-ones 16328 done by "(x | -x) >>s (word-size - 1)". 16329 */ 16330 assign(z64, 16331 unop(Iop_Not64, 16332 binop(Iop_Sar64, 16333 binop(Iop_Or64, 16334 binop(Iop_Sub64, mkU64(0), mkexpr(and64)), 16335 mkexpr(and64)), mkU8(63)))); 16336 16337 assign(c64, 16338 unop(Iop_Not64, 16339 binop(Iop_Sar64, 16340 binop(Iop_Or64, 16341 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)), 16342 mkexpr(andn64)), mkU8(63)))); 16343 } 16344 16345 /* And finally, slice out the Z and C flags and set the flags 16346 thunk to COPY for them. OSAP are set to zero. */ 16347 IRTemp newOSZACP = newTemp(Ity_I64); 16348 assign(newOSZACP, 16349 binop(Iop_Or64, 16350 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)), 16351 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C)))); 16352 16353 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP))); 16354 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 16355 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 16356 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 16357 } 16358 16359 16360 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD. 16361 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. 
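   In all three cases the computation has the same shape:
      ZF = ((E & G) == 0)   and   CF = ((E & ~G) == 0)
   where the test covers all 128 bits for PTEST but only the sign bits
   of the 32-bit (VTESTPS) or 64-bit (VTESTPD) lanes; see finish_xTESTy
   above.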
*/ 16362 static Long dis_xTESTy_128 ( VexAbiInfo* vbi, Prefix pfx, 16363 Long delta, Bool isAvx, Int sign ) 16364 { 16365 IRTemp addr = IRTemp_INVALID; 16366 Int alen = 0; 16367 HChar dis_buf[50]; 16368 UChar modrm = getUChar(delta); 16369 UInt rG = gregOfRexRM(pfx, modrm); 16370 IRTemp vecE = newTemp(Ity_V128); 16371 IRTemp vecG = newTemp(Ity_V128); 16372 16373 if ( epartIsReg(modrm) ) { 16374 UInt rE = eregOfRexRM(pfx, modrm); 16375 assign(vecE, getXMMReg(rE)); 16376 delta += 1; 16377 DIP( "%s%stest%s %s,%s\n", 16378 isAvx ? "v" : "", sign == 0 ? "p" : "", 16379 sign == 0 ? "" : sign == 32 ? "ps" : "pd", 16380 nameXMMReg(rE), nameXMMReg(rG) ); 16381 } else { 16382 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16383 if (!isAvx) 16384 gen_SEGV_if_not_16_aligned( addr ); 16385 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 16386 delta += alen; 16387 DIP( "%s%stest%s %s,%s\n", 16388 isAvx ? "v" : "", sign == 0 ? "p" : "", 16389 sign == 0 ? "" : sign == 32 ? "ps" : "pd", 16390 dis_buf, nameXMMReg(rG) ); 16391 } 16392 16393 assign(vecG, getXMMReg(rG)); 16394 16395 /* Set Z=1 iff (vecE & vecG) == 0 16396 Set C=1 iff (vecE & not vecG) == 0 16397 */ 16398 16399 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 16400 IRTemp andV = newTemp(Ity_V128); 16401 IRTemp andnV = newTemp(Ity_V128); 16402 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG))); 16403 assign(andnV, binop(Iop_AndV128, 16404 mkexpr(vecE), 16405 binop(Iop_XorV128, mkexpr(vecG), 16406 mkV128(0xFFFF)))); 16407 16408 finish_xTESTy ( andV, andnV, sign ); 16409 return delta; 16410 } 16411 16412 16413 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD. 16414 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */ 16415 static Long dis_xTESTy_256 ( VexAbiInfo* vbi, Prefix pfx, 16416 Long delta, Int sign ) 16417 { 16418 IRTemp addr = IRTemp_INVALID; 16419 Int alen = 0; 16420 HChar dis_buf[50]; 16421 UChar modrm = getUChar(delta); 16422 UInt rG = gregOfRexRM(pfx, modrm); 16423 IRTemp vecE = newTemp(Ity_V256); 16424 IRTemp vecG = newTemp(Ity_V256); 16425 16426 if ( epartIsReg(modrm) ) { 16427 UInt rE = eregOfRexRM(pfx, modrm); 16428 assign(vecE, getYMMReg(rE)); 16429 delta += 1; 16430 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "", 16431 sign == 0 ? "" : sign == 32 ? "ps" : "pd", 16432 nameYMMReg(rE), nameYMMReg(rG) ); 16433 } else { 16434 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16435 assign(vecE, loadLE( Ity_V256, mkexpr(addr) )); 16436 delta += alen; 16437 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "", 16438 sign == 0 ? "" : sign == 32 ? 
"ps" : "pd", 16439 dis_buf, nameYMMReg(rG) ); 16440 } 16441 16442 assign(vecG, getYMMReg(rG)); 16443 16444 /* Set Z=1 iff (vecE & vecG) == 0 16445 Set C=1 iff (vecE & not vecG) == 0 16446 */ 16447 16448 /* andV, andnV: vecE & vecG, vecE and not(vecG) */ 16449 IRTemp andV = newTemp(Ity_V256); 16450 IRTemp andnV = newTemp(Ity_V256); 16451 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG))); 16452 assign(andnV, binop(Iop_AndV256, 16453 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG)))); 16454 16455 IRTemp andVhi = IRTemp_INVALID; 16456 IRTemp andVlo = IRTemp_INVALID; 16457 IRTemp andnVhi = IRTemp_INVALID; 16458 IRTemp andnVlo = IRTemp_INVALID; 16459 breakupV256toV128s( andV, &andVhi, &andVlo ); 16460 breakupV256toV128s( andnV, &andnVhi, &andnVlo ); 16461 16462 IRTemp andV128 = newTemp(Ity_V128); 16463 IRTemp andnV128 = newTemp(Ity_V128); 16464 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) ); 16465 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) ); 16466 16467 finish_xTESTy ( andV128, andnV128, sign ); 16468 return delta; 16469 } 16470 16471 16472 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */ 16473 static Long dis_PMOVxXBW_128 ( VexAbiInfo* vbi, Prefix pfx, 16474 Long delta, Bool isAvx, Bool xIsZ ) 16475 { 16476 IRTemp addr = IRTemp_INVALID; 16477 Int alen = 0; 16478 HChar dis_buf[50]; 16479 IRTemp srcVec = newTemp(Ity_V128); 16480 UChar modrm = getUChar(delta); 16481 const HChar* mbV = isAvx ? "v" : ""; 16482 const HChar how = xIsZ ? 'z' : 's'; 16483 UInt rG = gregOfRexRM(pfx, modrm); 16484 if ( epartIsReg(modrm) ) { 16485 UInt rE = eregOfRexRM(pfx, modrm); 16486 assign( srcVec, getXMMReg(rE) ); 16487 delta += 1; 16488 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16489 } else { 16490 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16491 assign( srcVec, 16492 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16493 delta += alen; 16494 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16495 } 16496 16497 IRExpr* res 16498 = xIsZ /* do math for either zero or sign extend */ 16499 ? binop( Iop_InterleaveLO8x16, 16500 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) 16501 : binop( Iop_SarN16x8, 16502 binop( Iop_ShlN16x8, 16503 binop( Iop_InterleaveLO8x16, 16504 IRExpr_Const( IRConst_V128(0) ), 16505 mkexpr(srcVec) ), 16506 mkU8(8) ), 16507 mkU8(8) ); 16508 16509 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16510 16511 return delta; 16512 } 16513 16514 16515 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */ 16516 static Long dis_PMOVxXBW_256 ( VexAbiInfo* vbi, Prefix pfx, 16517 Long delta, Bool xIsZ ) 16518 { 16519 IRTemp addr = IRTemp_INVALID; 16520 Int alen = 0; 16521 HChar dis_buf[50]; 16522 IRTemp srcVec = newTemp(Ity_V128); 16523 UChar modrm = getUChar(delta); 16524 UChar how = xIsZ ? 'z' : 's'; 16525 UInt rG = gregOfRexRM(pfx, modrm); 16526 if ( epartIsReg(modrm) ) { 16527 UInt rE = eregOfRexRM(pfx, modrm); 16528 assign( srcVec, getXMMReg(rE) ); 16529 delta += 1; 16530 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16531 } else { 16532 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16533 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) ); 16534 delta += alen; 16535 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16536 } 16537 16538 /* First do zero extend. 
*/ 16539 IRExpr* res 16540 = binop( Iop_V128HLtoV256, 16541 binop( Iop_InterleaveHI8x16, 16542 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 16543 binop( Iop_InterleaveLO8x16, 16544 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 16545 /* And if needed sign extension as well. */ 16546 if (!xIsZ) 16547 res = binop( Iop_SarN16x16, 16548 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) ); 16549 16550 putYMMReg ( rG, res ); 16551 16552 return delta; 16553 } 16554 16555 16556 static Long dis_PMOVxXWD_128 ( VexAbiInfo* vbi, Prefix pfx, 16557 Long delta, Bool isAvx, Bool xIsZ ) 16558 { 16559 IRTemp addr = IRTemp_INVALID; 16560 Int alen = 0; 16561 HChar dis_buf[50]; 16562 IRTemp srcVec = newTemp(Ity_V128); 16563 UChar modrm = getUChar(delta); 16564 const HChar* mbV = isAvx ? "v" : ""; 16565 const HChar how = xIsZ ? 'z' : 's'; 16566 UInt rG = gregOfRexRM(pfx, modrm); 16567 16568 if ( epartIsReg(modrm) ) { 16569 UInt rE = eregOfRexRM(pfx, modrm); 16570 assign( srcVec, getXMMReg(rE) ); 16571 delta += 1; 16572 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16573 } else { 16574 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16575 assign( srcVec, 16576 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16577 delta += alen; 16578 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16579 } 16580 16581 IRExpr* res 16582 = binop( Iop_InterleaveLO16x8, 16583 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ); 16584 if (!xIsZ) 16585 res = binop(Iop_SarN32x4, 16586 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16)); 16587 16588 (isAvx ? putYMMRegLoAndZU : putXMMReg) 16589 ( gregOfRexRM(pfx, modrm), res ); 16590 16591 return delta; 16592 } 16593 16594 16595 static Long dis_PMOVxXWD_256 ( VexAbiInfo* vbi, Prefix pfx, 16596 Long delta, Bool xIsZ ) 16597 { 16598 IRTemp addr = IRTemp_INVALID; 16599 Int alen = 0; 16600 HChar dis_buf[50]; 16601 IRTemp srcVec = newTemp(Ity_V128); 16602 UChar modrm = getUChar(delta); 16603 UChar how = xIsZ ? 'z' : 's'; 16604 UInt rG = gregOfRexRM(pfx, modrm); 16605 16606 if ( epartIsReg(modrm) ) { 16607 UInt rE = eregOfRexRM(pfx, modrm); 16608 assign( srcVec, getXMMReg(rE) ); 16609 delta += 1; 16610 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16611 } else { 16612 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16613 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) ); 16614 delta += alen; 16615 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16616 } 16617 16618 IRExpr* res 16619 = binop( Iop_V128HLtoV256, 16620 binop( Iop_InterleaveHI16x8, 16621 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 16622 binop( Iop_InterleaveLO16x8, 16623 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 16624 if (!xIsZ) 16625 res = binop(Iop_SarN32x8, 16626 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16)); 16627 16628 putYMMReg ( rG, res ); 16629 16630 return delta; 16631 } 16632 16633 16634 static Long dis_PMOVSXWQ_128 ( VexAbiInfo* vbi, Prefix pfx, 16635 Long delta, Bool isAvx ) 16636 { 16637 IRTemp addr = IRTemp_INVALID; 16638 Int alen = 0; 16639 HChar dis_buf[50]; 16640 IRTemp srcBytes = newTemp(Ity_I32); 16641 UChar modrm = getUChar(delta); 16642 const HChar* mbV = isAvx ? 
"v" : ""; 16643 UInt rG = gregOfRexRM(pfx, modrm); 16644 16645 if ( epartIsReg( modrm ) ) { 16646 UInt rE = eregOfRexRM(pfx, modrm); 16647 assign( srcBytes, getXMMRegLane32( rE, 0 ) ); 16648 delta += 1; 16649 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16650 } else { 16651 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16652 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 16653 delta += alen; 16654 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 16655 } 16656 16657 (isAvx ? putYMMRegLoAndZU : putXMMReg) 16658 ( rG, binop( Iop_64HLtoV128, 16659 unop( Iop_16Sto64, 16660 unop( Iop_32HIto16, mkexpr(srcBytes) ) ), 16661 unop( Iop_16Sto64, 16662 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) ); 16663 return delta; 16664 } 16665 16666 16667 static Long dis_PMOVSXWQ_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta ) 16668 { 16669 IRTemp addr = IRTemp_INVALID; 16670 Int alen = 0; 16671 HChar dis_buf[50]; 16672 IRTemp srcBytes = newTemp(Ity_I64); 16673 UChar modrm = getUChar(delta); 16674 UInt rG = gregOfRexRM(pfx, modrm); 16675 IRTemp s3, s2, s1, s0; 16676 s3 = s2 = s1 = s0 = IRTemp_INVALID; 16677 16678 if ( epartIsReg( modrm ) ) { 16679 UInt rE = eregOfRexRM(pfx, modrm); 16680 assign( srcBytes, getXMMRegLane64( rE, 0 ) ); 16681 delta += 1; 16682 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 16683 } else { 16684 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16685 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) ); 16686 delta += alen; 16687 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) ); 16688 } 16689 16690 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 ); 16691 putYMMReg( rG, binop( Iop_V128HLtoV256, 16692 binop( Iop_64HLtoV128, 16693 unop( Iop_16Sto64, mkexpr(s3) ), 16694 unop( Iop_16Sto64, mkexpr(s2) ) ), 16695 binop( Iop_64HLtoV128, 16696 unop( Iop_16Sto64, mkexpr(s1) ), 16697 unop( Iop_16Sto64, mkexpr(s0) ) ) ) ); 16698 return delta; 16699 } 16700 16701 16702 static Long dis_PMOVZXWQ_128 ( VexAbiInfo* vbi, Prefix pfx, 16703 Long delta, Bool isAvx ) 16704 { 16705 IRTemp addr = IRTemp_INVALID; 16706 Int alen = 0; 16707 HChar dis_buf[50]; 16708 IRTemp srcVec = newTemp(Ity_V128); 16709 UChar modrm = getUChar(delta); 16710 const HChar* mbV = isAvx ? "v" : ""; 16711 UInt rG = gregOfRexRM(pfx, modrm); 16712 16713 if ( epartIsReg( modrm ) ) { 16714 UInt rE = eregOfRexRM(pfx, modrm); 16715 assign( srcVec, getXMMReg(rE) ); 16716 delta += 1; 16717 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16718 } else { 16719 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16720 assign( srcVec, 16721 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 16722 delta += alen; 16723 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 16724 } 16725 16726 IRTemp zeroVec = newTemp( Ity_V128 ); 16727 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16728 16729 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 16730 ( rG, binop( Iop_InterleaveLO16x8, 16731 mkexpr(zeroVec), 16732 binop( Iop_InterleaveLO16x8, 16733 mkexpr(zeroVec), mkexpr(srcVec) ) ) ); 16734 return delta; 16735 } 16736 16737 16738 static Long dis_PMOVZXWQ_256 ( VexAbiInfo* vbi, Prefix pfx, 16739 Long delta ) 16740 { 16741 IRTemp addr = IRTemp_INVALID; 16742 Int alen = 0; 16743 HChar dis_buf[50]; 16744 IRTemp srcVec = newTemp(Ity_V128); 16745 UChar modrm = getUChar(delta); 16746 UInt rG = gregOfRexRM(pfx, modrm); 16747 16748 if ( epartIsReg( modrm ) ) { 16749 UInt rE = eregOfRexRM(pfx, modrm); 16750 assign( srcVec, getXMMReg(rE) ); 16751 delta += 1; 16752 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 16753 } else { 16754 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16755 assign( srcVec, 16756 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16757 delta += alen; 16758 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) ); 16759 } 16760 16761 IRTemp zeroVec = newTemp( Ity_V128 ); 16762 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16763 16764 putYMMReg( rG, binop( Iop_V128HLtoV256, 16765 binop( Iop_InterleaveHI16x8, 16766 mkexpr(zeroVec), 16767 binop( Iop_InterleaveLO16x8, 16768 mkexpr(zeroVec), mkexpr(srcVec) ) ), 16769 binop( Iop_InterleaveLO16x8, 16770 mkexpr(zeroVec), 16771 binop( Iop_InterleaveLO16x8, 16772 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 16773 return delta; 16774 } 16775 16776 16777 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */ 16778 static Long dis_PMOVxXDQ_128 ( VexAbiInfo* vbi, Prefix pfx, 16779 Long delta, Bool isAvx, Bool xIsZ ) 16780 { 16781 IRTemp addr = IRTemp_INVALID; 16782 Int alen = 0; 16783 HChar dis_buf[50]; 16784 IRTemp srcI64 = newTemp(Ity_I64); 16785 IRTemp srcVec = newTemp(Ity_V128); 16786 UChar modrm = getUChar(delta); 16787 const HChar* mbV = isAvx ? "v" : ""; 16788 const HChar how = xIsZ ? 'z' : 's'; 16789 UInt rG = gregOfRexRM(pfx, modrm); 16790 /* Compute both srcI64 -- the value to expand -- and srcVec -- same 16791 thing in a V128, with arbitrary junk in the top 64 bits. Use 16792 one or both of them and let iropt clean up afterwards (as 16793 usual). */ 16794 if ( epartIsReg(modrm) ) { 16795 UInt rE = eregOfRexRM(pfx, modrm); 16796 assign( srcVec, getXMMReg(rE) ); 16797 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) ); 16798 delta += 1; 16799 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16800 } else { 16801 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16802 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) ); 16803 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) ); 16804 delta += alen; 16805 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16806 } 16807 16808 IRExpr* res 16809 = xIsZ /* do math for either zero or sign extend */ 16810 ? binop( Iop_InterleaveLO32x4, 16811 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) 16812 : binop( Iop_64HLtoV128, 16813 unop( Iop_32Sto64, 16814 unop( Iop_64HIto32, mkexpr(srcI64) ) ), 16815 unop( Iop_32Sto64, 16816 unop( Iop_64to32, mkexpr(srcI64) ) ) ); 16817 16818 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16819 16820 return delta; 16821 } 16822 16823 16824 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */ 16825 static Long dis_PMOVxXDQ_256 ( VexAbiInfo* vbi, Prefix pfx, 16826 Long delta, Bool xIsZ ) 16827 { 16828 IRTemp addr = IRTemp_INVALID; 16829 Int alen = 0; 16830 HChar dis_buf[50]; 16831 IRTemp srcVec = newTemp(Ity_V128); 16832 UChar modrm = getUChar(delta); 16833 UChar how = xIsZ ? 
'z' : 's'; 16834 UInt rG = gregOfRexRM(pfx, modrm); 16835 /* Compute both srcI64 -- the value to expand -- and srcVec -- same 16836 thing in a V128, with arbitrary junk in the top 64 bits. Use 16837 one or both of them and let iropt clean up afterwards (as 16838 usual). */ 16839 if ( epartIsReg(modrm) ) { 16840 UInt rE = eregOfRexRM(pfx, modrm); 16841 assign( srcVec, getXMMReg(rE) ); 16842 delta += 1; 16843 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16844 } else { 16845 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16846 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) ); 16847 delta += alen; 16848 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16849 } 16850 16851 IRExpr* res; 16852 if (xIsZ) 16853 res = binop( Iop_V128HLtoV256, 16854 binop( Iop_InterleaveHI32x4, 16855 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 16856 binop( Iop_InterleaveLO32x4, 16857 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 16858 else { 16859 IRTemp s3, s2, s1, s0; 16860 s3 = s2 = s1 = s0 = IRTemp_INVALID; 16861 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 ); 16862 res = binop( Iop_V128HLtoV256, 16863 binop( Iop_64HLtoV128, 16864 unop( Iop_32Sto64, mkexpr(s3) ), 16865 unop( Iop_32Sto64, mkexpr(s2) ) ), 16866 binop( Iop_64HLtoV128, 16867 unop( Iop_32Sto64, mkexpr(s1) ), 16868 unop( Iop_32Sto64, mkexpr(s0) ) ) ); 16869 } 16870 16871 putYMMReg ( rG, res ); 16872 16873 return delta; 16874 } 16875 16876 16877 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */ 16878 static Long dis_PMOVxXBD_128 ( VexAbiInfo* vbi, Prefix pfx, 16879 Long delta, Bool isAvx, Bool xIsZ ) 16880 { 16881 IRTemp addr = IRTemp_INVALID; 16882 Int alen = 0; 16883 HChar dis_buf[50]; 16884 IRTemp srcVec = newTemp(Ity_V128); 16885 UChar modrm = getUChar(delta); 16886 const HChar* mbV = isAvx ? "v" : ""; 16887 const HChar how = xIsZ ? 'z' : 's'; 16888 UInt rG = gregOfRexRM(pfx, modrm); 16889 if ( epartIsReg(modrm) ) { 16890 UInt rE = eregOfRexRM(pfx, modrm); 16891 assign( srcVec, getXMMReg(rE) ); 16892 delta += 1; 16893 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 16894 } else { 16895 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16896 assign( srcVec, 16897 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) ); 16898 delta += alen; 16899 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 16900 } 16901 16902 IRTemp zeroVec = newTemp(Ity_V128); 16903 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16904 16905 IRExpr* res 16906 = binop(Iop_InterleaveLO8x16, 16907 mkexpr(zeroVec), 16908 binop(Iop_InterleaveLO8x16, 16909 mkexpr(zeroVec), mkexpr(srcVec))); 16910 if (!xIsZ) 16911 res = binop(Iop_SarN32x4, 16912 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24)); 16913 16914 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res ); 16915 16916 return delta; 16917 } 16918 16919 16920 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */ 16921 static Long dis_PMOVxXBD_256 ( VexAbiInfo* vbi, Prefix pfx, 16922 Long delta, Bool xIsZ ) 16923 { 16924 IRTemp addr = IRTemp_INVALID; 16925 Int alen = 0; 16926 HChar dis_buf[50]; 16927 IRTemp srcVec = newTemp(Ity_V128); 16928 UChar modrm = getUChar(delta); 16929 UChar how = xIsZ ? 
'z' : 's'; 16930 UInt rG = gregOfRexRM(pfx, modrm); 16931 if ( epartIsReg(modrm) ) { 16932 UInt rE = eregOfRexRM(pfx, modrm); 16933 assign( srcVec, getXMMReg(rE) ); 16934 delta += 1; 16935 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 16936 } else { 16937 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16938 assign( srcVec, 16939 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 16940 delta += alen; 16941 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 16942 } 16943 16944 IRTemp zeroVec = newTemp(Ity_V128); 16945 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 16946 16947 IRExpr* res 16948 = binop( Iop_V128HLtoV256, 16949 binop(Iop_InterleaveHI8x16, 16950 mkexpr(zeroVec), 16951 binop(Iop_InterleaveLO8x16, 16952 mkexpr(zeroVec), mkexpr(srcVec)) ), 16953 binop(Iop_InterleaveLO8x16, 16954 mkexpr(zeroVec), 16955 binop(Iop_InterleaveLO8x16, 16956 mkexpr(zeroVec), mkexpr(srcVec)) ) ); 16957 if (!xIsZ) 16958 res = binop(Iop_SarN32x8, 16959 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24)); 16960 16961 putYMMReg ( rG, res ); 16962 16963 return delta; 16964 } 16965 16966 16967 /* Handles 128 bit versions of PMOVSXBQ. */ 16968 static Long dis_PMOVSXBQ_128 ( VexAbiInfo* vbi, Prefix pfx, 16969 Long delta, Bool isAvx ) 16970 { 16971 IRTemp addr = IRTemp_INVALID; 16972 Int alen = 0; 16973 HChar dis_buf[50]; 16974 IRTemp srcBytes = newTemp(Ity_I16); 16975 UChar modrm = getUChar(delta); 16976 const HChar* mbV = isAvx ? "v" : ""; 16977 UInt rG = gregOfRexRM(pfx, modrm); 16978 if ( epartIsReg(modrm) ) { 16979 UInt rE = eregOfRexRM(pfx, modrm); 16980 assign( srcBytes, getXMMRegLane16( rE, 0 ) ); 16981 delta += 1; 16982 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 16983 } else { 16984 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 16985 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) ); 16986 delta += alen; 16987 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 16988 } 16989 16990 (isAvx ? putYMMRegLoAndZU : putXMMReg) 16991 ( rG, binop( Iop_64HLtoV128, 16992 unop( Iop_8Sto64, 16993 unop( Iop_16HIto8, mkexpr(srcBytes) ) ), 16994 unop( Iop_8Sto64, 16995 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) ); 16996 return delta; 16997 } 16998 16999 17000 /* Handles 256 bit versions of PMOVSXBQ. 
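   Only the low four bytes of the source matter here: byte n of the
   32-bit source value becomes 64-bit lane n of the destination.  As a
   worked example (a reading of the code below, not a spec quote), a
   source of 0x80017F02 gives, from lane 0 upwards, 0x0000000000000002,
   0x000000000000007F, 0x0000000000000001 and 0xFFFFFFFFFFFFFF80 -- the
   top byte, 0x80, is negative and so sign-extends to all-ones above
   bit 7.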
*/ 17001 static Long dis_PMOVSXBQ_256 ( VexAbiInfo* vbi, Prefix pfx, 17002 Long delta ) 17003 { 17004 IRTemp addr = IRTemp_INVALID; 17005 Int alen = 0; 17006 HChar dis_buf[50]; 17007 IRTemp srcBytes = newTemp(Ity_I32); 17008 UChar modrm = getUChar(delta); 17009 UInt rG = gregOfRexRM(pfx, modrm); 17010 if ( epartIsReg(modrm) ) { 17011 UInt rE = eregOfRexRM(pfx, modrm); 17012 assign( srcBytes, getXMMRegLane32( rE, 0 ) ); 17013 delta += 1; 17014 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 17015 } else { 17016 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17017 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 17018 delta += alen; 17019 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) ); 17020 } 17021 17022 putYMMReg 17023 ( rG, binop( Iop_V128HLtoV256, 17024 binop( Iop_64HLtoV128, 17025 unop( Iop_8Sto64, 17026 unop( Iop_16HIto8, 17027 unop( Iop_32HIto16, 17028 mkexpr(srcBytes) ) ) ), 17029 unop( Iop_8Sto64, 17030 unop( Iop_16to8, 17031 unop( Iop_32HIto16, 17032 mkexpr(srcBytes) ) ) ) ), 17033 binop( Iop_64HLtoV128, 17034 unop( Iop_8Sto64, 17035 unop( Iop_16HIto8, 17036 unop( Iop_32to16, 17037 mkexpr(srcBytes) ) ) ), 17038 unop( Iop_8Sto64, 17039 unop( Iop_16to8, 17040 unop( Iop_32to16, 17041 mkexpr(srcBytes) ) ) ) ) ) ); 17042 return delta; 17043 } 17044 17045 17046 /* Handles 128 bit versions of PMOVZXBQ. */ 17047 static Long dis_PMOVZXBQ_128 ( VexAbiInfo* vbi, Prefix pfx, 17048 Long delta, Bool isAvx ) 17049 { 17050 IRTemp addr = IRTemp_INVALID; 17051 Int alen = 0; 17052 HChar dis_buf[50]; 17053 IRTemp srcVec = newTemp(Ity_V128); 17054 UChar modrm = getUChar(delta); 17055 const HChar* mbV = isAvx ? "v" : ""; 17056 UInt rG = gregOfRexRM(pfx, modrm); 17057 if ( epartIsReg(modrm) ) { 17058 UInt rE = eregOfRexRM(pfx, modrm); 17059 assign( srcVec, getXMMReg(rE) ); 17060 delta += 1; 17061 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 17062 } else { 17063 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17064 assign( srcVec, 17065 unop( Iop_32UtoV128, 17066 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) )))); 17067 delta += alen; 17068 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 17069 } 17070 17071 IRTemp zeroVec = newTemp(Ity_V128); 17072 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 17073 17074 (isAvx ? putYMMRegLoAndZU : putXMMReg) 17075 ( rG, binop( Iop_InterleaveLO8x16, 17076 mkexpr(zeroVec), 17077 binop( Iop_InterleaveLO8x16, 17078 mkexpr(zeroVec), 17079 binop( Iop_InterleaveLO8x16, 17080 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) ); 17081 return delta; 17082 } 17083 17084 17085 /* Handles 256 bit versions of PMOVZXBQ. 
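   The body below zero-extends four source bytes to four quadwords using
   only byte interleaves against a zero vector: each interleave doubles
   the amount of zero padding after every surviving source byte, so one
   pass yields 16-bit lanes 0x00SS, a second yields 32-bit lanes
   0x000000SS, and a third yields the 64-bit lanes.  The HI variant on
   the outermost interleave routes source bytes 2 and 3 into the upper
   128-bit half, while the LO variant keeps bytes 0 and 1 in the lower
   half (a reading of the code, assuming the usual lowest-lane-first
   interleave semantics).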
*/ 17086 static Long dis_PMOVZXBQ_256 ( VexAbiInfo* vbi, Prefix pfx, 17087 Long delta ) 17088 { 17089 IRTemp addr = IRTemp_INVALID; 17090 Int alen = 0; 17091 HChar dis_buf[50]; 17092 IRTemp srcVec = newTemp(Ity_V128); 17093 UChar modrm = getUChar(delta); 17094 UInt rG = gregOfRexRM(pfx, modrm); 17095 if ( epartIsReg(modrm) ) { 17096 UInt rE = eregOfRexRM(pfx, modrm); 17097 assign( srcVec, getXMMReg(rE) ); 17098 delta += 1; 17099 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) ); 17100 } else { 17101 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17102 assign( srcVec, 17103 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ))); 17104 delta += alen; 17105 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) ); 17106 } 17107 17108 IRTemp zeroVec = newTemp(Ity_V128); 17109 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) ); 17110 17111 putYMMReg 17112 ( rG, binop( Iop_V128HLtoV256, 17113 binop( Iop_InterleaveHI8x16, 17114 mkexpr(zeroVec), 17115 binop( Iop_InterleaveLO8x16, 17116 mkexpr(zeroVec), 17117 binop( Iop_InterleaveLO8x16, 17118 mkexpr(zeroVec), mkexpr(srcVec) ) ) ), 17119 binop( Iop_InterleaveLO8x16, 17120 mkexpr(zeroVec), 17121 binop( Iop_InterleaveLO8x16, 17122 mkexpr(zeroVec), 17123 binop( Iop_InterleaveLO8x16, 17124 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) 17125 ) ); 17126 return delta; 17127 } 17128 17129 17130 static Long dis_PHMINPOSUW_128 ( VexAbiInfo* vbi, Prefix pfx, 17131 Long delta, Bool isAvx ) 17132 { 17133 IRTemp addr = IRTemp_INVALID; 17134 Int alen = 0; 17135 HChar dis_buf[50]; 17136 UChar modrm = getUChar(delta); 17137 const HChar* mbV = isAvx ? "v" : ""; 17138 IRTemp sV = newTemp(Ity_V128); 17139 IRTemp sHi = newTemp(Ity_I64); 17140 IRTemp sLo = newTemp(Ity_I64); 17141 IRTemp dLo = newTemp(Ity_I64); 17142 UInt rG = gregOfRexRM(pfx,modrm); 17143 if (epartIsReg(modrm)) { 17144 UInt rE = eregOfRexRM(pfx,modrm); 17145 assign( sV, getXMMReg(rE) ); 17146 delta += 1; 17147 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 17148 } else { 17149 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17150 if (!isAvx) 17151 gen_SEGV_if_not_16_aligned(addr); 17152 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 17153 delta += alen; 17154 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG)); 17155 } 17156 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); 17157 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); 17158 assign( dLo, mkIRExprCCall( 17159 Ity_I64, 0/*regparms*/, 17160 "amd64g_calculate_sse_phminposuw", 17161 &amd64g_calculate_sse_phminposuw, 17162 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) ) 17163 )); 17164 (isAvx ? putYMMRegLoAndZU : putXMMReg) 17165 (rG, unop(Iop_64UtoV128, mkexpr(dLo))); 17166 return delta; 17167 } 17168 17169 17170 static Long dis_AESx ( VexAbiInfo* vbi, Prefix pfx, 17171 Long delta, Bool isAvx, UChar opc ) 17172 { 17173 IRTemp addr = IRTemp_INVALID; 17174 Int alen = 0; 17175 HChar dis_buf[50]; 17176 UChar modrm = getUChar(delta); 17177 UInt rG = gregOfRexRM(pfx, modrm); 17178 UInt regNoL = 0; 17179 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG; 17180 17181 /* This is a nasty kludge. We need to pass 2 x V128 to the 17182 helper. Since we can't do that, use a dirty 17183 helper to compute the results directly from the XMM regs in 17184 the guest state. That means for the memory case, we need to 17185 move the left operand into a pseudo-register (XMM16, let's 17186 call it). 
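   In outline: the dirty call below passes the guest-state byte offsets
   of the destination, left and right registers (or of the YMM16 slot,
   when the left operand comes from memory) rather than the values
   themselves, and the helper reads and writes the V128s directly in the
   guest state.  The helper's C side is presumably along the lines of

      void amd64g_dirtyhelper_AES ( VexGuestAMD64State* gst, ULong opc4,
                                    ULong offD, ULong offL, ULong offR );

   (a hedged guess reconstructed from the five arguments built here, not
   a quote of guest_amd64_defs.h).  The fxState annotations that follow
   are what let tools such as Memcheck see these implicit guest-state
   reads and writes.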
*/ 17187 if (epartIsReg(modrm)) { 17188 regNoL = eregOfRexRM(pfx, modrm); 17189 delta += 1; 17190 } else { 17191 regNoL = 16; /* use XMM16 as an intermediary */ 17192 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17193 /* alignment check needed ???? */ 17194 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 17195 delta += alen; 17196 } 17197 17198 void* fn = &amd64g_dirtyhelper_AES; 17199 const HChar* nm = "amd64g_dirtyhelper_AES"; 17200 17201 /* Round up the arguments. Note that this is a kludge -- the 17202 use of mkU64 rather than mkIRExpr_HWord implies the 17203 assumption that the host's word size is 64-bit. */ 17204 UInt gstOffD = ymmGuestRegOffset(rG); 17205 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL); 17206 UInt gstOffR = ymmGuestRegOffset(regNoR); 17207 IRExpr* opc4 = mkU64(opc); 17208 IRExpr* gstOffDe = mkU64(gstOffD); 17209 IRExpr* gstOffLe = mkU64(gstOffL); 17210 IRExpr* gstOffRe = mkU64(gstOffR); 17211 IRExpr** args 17212 = mkIRExprVec_5( IRExpr_BBPTR(), opc4, gstOffDe, gstOffLe, gstOffRe ); 17213 17214 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args ); 17215 /* It's not really a dirty call, but we can't use the clean helper 17216 mechanism here for the very lame reason that we can't pass 2 x 17217 V128s by value to a helper. Hence this roundabout scheme. */ 17218 d->nFxState = 2; 17219 vex_bzero(&d->fxState, sizeof(d->fxState)); 17220 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes 17221 the second for !isAvx or the third for isAvx. 17222 AESIMC (0xDB) reads the first register, and writes the second. */ 17223 d->fxState[0].fx = Ifx_Read; 17224 d->fxState[0].offset = gstOffL; 17225 d->fxState[0].size = sizeof(U128); 17226 d->fxState[1].offset = gstOffR; 17227 d->fxState[1].size = sizeof(U128); 17228 if (opc == 0xDB) 17229 d->fxState[1].fx = Ifx_Write; 17230 else if (!isAvx || rG == regNoR) 17231 d->fxState[1].fx = Ifx_Modify; 17232 else { 17233 d->fxState[1].fx = Ifx_Read; 17234 d->nFxState++; 17235 d->fxState[2].fx = Ifx_Write; 17236 d->fxState[2].offset = gstOffD; 17237 d->fxState[2].size = sizeof(U128); 17238 } 17239 17240 stmt( IRStmt_Dirty(d) ); 17241 { 17242 const HChar* opsuf; 17243 switch (opc) { 17244 case 0xDC: opsuf = "enc"; break; 17245 case 0XDD: opsuf = "enclast"; break; 17246 case 0xDE: opsuf = "dec"; break; 17247 case 0xDF: opsuf = "declast"; break; 17248 case 0xDB: opsuf = "imc"; break; 17249 default: vassert(0); 17250 } 17251 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf, 17252 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)), 17253 nameXMMReg(regNoR), 17254 (isAvx && opc != 0xDB) ? "," : "", 17255 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : ""); 17256 } 17257 if (isAvx) 17258 putYMMRegLane128( rG, 1, mkV128(0) ); 17259 return delta; 17260 } 17261 17262 static Long dis_AESKEYGENASSIST ( VexAbiInfo* vbi, Prefix pfx, 17263 Long delta, Bool isAvx ) 17264 { 17265 IRTemp addr = IRTemp_INVALID; 17266 Int alen = 0; 17267 HChar dis_buf[50]; 17268 UChar modrm = getUChar(delta); 17269 UInt regNoL = 0; 17270 UInt regNoR = gregOfRexRM(pfx, modrm); 17271 UChar imm = 0; 17272 17273 /* This is a nasty kludge. See AESENC et al. instructions. */ 17274 modrm = getUChar(delta); 17275 if (epartIsReg(modrm)) { 17276 regNoL = eregOfRexRM(pfx, modrm); 17277 imm = getUChar(delta+1); 17278 delta += 1+1; 17279 } else { 17280 regNoL = 16; /* use XMM16 as an intermediary */ 17281 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17282 /* alignment check ???? . 
*/ 17283 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 17284 imm = getUChar(delta+alen); 17285 delta += alen+1; 17286 } 17287 17288 /* Who ya gonna call? Presumably not Ghostbusters. */ 17289 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST; 17290 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST"; 17291 17292 /* Round up the arguments. Note that this is a kludge -- the 17293 use of mkU64 rather than mkIRExpr_HWord implies the 17294 assumption that the host's word size is 64-bit. */ 17295 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL); 17296 UInt gstOffR = ymmGuestRegOffset(regNoR); 17297 17298 IRExpr* imme = mkU64(imm & 0xFF); 17299 IRExpr* gstOffLe = mkU64(gstOffL); 17300 IRExpr* gstOffRe = mkU64(gstOffR); 17301 IRExpr** args 17302 = mkIRExprVec_4( IRExpr_BBPTR(), imme, gstOffLe, gstOffRe ); 17303 17304 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args ); 17305 /* It's not really a dirty call, but we can't use the clean helper 17306 mechanism here for the very lame reason that we can't pass 2 x 17307 V128s by value to a helper. Hence this roundabout scheme. */ 17308 d->nFxState = 2; 17309 vex_bzero(&d->fxState, sizeof(d->fxState)); 17310 d->fxState[0].fx = Ifx_Read; 17311 d->fxState[0].offset = gstOffL; 17312 d->fxState[0].size = sizeof(U128); 17313 d->fxState[1].fx = Ifx_Write; 17314 d->fxState[1].offset = gstOffR; 17315 d->fxState[1].size = sizeof(U128); 17316 stmt( IRStmt_Dirty(d) ); 17317 17318 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm, 17319 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)), 17320 nameXMMReg(regNoR)); 17321 if (isAvx) 17322 putYMMRegLane128( regNoR, 1, mkV128(0) ); 17323 return delta; 17324 } 17325 17326 17327 __attribute__((noinline)) 17328 static 17329 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK, 17330 VexAbiInfo* vbi, 17331 Prefix pfx, Int sz, Long deltaIN ) 17332 { 17333 IRTemp addr = IRTemp_INVALID; 17334 UChar modrm = 0; 17335 Int alen = 0; 17336 HChar dis_buf[50]; 17337 17338 *decode_OK = False; 17339 17340 Long delta = deltaIN; 17341 UChar opc = getUChar(delta); 17342 delta++; 17343 switch (opc) { 17344 17345 case 0x10: 17346 case 0x14: 17347 case 0x15: 17348 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran) 17349 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran) 17350 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran) 17351 Blend at various granularities, with XMM0 (implicit operand) 17352 providing the controlling mask. 
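      The selection itself is done by math_PBLENDVB_128, called below: in
      effect it turns each XMM0 lane into an all-zeroes or all-ones mask
      by arithmetically shifting it right by (8 * gran - 1) bits, so the
      lane's top bit is replicated throughout, and then computes
      (E & mask) | (G & ~mask).  For pblendvb, for instance, an XMM0 byte
      of 0x80 becomes the mask byte 0xFF and selects the E byte, while
      0x7F becomes 0x00 and keeps the G byte.  (A reading of the call
      below; the helper itself is defined elsewhere in this file.)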
17353 */ 17354 if (have66noF2noF3(pfx) && sz == 2) { 17355 modrm = getUChar(delta); 17356 17357 const HChar* nm = NULL; 17358 UInt gran = 0; 17359 IROp opSAR = Iop_INVALID; 17360 switch (opc) { 17361 case 0x10: 17362 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16; 17363 break; 17364 case 0x14: 17365 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4; 17366 break; 17367 case 0x15: 17368 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2; 17369 break; 17370 } 17371 vassert(nm); 17372 17373 IRTemp vecE = newTemp(Ity_V128); 17374 IRTemp vecG = newTemp(Ity_V128); 17375 IRTemp vec0 = newTemp(Ity_V128); 17376 17377 if ( epartIsReg(modrm) ) { 17378 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm))); 17379 delta += 1; 17380 DIP( "%s %s,%s\n", nm, 17381 nameXMMReg( eregOfRexRM(pfx, modrm) ), 17382 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17383 } else { 17384 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17385 gen_SEGV_if_not_16_aligned( addr ); 17386 assign(vecE, loadLE( Ity_V128, mkexpr(addr) )); 17387 delta += alen; 17388 DIP( "%s %s,%s\n", nm, 17389 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17390 } 17391 17392 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm))); 17393 assign(vec0, getXMMReg(0)); 17394 17395 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR ); 17396 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res)); 17397 17398 goto decode_success; 17399 } 17400 break; 17401 17402 case 0x17: 17403 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128 17404 Logical compare (set ZF and CF from AND/ANDN of the operands) */ 17405 if (have66noF2noF3(pfx) 17406 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 17407 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 ); 17408 goto decode_success; 17409 } 17410 break; 17411 17412 case 0x20: 17413 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64 17414 Packed Move with Sign Extend from Byte to Word (XMM) */ 17415 if (have66noF2noF3(pfx) && sz == 2) { 17416 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 17417 False/*!isAvx*/, False/*!xIsZ*/ ); 17418 goto decode_success; 17419 } 17420 break; 17421 17422 case 0x21: 17423 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32 17424 Packed Move with Sign Extend from Byte to DWord (XMM) */ 17425 if (have66noF2noF3(pfx) && sz == 2) { 17426 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 17427 False/*!isAvx*/, False/*!xIsZ*/ ); 17428 goto decode_success; 17429 } 17430 break; 17431 17432 case 0x22: 17433 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16 17434 Packed Move with Sign Extend from Byte to QWord (XMM) */ 17435 if (have66noF2noF3(pfx) && sz == 2) { 17436 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17437 goto decode_success; 17438 } 17439 break; 17440 17441 case 0x23: 17442 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64 17443 Packed Move with Sign Extend from Word to DWord (XMM) */ 17444 if (have66noF2noF3(pfx) && sz == 2) { 17445 delta = dis_PMOVxXWD_128(vbi, pfx, delta, 17446 False/*!isAvx*/, False/*!xIsZ*/); 17447 goto decode_success; 17448 } 17449 break; 17450 17451 case 0x24: 17452 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32 17453 Packed Move with Sign Extend from Word to QWord (XMM) */ 17454 if (have66noF2noF3(pfx) && sz == 2) { 17455 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17456 goto decode_success; 17457 } 17458 break; 17459 17460 case 0x25: 17461 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64 17462 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */ 17463 if (have66noF2noF3(pfx) && sz == 2) { 17464 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 17465 
False/*!isAvx*/, False/*!xIsZ*/ ); 17466 goto decode_success; 17467 } 17468 break; 17469 17470 case 0x28: 17471 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes 17472 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper 17473 64-bit half */ 17474 /* This is a really poor translation -- could be improved if 17475 performance critical. It's a copy-paste of PMULUDQ, too. */ 17476 if (have66noF2noF3(pfx) && sz == 2) { 17477 IRTemp sV = newTemp(Ity_V128); 17478 IRTemp dV = newTemp(Ity_V128); 17479 modrm = getUChar(delta); 17480 UInt rG = gregOfRexRM(pfx,modrm); 17481 assign( dV, getXMMReg(rG) ); 17482 if (epartIsReg(modrm)) { 17483 UInt rE = eregOfRexRM(pfx,modrm); 17484 assign( sV, getXMMReg(rE) ); 17485 delta += 1; 17486 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 17487 } else { 17488 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17489 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 17490 delta += alen; 17491 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG)); 17492 } 17493 17494 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) ); 17495 goto decode_success; 17496 } 17497 break; 17498 17499 case 0x29: 17500 /* 66 0F 38 29 = PCMPEQQ 17501 64x2 equality comparison */ 17502 if (have66noF2noF3(pfx) && sz == 2) { 17503 /* FIXME: this needs an alignment check */ 17504 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 17505 "pcmpeqq", Iop_CmpEQ64x2, False ); 17506 goto decode_success; 17507 } 17508 break; 17509 17510 case 0x2A: 17511 /* 66 0F 38 2A = MOVNTDQA 17512 "non-temporal" "streaming" load 17513 Handle like MOVDQA but only memory operand is allowed */ 17514 if (have66noF2noF3(pfx) && sz == 2) { 17515 modrm = getUChar(delta); 17516 if (!epartIsReg(modrm)) { 17517 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 17518 gen_SEGV_if_not_16_aligned( addr ); 17519 putXMMReg( gregOfRexRM(pfx,modrm), 17520 loadLE(Ity_V128, mkexpr(addr)) ); 17521 DIP("movntdqa %s,%s\n", dis_buf, 17522 nameXMMReg(gregOfRexRM(pfx,modrm))); 17523 delta += alen; 17524 goto decode_success; 17525 } 17526 } 17527 break; 17528 17529 case 0x2B: 17530 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128 17531 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */ 17532 if (have66noF2noF3(pfx) && sz == 2) { 17533 17534 modrm = getUChar(delta); 17535 17536 IRTemp argL = newTemp(Ity_V128); 17537 IRTemp argR = newTemp(Ity_V128); 17538 17539 if ( epartIsReg(modrm) ) { 17540 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 17541 delta += 1; 17542 DIP( "packusdw %s,%s\n", 17543 nameXMMReg( eregOfRexRM(pfx, modrm) ), 17544 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17545 } else { 17546 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17547 gen_SEGV_if_not_16_aligned( addr ); 17548 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 17549 delta += alen; 17550 DIP( "packusdw %s,%s\n", 17551 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17552 } 17553 17554 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 17555 17556 putXMMReg( gregOfRexRM(pfx, modrm), 17557 binop( Iop_QNarrowBin32Sto16Ux8, 17558 mkexpr(argL), mkexpr(argR)) ); 17559 17560 goto decode_success; 17561 } 17562 break; 17563 17564 case 0x30: 17565 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64 17566 Packed Move with Zero Extend from Byte to Word (XMM) */ 17567 if (have66noF2noF3(pfx) && sz == 2) { 17568 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 17569 False/*!isAvx*/, True/*xIsZ*/ ); 17570 goto decode_success; 17571 } 17572 break; 17573 17574 case 0x31: 17575 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32 17576 Packed Move with Zero Extend 
from Byte to DWord (XMM) */ 17577 if (have66noF2noF3(pfx) && sz == 2) { 17578 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 17579 False/*!isAvx*/, True/*xIsZ*/ ); 17580 goto decode_success; 17581 } 17582 break; 17583 17584 case 0x32: 17585 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16 17586 Packed Move with Zero Extend from Byte to QWord (XMM) */ 17587 if (have66noF2noF3(pfx) && sz == 2) { 17588 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17589 goto decode_success; 17590 } 17591 break; 17592 17593 case 0x33: 17594 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64 17595 Packed Move with Zero Extend from Word to DWord (XMM) */ 17596 if (have66noF2noF3(pfx) && sz == 2) { 17597 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 17598 False/*!isAvx*/, True/*xIsZ*/ ); 17599 goto decode_success; 17600 } 17601 break; 17602 17603 case 0x34: 17604 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32 17605 Packed Move with Zero Extend from Word to QWord (XMM) */ 17606 if (have66noF2noF3(pfx) && sz == 2) { 17607 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ ); 17608 goto decode_success; 17609 } 17610 break; 17611 17612 case 0x35: 17613 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64 17614 Packed Move with Zero Extend from DWord to QWord (XMM) */ 17615 if (have66noF2noF3(pfx) && sz == 2) { 17616 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 17617 False/*!isAvx*/, True/*xIsZ*/ ); 17618 goto decode_success; 17619 } 17620 break; 17621 17622 case 0x37: 17623 /* 66 0F 38 37 = PCMPGTQ 17624 64x2 comparison (signed, presumably; the Intel docs don't say :-) 17625 */ 17626 if (have66noF2noF3(pfx) && sz == 2) { 17627 /* FIXME: this needs an alignment check */ 17628 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 17629 "pcmpgtq", Iop_CmpGT64Sx2, False ); 17630 goto decode_success; 17631 } 17632 break; 17633 17634 case 0x38: 17635 case 0x3C: 17636 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min 17637 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max 17638 */ 17639 if (have66noF2noF3(pfx) && sz == 2) { 17640 /* FIXME: this needs an alignment check */ 17641 Bool isMAX = opc == 0x3C; 17642 delta = dis_SSEint_E_to_G( 17643 vbi, pfx, delta, 17644 isMAX ? "pmaxsb" : "pminsb", 17645 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16, 17646 False 17647 ); 17648 goto decode_success; 17649 } 17650 break; 17651 17652 case 0x39: 17653 case 0x3D: 17654 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128 17655 Minimum of Packed Signed Double Word Integers (XMM) 17656 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128 17657 Maximum of Packed Signed Double Word Integers (XMM) 17658 */ 17659 if (have66noF2noF3(pfx) && sz == 2) { 17660 /* FIXME: this needs an alignment check */ 17661 Bool isMAX = opc == 0x3D; 17662 delta = dis_SSEint_E_to_G( 17663 vbi, pfx, delta, 17664 isMAX ? "pmaxsd" : "pminsd", 17665 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4, 17666 False 17667 ); 17668 goto decode_success; 17669 } 17670 break; 17671 17672 case 0x3A: 17673 case 0x3E: 17674 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128 17675 Minimum of Packed Unsigned Word Integers (XMM) 17676 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128 17677 Maximum of Packed Unsigned Word Integers (XMM) 17678 */ 17679 if (have66noF2noF3(pfx) && sz == 2) { 17680 /* FIXME: this needs an alignment check */ 17681 Bool isMAX = opc == 0x3E; 17682 delta = dis_SSEint_E_to_G( 17683 vbi, pfx, delta, 17684 isMAX ? "pmaxuw" : "pminuw", 17685 isMAX ? 
Iop_Max16Ux8 : Iop_Min16Ux8, 17686 False 17687 ); 17688 goto decode_success; 17689 } 17690 break; 17691 17692 case 0x3B: 17693 case 0x3F: 17694 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128 17695 Minimum of Packed Unsigned Doubleword Integers (XMM) 17696 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128 17697 Maximum of Packed Unsigned Doubleword Integers (XMM) 17698 */ 17699 if (have66noF2noF3(pfx) && sz == 2) { 17700 /* FIXME: this needs an alignment check */ 17701 Bool isMAX = opc == 0x3F; 17702 delta = dis_SSEint_E_to_G( 17703 vbi, pfx, delta, 17704 isMAX ? "pmaxud" : "pminud", 17705 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4, 17706 False 17707 ); 17708 goto decode_success; 17709 } 17710 break; 17711 17712 case 0x40: 17713 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128 17714 32x4 integer multiply from xmm2/m128 to xmm1 */ 17715 if (have66noF2noF3(pfx) && sz == 2) { 17716 17717 modrm = getUChar(delta); 17718 17719 IRTemp argL = newTemp(Ity_V128); 17720 IRTemp argR = newTemp(Ity_V128); 17721 17722 if ( epartIsReg(modrm) ) { 17723 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 17724 delta += 1; 17725 DIP( "pmulld %s,%s\n", 17726 nameXMMReg( eregOfRexRM(pfx, modrm) ), 17727 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17728 } else { 17729 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17730 gen_SEGV_if_not_16_aligned( addr ); 17731 assign( argL, loadLE( Ity_V128, mkexpr(addr) )); 17732 delta += alen; 17733 DIP( "pmulld %s,%s\n", 17734 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 17735 } 17736 17737 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) )); 17738 17739 putXMMReg( gregOfRexRM(pfx, modrm), 17740 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) ); 17741 17742 goto decode_success; 17743 } 17744 break; 17745 17746 case 0x41: 17747 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128 17748 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */ 17749 if (have66noF2noF3(pfx) && sz == 2) { 17750 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ ); 17751 goto decode_success; 17752 } 17753 break; 17754 17755 case 0xDC: 17756 case 0xDD: 17757 case 0xDE: 17758 case 0xDF: 17759 case 0xDB: 17760 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128 17761 DD /r = AESENCLAST xmm1, xmm2/m128 17762 DE /r = AESDEC xmm1, xmm2/m128 17763 DF /r = AESDECLAST xmm1, xmm2/m128 17764 17765 DB /r = AESIMC xmm1, xmm2/m128 */ 17766 if (have66noF2noF3(pfx) && sz == 2) { 17767 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc ); 17768 goto decode_success; 17769 } 17770 break; 17771 17772 case 0xF0: 17773 case 0xF1: 17774 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok) 17775 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32 17776 The decoding on this is a bit unusual. 17777 */ 17778 if (haveF2noF3(pfx) 17779 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) { 17780 modrm = getUChar(delta); 17781 17782 if (opc == 0xF0) 17783 sz = 1; 17784 else 17785 vassert(sz == 2 || sz == 4 || sz == 8); 17786 17787 IRType tyE = szToITy(sz); 17788 IRTemp valE = newTemp(tyE); 17789 17790 if (epartIsReg(modrm)) { 17791 assign(valE, getIRegE(sz, pfx, modrm)); 17792 delta += 1; 17793 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm), 17794 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm)); 17795 } else { 17796 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17797 assign(valE, loadLE(tyE, mkexpr(addr))); 17798 delta += alen; 17799 DIP("crc32b %s,%s\n", dis_buf, 17800 nameIRegG(1==getRexW(pfx) ? 
8 : 4, pfx, modrm)); 17801 } 17802 17803 /* Somewhat funny getting/putting of the crc32 value, in order 17804 to ensure that it turns into 64-bit gets and puts. However, 17805 mask off the upper 32 bits so as to not get memcheck false 17806 +ves around the helper call. */ 17807 IRTemp valG0 = newTemp(Ity_I64); 17808 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm), 17809 mkU64(0xFFFFFFFF))); 17810 17811 const HChar* nm = NULL; 17812 void* fn = NULL; 17813 switch (sz) { 17814 case 1: nm = "amd64g_calc_crc32b"; 17815 fn = &amd64g_calc_crc32b; break; 17816 case 2: nm = "amd64g_calc_crc32w"; 17817 fn = &amd64g_calc_crc32w; break; 17818 case 4: nm = "amd64g_calc_crc32l"; 17819 fn = &amd64g_calc_crc32l; break; 17820 case 8: nm = "amd64g_calc_crc32q"; 17821 fn = &amd64g_calc_crc32q; break; 17822 } 17823 vassert(nm && fn); 17824 IRTemp valG1 = newTemp(Ity_I64); 17825 assign(valG1, 17826 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn, 17827 mkIRExprVec_2(mkexpr(valG0), 17828 widenUto64(mkexpr(valE))))); 17829 17830 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1))); 17831 goto decode_success; 17832 } 17833 break; 17834 17835 default: 17836 break; 17837 17838 } 17839 17840 //decode_failure: 17841 *decode_OK = False; 17842 return deltaIN; 17843 17844 decode_success: 17845 *decode_OK = True; 17846 return delta; 17847 } 17848 17849 17850 /*------------------------------------------------------------*/ 17851 /*--- ---*/ 17852 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/ 17853 /*--- ---*/ 17854 /*------------------------------------------------------------*/ 17855 17856 static Long dis_PEXTRW ( VexAbiInfo* vbi, Prefix pfx, 17857 Long delta, Bool isAvx ) 17858 { 17859 IRTemp addr = IRTemp_INVALID; 17860 IRTemp t0 = IRTemp_INVALID; 17861 IRTemp t1 = IRTemp_INVALID; 17862 IRTemp t2 = IRTemp_INVALID; 17863 IRTemp t3 = IRTemp_INVALID; 17864 UChar modrm = getUChar(delta); 17865 Int alen = 0; 17866 HChar dis_buf[50]; 17867 UInt rG = gregOfRexRM(pfx,modrm); 17868 Int imm8_20; 17869 IRTemp xmm_vec = newTemp(Ity_V128); 17870 IRTemp d16 = newTemp(Ity_I16); 17871 const HChar* mbV = isAvx ? 
"v" : ""; 17872 17873 vassert(0==getRexW(pfx)); /* ensured by caller */ 17874 assign( xmm_vec, getXMMReg(rG) ); 17875 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17876 17877 if ( epartIsReg( modrm ) ) { 17878 imm8_20 = (Int)(getUChar(delta+1) & 7); 17879 } else { 17880 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17881 imm8_20 = (Int)(getUChar(delta+alen) & 7); 17882 } 17883 17884 switch (imm8_20) { 17885 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break; 17886 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break; 17887 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break; 17888 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break; 17889 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break; 17890 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break; 17891 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break; 17892 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break; 17893 default: vassert(0); 17894 } 17895 17896 if ( epartIsReg( modrm ) ) { 17897 UInt rE = eregOfRexRM(pfx,modrm); 17898 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) ); 17899 delta += 1+1; 17900 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, 17901 nameXMMReg( rG ), nameIReg32( rE ) ); 17902 } else { 17903 storeLE( mkexpr(addr), mkexpr(d16) ); 17904 delta += alen+1; 17905 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf ); 17906 } 17907 return delta; 17908 } 17909 17910 17911 static Long dis_PEXTRD ( VexAbiInfo* vbi, Prefix pfx, 17912 Long delta, Bool isAvx ) 17913 { 17914 IRTemp addr = IRTemp_INVALID; 17915 IRTemp t0 = IRTemp_INVALID; 17916 IRTemp t1 = IRTemp_INVALID; 17917 IRTemp t2 = IRTemp_INVALID; 17918 IRTemp t3 = IRTemp_INVALID; 17919 UChar modrm = 0; 17920 Int alen = 0; 17921 HChar dis_buf[50]; 17922 17923 Int imm8_10; 17924 IRTemp xmm_vec = newTemp(Ity_V128); 17925 IRTemp src_dword = newTemp(Ity_I32); 17926 const HChar* mbV = isAvx ? "v" : ""; 17927 17928 vassert(0==getRexW(pfx)); /* ensured by caller */ 17929 modrm = getUChar(delta); 17930 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 17931 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 17932 17933 if ( epartIsReg( modrm ) ) { 17934 imm8_10 = (Int)(getUChar(delta+1) & 3); 17935 } else { 17936 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17937 imm8_10 = (Int)(getUChar(delta+alen) & 3); 17938 } 17939 17940 switch ( imm8_10 ) { 17941 case 0: assign( src_dword, mkexpr(t0) ); break; 17942 case 1: assign( src_dword, mkexpr(t1) ); break; 17943 case 2: assign( src_dword, mkexpr(t2) ); break; 17944 case 3: assign( src_dword, mkexpr(t3) ); break; 17945 default: vassert(0); 17946 } 17947 17948 if ( epartIsReg( modrm ) ) { 17949 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) ); 17950 delta += 1+1; 17951 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10, 17952 nameXMMReg( gregOfRexRM(pfx, modrm) ), 17953 nameIReg32( eregOfRexRM(pfx, modrm) ) ); 17954 } else { 17955 storeLE( mkexpr(addr), mkexpr(src_dword) ); 17956 delta += alen+1; 17957 DIP( "%spextrd $%d, %s,%s\n", mbV, 17958 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 17959 } 17960 return delta; 17961 } 17962 17963 17964 static Long dis_PEXTRQ ( VexAbiInfo* vbi, Prefix pfx, 17965 Long delta, Bool isAvx ) 17966 { 17967 IRTemp addr = IRTemp_INVALID; 17968 UChar modrm = 0; 17969 Int alen = 0; 17970 HChar dis_buf[50]; 17971 17972 Int imm8_0; 17973 IRTemp xmm_vec = newTemp(Ity_V128); 17974 IRTemp src_qword = newTemp(Ity_I64); 17975 const HChar* mbV = isAvx ? 
"v" : ""; 17976 17977 vassert(1==getRexW(pfx)); /* ensured by caller */ 17978 modrm = getUChar(delta); 17979 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 17980 17981 if ( epartIsReg( modrm ) ) { 17982 imm8_0 = (Int)(getUChar(delta+1) & 1); 17983 } else { 17984 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 17985 imm8_0 = (Int)(getUChar(delta+alen) & 1); 17986 } 17987 17988 switch ( imm8_0 ) { 17989 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); 17990 break; 17991 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); 17992 break; 17993 default: vassert(0); 17994 } 17995 17996 if ( epartIsReg( modrm ) ) { 17997 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) ); 17998 delta += 1+1; 17999 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0, 18000 nameXMMReg( gregOfRexRM(pfx, modrm) ), 18001 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 18002 } else { 18003 storeLE( mkexpr(addr), mkexpr(src_qword) ); 18004 delta += alen+1; 18005 DIP( "%spextrq $%d, %s,%s\n", mbV, 18006 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 18007 } 18008 return delta; 18009 } 18010 18011 static IRExpr* math_CTZ32(IRExpr *exp) 18012 { 18013 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */ 18014 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp))); 18015 } 18016 18017 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR, 18018 Long delta, UChar opc, UChar imm, 18019 HChar dis_buf[]) 18020 { 18021 /* We only handle PCMPISTRI for now */ 18022 vassert((opc & 0x03) == 0x03); 18023 /* And only an immediate byte of 0x38 or 0x3A */ 18024 vassert((imm & ~0x02) == 0x38); 18025 18026 /* FIXME: Is this correct when RegNoL == 16 ? */ 18027 IRTemp argL = newTemp(Ity_V128); 18028 assign(argL, getXMMReg(regNoL)); 18029 IRTemp argR = newTemp(Ity_V128); 18030 assign(argR, getXMMReg(regNoR)); 18031 18032 IRTemp zmaskL = newTemp(Ity_I32); 18033 assign(zmaskL, unop(Iop_16Uto32, 18034 unop(Iop_GetMSBs8x16, 18035 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0))))); 18036 IRTemp zmaskR = newTemp(Ity_I32); 18037 assign(zmaskR, unop(Iop_16Uto32, 18038 unop(Iop_GetMSBs8x16, 18039 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0))))); 18040 18041 /* We want validL = ~(zmaskL | -zmaskL) 18042 18043 But this formulation kills memcheck's validity tracking when any 18044 bits above the first "1" are invalid. So reformulate as: 18045 18046 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1 18047 */ 18048 18049 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL))); 18050 18051 /* Generate a bool expression which is zero iff the original is 18052 zero. Do this carefully so memcheck can propagate validity bits 18053 correctly. 18054 */ 18055 IRTemp zmaskL_zero = newTemp(Ity_I1); 18056 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0))); 18057 18058 IRTemp validL = newTemp(Ity_I32); 18059 assign(validL, binop(Iop_Sub32, 18060 IRExpr_ITE(mkexpr(zmaskL_zero), 18061 binop(Iop_Shl32, mkU32(1), ctzL), 18062 mkU32(0)), 18063 mkU32(1))); 18064 18065 /* And similarly for validR. */ 18066 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR))); 18067 IRTemp zmaskR_zero = newTemp(Ity_I1); 18068 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0))); 18069 IRTemp validR = newTemp(Ity_I32); 18070 assign(validR, binop(Iop_Sub32, 18071 IRExpr_ITE(mkexpr(zmaskR_zero), 18072 binop(Iop_Shl32, mkU32(1), ctzR), 18073 mkU32(0)), 18074 mkU32(1))); 18075 18076 /* Do the actual comparison. 
*/ 18077 IRExpr *boolResII = unop(Iop_16Uto32, 18078 unop(Iop_GetMSBs8x16, 18079 binop(Iop_CmpEQ8x16, mkexpr(argL), 18080 mkexpr(argR)))); 18081 18082 /* Compute boolresII & validL & validR (i.e., if both valid, use 18083 comparison result) */ 18084 IRExpr *intRes1_a = binop(Iop_And32, boolResII, 18085 binop(Iop_And32, 18086 mkexpr(validL), mkexpr(validR))); 18087 18088 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */ 18089 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32, 18090 mkexpr(validL), mkexpr(validR))); 18091 /* Otherwise, zero. */ 18092 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF), 18093 binop(Iop_Or32, intRes1_a, intRes1_b)); 18094 18095 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with 18096 result. */ 18097 IRTemp intRes2 = newTemp(Ity_I32); 18098 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF), 18099 binop(Iop_Xor32, intRes1, mkexpr(validL)))); 18100 18101 /* If the 0x40 bit were set in imm=0x3A, we would return the index 18102 of the msb. Since it is clear, we return the index of the 18103 lsb. */ 18104 IRExpr *newECX = math_CTZ32(binop(Iop_Or32, 18105 mkexpr(intRes2), mkU32(0x10000))); 18106 18107 /* And thats our rcx. */ 18108 putIReg32(R_RCX, newECX); 18109 18110 /* Now for the condition codes... */ 18111 18112 /* C == 0 iff intRes2 == 0 */ 18113 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2), 18114 mkU32(0)), 18115 mkU32(1 << AMD64G_CC_SHIFT_C), 18116 mkU32(0)); 18117 /* Z == 1 iff any in argL is 0 */ 18118 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero), 18119 mkU32(1 << AMD64G_CC_SHIFT_Z), 18120 mkU32(0)); 18121 /* S == 1 iff any in argR is 0 */ 18122 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero), 18123 mkU32(1 << AMD64G_CC_SHIFT_S), 18124 mkU32(0)); 18125 /* O == IntRes2[0] */ 18126 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2), 18127 mkU32(0x01)), 18128 mkU8(AMD64G_CC_SHIFT_O)); 18129 18130 /* Put them all together */ 18131 IRTemp cc = newTemp(Ity_I64); 18132 assign(cc, widenUto64(binop(Iop_Or32, 18133 binop(Iop_Or32, c_bit, z_bit), 18134 binop(Iop_Or32, s_bit, o_bit)))); 18135 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY))); 18136 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc))); 18137 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0))); 18138 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0))); 18139 18140 return delta; 18141 } 18142 18143 /* This can fail, in which case it returns the original (unchanged) 18144 delta. */ 18145 static Long dis_PCMPxSTRx ( VexAbiInfo* vbi, Prefix pfx, 18146 Long delta, Bool isAvx, UChar opc ) 18147 { 18148 Long delta0 = delta; 18149 UInt isISTRx = opc & 2; 18150 UInt isxSTRM = (opc & 1) ^ 1; 18151 UInt regNoL = 0; 18152 UInt regNoR = 0; 18153 UChar imm = 0; 18154 IRTemp addr = IRTemp_INVALID; 18155 Int alen = 0; 18156 HChar dis_buf[50]; 18157 18158 /* This is a nasty kludge. We need to pass 2 x V128 to the helper 18159 (which is clean). Since we can't do that, use a dirty helper to 18160 compute the results directly from the XMM regs in the guest 18161 state. That means for the memory case, we need to move the left 18162 operand into a pseudo-register (XMM16, let's call it). 
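   The helper hands everything back in one 64-bit value: bits 15:0 carry
   the new OSZACP flag bits and, for the xSTRI forms, bits 31:16 carry
   the new ECX; both fields are unpacked after the dirty call below.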
*/ 18163 UChar modrm = getUChar(delta); 18164 if (epartIsReg(modrm)) { 18165 regNoL = eregOfRexRM(pfx, modrm); 18166 regNoR = gregOfRexRM(pfx, modrm); 18167 imm = getUChar(delta+1); 18168 delta += 1+1; 18169 } else { 18170 regNoL = 16; /* use XMM16 as an intermediary */ 18171 regNoR = gregOfRexRM(pfx, modrm); 18172 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18173 /* No alignment check; I guess that makes sense, given that 18174 these insns are for dealing with C style strings. */ 18175 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) )); 18176 imm = getUChar(delta+alen); 18177 delta += alen+1; 18178 } 18179 18180 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so 18181 itself. */ 18182 if (regNoL == 16) { 18183 DIP("%spcmp%cstr%c $%x,%s,%s\n", 18184 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 18185 (UInt)imm, dis_buf, nameXMMReg(regNoR)); 18186 } else { 18187 DIP("%spcmp%cstr%c $%x,%s,%s\n", 18188 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i', 18189 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR)); 18190 } 18191 18192 /* Handle special case(s). */ 18193 if (imm == 0x3A && isISTRx && !isxSTRM) { 18194 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta, 18195 opc, imm, dis_buf); 18196 } 18197 18198 /* Now we know the XMM reg numbers for the operands, and the 18199 immediate byte. Is it one we can actually handle? Throw out any 18200 cases for which the helper function has not been verified. */ 18201 switch (imm) { 18202 case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E: 18203 case 0x12: case 0x14: case 0x1A: 18204 case 0x30: case 0x34: case 0x38: case 0x3A: 18205 case 0x40: case 0x44: case 0x46: case 0x4A: 18206 break; 18207 // the 16-bit character versions of the above 18208 case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D: 18209 case 0x13: case 0x1B: 18210 case 0x39: case 0x3B: 18211 case 0x45: case 0x4B: 18212 break; 18213 default: 18214 return delta0; /*FAIL*/ 18215 } 18216 18217 /* Who ya gonna call? Presumably not Ghostbusters. */ 18218 void* fn = &amd64g_dirtyhelper_PCMPxSTRx; 18219 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx"; 18220 18221 /* Round up the arguments. Note that this is a kludge -- the use 18222 of mkU64 rather than mkIRExpr_HWord implies the assumption that 18223 the host's word size is 64-bit. */ 18224 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL); 18225 UInt gstOffR = ymmGuestRegOffset(regNoR); 18226 18227 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF)); 18228 IRExpr* gstOffLe = mkU64(gstOffL); 18229 IRExpr* gstOffRe = mkU64(gstOffR); 18230 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8); 18231 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8); 18232 IRExpr** args 18233 = mkIRExprVec_6( IRExpr_BBPTR(), 18234 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN ); 18235 18236 IRTemp resT = newTemp(Ity_I64); 18237 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args ); 18238 /* It's not really a dirty call, but we can't use the clean helper 18239 mechanism here for the very lame reason that we can't pass 2 x 18240 V128s by value to a helper. Hence this roundabout scheme. */ 18241 d->nFxState = 2; 18242 vex_bzero(&d->fxState, sizeof(d->fxState)); 18243 d->fxState[0].fx = Ifx_Read; 18244 d->fxState[0].offset = gstOffL; 18245 d->fxState[0].size = sizeof(U128); 18246 d->fxState[1].fx = Ifx_Read; 18247 d->fxState[1].offset = gstOffR; 18248 d->fxState[1].size = sizeof(U128); 18249 if (isxSTRM) { 18250 /* Declare that the helper writes XMM0. 
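      The xSTRM forms deliver their result mask in XMM0 rather than via
      ECX, so a third fxState entry covering XMM0's slot in the guest
      state is needed here; without it, tools such as Memcheck would
      never see the helper's write to XMM0.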
*/ 18251 d->nFxState = 3; 18252 d->fxState[2].fx = Ifx_Write; 18253 d->fxState[2].offset = ymmGuestRegOffset(0); 18254 d->fxState[2].size = sizeof(U128); 18255 } 18256 18257 stmt( IRStmt_Dirty(d) ); 18258 18259 /* Now resT[15:0] holds the new OSZACP values, so the condition 18260 codes must be updated. And for a xSTRI case, resT[31:16] holds 18261 the new ECX value, so stash that too. */ 18262 if (!isxSTRM) { 18263 putIReg64(R_RCX, binop(Iop_And64, 18264 binop(Iop_Shr64, mkexpr(resT), mkU8(16)), 18265 mkU64(0xFFFF))); 18266 } 18267 18268 /* Zap the upper half of the dest reg as per AVX conventions. */ 18269 if (isxSTRM && isAvx) 18270 putYMMRegLane128(/*YMM*/0, 1, mkV128(0)); 18271 18272 stmt( IRStmt_Put( 18273 OFFB_CC_DEP1, 18274 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)) 18275 )); 18276 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 18277 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 18278 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 18279 18280 return delta; 18281 } 18282 18283 18284 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 ) 18285 { 18286 vassert(imm8 >= 0 && imm8 <= 15); 18287 18288 // Create a V128 value which has the selected byte in the 18289 // specified lane, and zeroes everywhere else. 18290 IRTemp tmp128 = newTemp(Ity_V128); 18291 IRTemp halfshift = newTemp(Ity_I64); 18292 assign(halfshift, binop(Iop_Shl64, 18293 unop(Iop_8Uto64, mkexpr(u8)), 18294 mkU8(8 * (imm8 & 7)))); 18295 if (imm8 < 8) { 18296 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift))); 18297 } else { 18298 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0))); 18299 } 18300 18301 UShort mask = ~(1 << imm8); 18302 IRTemp res = newTemp(Ity_V128); 18303 assign( res, binop(Iop_OrV128, 18304 mkexpr(tmp128), 18305 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) ); 18306 return res; 18307 } 18308 18309 18310 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 ) 18311 { 18312 IRTemp z32 = newTemp(Ity_I32); 18313 assign(z32, mkU32(0)); 18314 18315 /* Surround u32 with zeroes as per imm, giving us something we can 18316 OR into a suitably masked-out v128.*/ 18317 IRTemp withZs = newTemp(Ity_V128); 18318 UShort mask = 0; 18319 switch (imm8) { 18320 case 3: mask = 0x0FFF; 18321 assign(withZs, mkV128from32s(u32, z32, z32, z32)); 18322 break; 18323 case 2: mask = 0xF0FF; 18324 assign(withZs, mkV128from32s(z32, u32, z32, z32)); 18325 break; 18326 case 1: mask = 0xFF0F; 18327 assign(withZs, mkV128from32s(z32, z32, u32, z32)); 18328 break; 18329 case 0: mask = 0xFFF0; 18330 assign(withZs, mkV128from32s(z32, z32, z32, u32)); 18331 break; 18332 default: vassert(0); 18333 } 18334 18335 IRTemp res = newTemp(Ity_V128); 18336 assign(res, binop( Iop_OrV128, 18337 mkexpr(withZs), 18338 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) ); 18339 return res; 18340 } 18341 18342 18343 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 ) 18344 { 18345 /* Surround u64 with zeroes as per imm, giving us something we can 18346 OR into a suitably masked-out v128.*/ 18347 IRTemp withZs = newTemp(Ity_V128); 18348 UShort mask = 0; 18349 if (imm8 == 0) { 18350 mask = 0xFF00; 18351 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64))); 18352 } else { 18353 vassert(imm8 == 1); 18354 mask = 0x00FF; 18355 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0))); 18356 } 18357 18358 IRTemp res = newTemp(Ity_V128); 18359 assign( res, binop( Iop_OrV128, 18360 mkexpr(withZs), 18361 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) ); 18362 return 
res; 18363 } 18364 18365 18366 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 ) 18367 { 18368 const IRTemp inval = IRTemp_INVALID; 18369 IRTemp dstDs[4] = { inval, inval, inval, inval }; 18370 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] ); 18371 18372 vassert(imm8 <= 255); 18373 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */ 18374 18375 UInt imm8_zmask = (imm8 & 15); 18376 IRTemp zero_32 = newTemp(Ity_I32); 18377 assign( zero_32, mkU32(0) ); 18378 IRTemp resV = newTemp(Ity_V128); 18379 assign( resV, mkV128from32s( 18380 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3], 18381 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2], 18382 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1], 18383 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) ); 18384 return resV; 18385 } 18386 18387 18388 static Long dis_PEXTRB_128_GtoE ( VexAbiInfo* vbi, Prefix pfx, 18389 Long delta, Bool isAvx ) 18390 { 18391 IRTemp addr = IRTemp_INVALID; 18392 Int alen = 0; 18393 HChar dis_buf[50]; 18394 IRTemp xmm_vec = newTemp(Ity_V128); 18395 IRTemp sel_lane = newTemp(Ity_I32); 18396 IRTemp shr_lane = newTemp(Ity_I32); 18397 const HChar* mbV = isAvx ? "v" : ""; 18398 UChar modrm = getUChar(delta); 18399 IRTemp t3, t2, t1, t0; 18400 Int imm8; 18401 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 18402 t3 = t2 = t1 = t0 = IRTemp_INVALID; 18403 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 18404 18405 if ( epartIsReg( modrm ) ) { 18406 imm8 = (Int)getUChar(delta+1); 18407 } else { 18408 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18409 imm8 = (Int)getUChar(delta+alen); 18410 } 18411 switch ( (imm8 >> 2) & 3 ) { 18412 case 0: assign( sel_lane, mkexpr(t0) ); break; 18413 case 1: assign( sel_lane, mkexpr(t1) ); break; 18414 case 2: assign( sel_lane, mkexpr(t2) ); break; 18415 case 3: assign( sel_lane, mkexpr(t3) ); break; 18416 default: vassert(0); 18417 } 18418 assign( shr_lane, 18419 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 18420 18421 if ( epartIsReg( modrm ) ) { 18422 putIReg64( eregOfRexRM(pfx,modrm), 18423 unop( Iop_32Uto64, 18424 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 18425 delta += 1+1; 18426 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8, 18427 nameXMMReg( gregOfRexRM(pfx, modrm) ), 18428 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 18429 } else { 18430 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 18431 delta += alen+1; 18432 DIP( "%spextrb $%d,%s,%s\n", mbV, 18433 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 18434 } 18435 18436 return delta; 18437 } 18438 18439 18440 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18441 { 18442 vassert(imm8 < 256); 18443 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 18444 IRTemp and_vec = newTemp(Ity_V128); 18445 IRTemp sum_vec = newTemp(Ity_V128); 18446 IRTemp rm = newTemp(Ity_I32); 18447 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18448 assign( and_vec, binop( Iop_AndV128, 18449 triop( Iop_Mul64Fx2, 18450 mkexpr(rm), 18451 mkexpr(dst_vec), mkexpr(src_vec) ), 18452 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 18453 18454 assign( sum_vec, binop( Iop_Add64F0x2, 18455 binop( Iop_InterleaveHI64x2, 18456 mkexpr(and_vec), mkexpr(and_vec) ), 18457 binop( Iop_InterleaveLO64x2, 18458 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 18459 IRTemp res = newTemp(Ity_V128); 18460 assign(res, binop( Iop_AndV128, 18461 binop( Iop_InterleaveLO64x2, 18462 mkexpr(sum_vec), mkexpr(sum_vec) ), 18463 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 18464 
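   /* Worked example: with imm8 == 0x31, bits 5:4 (== 11b) keep both
      products in the sum, and bits 1:0 (== 01b) route that sum to the
      low 64-bit lane only, giving (high : low) = (0 : d1*s1 + d0*s0). */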
return res; 18465 } 18466 18467 18468 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18469 { 18470 vassert(imm8 < 256); 18471 IRTemp tmp_prod_vec = newTemp(Ity_V128); 18472 IRTemp prod_vec = newTemp(Ity_V128); 18473 IRTemp sum_vec = newTemp(Ity_V128); 18474 IRTemp rm = newTemp(Ity_I32); 18475 IRTemp v3, v2, v1, v0; 18476 v3 = v2 = v1 = v0 = IRTemp_INVALID; 18477 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 18478 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 18479 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 18480 0xFFFF }; 18481 18482 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18483 assign( tmp_prod_vec, 18484 binop( Iop_AndV128, 18485 triop( Iop_Mul32Fx4, 18486 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ), 18487 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 18488 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 18489 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) ); 18490 18491 assign( sum_vec, triop( Iop_Add32Fx4, 18492 mkexpr(rm), 18493 binop( Iop_InterleaveHI32x4, 18494 mkexpr(prod_vec), mkexpr(prod_vec) ), 18495 binop( Iop_InterleaveLO32x4, 18496 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 18497 18498 IRTemp res = newTemp(Ity_V128); 18499 assign( res, binop( Iop_AndV128, 18500 triop( Iop_Add32Fx4, 18501 mkexpr(rm), 18502 binop( Iop_InterleaveHI32x4, 18503 mkexpr(sum_vec), mkexpr(sum_vec) ), 18504 binop( Iop_InterleaveLO32x4, 18505 mkexpr(sum_vec), mkexpr(sum_vec) ) ), 18506 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 18507 return res; 18508 } 18509 18510 18511 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 ) 18512 { 18513 /* Mask out bits of the operands we don't need. This isn't 18514 strictly necessary, but it does ensure Memcheck doesn't 18515 give us any false uninitialised value errors as a 18516 result. 
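      (MPSADBW only reads the 32-bit group of the source operand selected
      by imm8[1:0] and an 11-byte window of the destination operand
      starting at byte 0 or 4 as selected by imm8[2]; the masks below
      cover exactly those bytes.)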
*/ 18517 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 }; 18518 UShort dst_mask[2] = { 0x07FF, 0x7FF0 }; 18519 18520 IRTemp src_maskV = newTemp(Ity_V128); 18521 IRTemp dst_maskV = newTemp(Ity_V128); 18522 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] )); 18523 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] )); 18524 18525 IRTemp src_masked = newTemp(Ity_V128); 18526 IRTemp dst_masked = newTemp(Ity_V128); 18527 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV))); 18528 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV))); 18529 18530 /* Generate 4 64 bit values that we can hand to a clean helper */ 18531 IRTemp sHi = newTemp(Ity_I64); 18532 IRTemp sLo = newTemp(Ity_I64); 18533 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) ); 18534 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) ); 18535 18536 IRTemp dHi = newTemp(Ity_I64); 18537 IRTemp dLo = newTemp(Ity_I64); 18538 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) ); 18539 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) ); 18540 18541 /* Compute halves of the result separately */ 18542 IRTemp resHi = newTemp(Ity_I64); 18543 IRTemp resLo = newTemp(Ity_I64); 18544 18545 IRExpr** argsHi 18546 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 18547 mkU64( 0x80 | (imm8 & 7) )); 18548 IRExpr** argsLo 18549 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 18550 mkU64( 0x00 | (imm8 & 7) )); 18551 18552 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/, 18553 "amd64g_calc_mpsadbw", 18554 &amd64g_calc_mpsadbw, argsHi )); 18555 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/, 18556 "amd64g_calc_mpsadbw", 18557 &amd64g_calc_mpsadbw, argsLo )); 18558 18559 IRTemp res = newTemp(Ity_V128); 18560 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))); 18561 return res; 18562 } 18563 18564 static Long dis_EXTRACTPS ( VexAbiInfo* vbi, Prefix pfx, 18565 Long delta, Bool isAvx ) 18566 { 18567 IRTemp addr = IRTemp_INVALID; 18568 Int alen = 0; 18569 HChar dis_buf[50]; 18570 UChar modrm = getUChar(delta); 18571 Int imm8_10; 18572 IRTemp xmm_vec = newTemp(Ity_V128); 18573 IRTemp src_dword = newTemp(Ity_I32); 18574 UInt rG = gregOfRexRM(pfx,modrm); 18575 IRTemp t3, t2, t1, t0; 18576 t3 = t2 = t1 = t0 = IRTemp_INVALID; 18577 18578 assign( xmm_vec, getXMMReg( rG ) ); 18579 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 18580 18581 if ( epartIsReg( modrm ) ) { 18582 imm8_10 = (Int)(getUChar(delta+1) & 3); 18583 } else { 18584 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18585 imm8_10 = (Int)(getUChar(delta+alen) & 3); 18586 } 18587 18588 switch ( imm8_10 ) { 18589 case 0: assign( src_dword, mkexpr(t0) ); break; 18590 case 1: assign( src_dword, mkexpr(t1) ); break; 18591 case 2: assign( src_dword, mkexpr(t2) ); break; 18592 case 3: assign( src_dword, mkexpr(t3) ); break; 18593 default: vassert(0); 18594 } 18595 18596 if ( epartIsReg( modrm ) ) { 18597 UInt rE = eregOfRexRM(pfx,modrm); 18598 putIReg32( rE, mkexpr(src_dword) ); 18599 delta += 1+1; 18600 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 18601 nameXMMReg( rG ), nameIReg32( rE ) ); 18602 } else { 18603 storeLE( mkexpr(addr), mkexpr(src_dword) ); 18604 delta += alen+1; 18605 DIP( "%sextractps $%d, %s,%s\n", isAvx ? 
"v" : "", imm8_10, 18606 nameXMMReg( rG ), dis_buf ); 18607 } 18608 18609 return delta; 18610 } 18611 18612 18613 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 ) 18614 { 18615 IRTemp t0 = newTemp(Ity_I64); 18616 IRTemp t1 = newTemp(Ity_I64); 18617 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, 18618 mkexpr(dV))); 18619 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64, 18620 mkexpr(sV))); 18621 18622 IRTemp t2 = newTemp(Ity_I64); 18623 IRTemp t3 = newTemp(Ity_I64); 18624 18625 IRExpr** args; 18626 18627 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0)); 18628 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 18629 &amd64g_calculate_pclmul, args)); 18630 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1)); 18631 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 18632 &amd64g_calculate_pclmul, args)); 18633 18634 IRTemp res = newTemp(Ity_V128); 18635 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2))); 18636 return res; 18637 } 18638 18639 18640 __attribute__((noinline)) 18641 static 18642 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK, 18643 VexAbiInfo* vbi, 18644 Prefix pfx, Int sz, Long deltaIN ) 18645 { 18646 IRTemp addr = IRTemp_INVALID; 18647 UChar modrm = 0; 18648 Int alen = 0; 18649 HChar dis_buf[50]; 18650 18651 *decode_OK = False; 18652 18653 Long delta = deltaIN; 18654 UChar opc = getUChar(delta); 18655 delta++; 18656 switch (opc) { 18657 18658 case 0x08: 18659 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */ 18660 if (have66noF2noF3(pfx) && sz == 2) { 18661 18662 IRTemp src0 = newTemp(Ity_F32); 18663 IRTemp src1 = newTemp(Ity_F32); 18664 IRTemp src2 = newTemp(Ity_F32); 18665 IRTemp src3 = newTemp(Ity_F32); 18666 IRTemp res0 = newTemp(Ity_F32); 18667 IRTemp res1 = newTemp(Ity_F32); 18668 IRTemp res2 = newTemp(Ity_F32); 18669 IRTemp res3 = newTemp(Ity_F32); 18670 IRTemp rm = newTemp(Ity_I32); 18671 Int imm = 0; 18672 18673 modrm = getUChar(delta); 18674 18675 if (epartIsReg(modrm)) { 18676 assign( src0, 18677 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 18678 assign( src1, 18679 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) ); 18680 assign( src2, 18681 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) ); 18682 assign( src3, 18683 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) ); 18684 imm = getUChar(delta+1); 18685 if (imm & ~15) goto decode_failure; 18686 delta += 1+1; 18687 DIP( "roundps $%d,%s,%s\n", 18688 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 18689 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18690 } else { 18691 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18692 gen_SEGV_if_not_16_aligned(addr); 18693 assign( src0, loadLE(Ity_F32, 18694 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 18695 assign( src1, loadLE(Ity_F32, 18696 binop(Iop_Add64, mkexpr(addr), mkU64(4) ))); 18697 assign( src2, loadLE(Ity_F32, 18698 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 18699 assign( src3, loadLE(Ity_F32, 18700 binop(Iop_Add64, mkexpr(addr), mkU64(12) ))); 18701 imm = getUChar(delta+alen); 18702 if (imm & ~15) goto decode_failure; 18703 delta += alen+1; 18704 DIP( "roundps $%d,%s,%s\n", 18705 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18706 } 18707 18708 /* (imm & 3) contains an Intel-encoded rounding mode. Because 18709 that encoding is the same as the encoding for IRRoundingMode, 18710 we can use that value directly in the IR as a rounding 18711 mode. */ 18712 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 18713 18714 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) ); 18715 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) ); 18716 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) ); 18717 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) ); 18718 18719 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 18720 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 18721 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) ); 18722 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) ); 18723 18724 goto decode_success; 18725 } 18726 break; 18727 18728 case 0x09: 18729 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */ 18730 if (have66noF2noF3(pfx) && sz == 2) { 18731 18732 IRTemp src0 = newTemp(Ity_F64); 18733 IRTemp src1 = newTemp(Ity_F64); 18734 IRTemp res0 = newTemp(Ity_F64); 18735 IRTemp res1 = newTemp(Ity_F64); 18736 IRTemp rm = newTemp(Ity_I32); 18737 Int imm = 0; 18738 18739 modrm = getUChar(delta); 18740 18741 if (epartIsReg(modrm)) { 18742 assign( src0, 18743 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) ); 18744 assign( src1, 18745 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) ); 18746 imm = getUChar(delta+1); 18747 if (imm & ~15) goto decode_failure; 18748 delta += 1+1; 18749 DIP( "roundpd $%d,%s,%s\n", 18750 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 18751 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18752 } else { 18753 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18754 gen_SEGV_if_not_16_aligned(addr); 18755 assign( src0, loadLE(Ity_F64, 18756 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 18757 assign( src1, loadLE(Ity_F64, 18758 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 18759 imm = getUChar(delta+alen); 18760 if (imm & ~15) goto decode_failure; 18761 delta += alen+1; 18762 DIP( "roundpd $%d,%s,%s\n", 18763 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18764 } 18765 18766 /* (imm & 3) contains an Intel-encoded rounding mode. Because 18767 that encoding is the same as the encoding for IRRoundingMode, 18768 we can use that value directly in the IR as a rounding 18769 mode. */ 18770 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 18771 18772 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) ); 18773 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) ); 18774 18775 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 18776 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 18777 18778 goto decode_success; 18779 } 18780 break; 18781 18782 case 0x0A: 18783 case 0x0B: 18784 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 18785 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 18786 */ 18787 if (have66noF2noF3(pfx) && sz == 2) { 18788 18789 Bool isD = opc == 0x0B; 18790 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 18791 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 18792 Int imm = 0; 18793 18794 modrm = getUChar(delta); 18795 18796 if (epartIsReg(modrm)) { 18797 assign( src, 18798 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 18799 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 18800 imm = getUChar(delta+1); 18801 if (imm & ~15) goto decode_failure; 18802 delta += 1+1; 18803 DIP( "rounds%c $%d,%s,%s\n", 18804 isD ? 'd' : 's', 18805 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 18806 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18807 } else { 18808 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18809 assign( src, loadLE( isD ? 
Ity_F64 : Ity_F32, mkexpr(addr) )); 18810 imm = getUChar(delta+alen); 18811 if (imm & ~15) goto decode_failure; 18812 delta += alen+1; 18813 DIP( "rounds%c $%d,%s,%s\n", 18814 isD ? 'd' : 's', 18815 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18816 } 18817 18818 /* (imm & 3) contains an Intel-encoded rounding mode. Because 18819 that encoding is the same as the encoding for IRRoundingMode, 18820 we can use that value directly in the IR as a rounding 18821 mode. */ 18822 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 18823 (imm & 4) ? get_sse_roundingmode() 18824 : mkU32(imm & 3), 18825 mkexpr(src)) ); 18826 18827 if (isD) 18828 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 18829 else 18830 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 18831 18832 goto decode_success; 18833 } 18834 break; 18835 18836 case 0x0C: 18837 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8 18838 Blend Packed Single Precision Floating-Point Values (XMM) */ 18839 if (have66noF2noF3(pfx) && sz == 2) { 18840 18841 Int imm8; 18842 IRTemp dst_vec = newTemp(Ity_V128); 18843 IRTemp src_vec = newTemp(Ity_V128); 18844 18845 modrm = getUChar(delta); 18846 18847 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18848 18849 if ( epartIsReg( modrm ) ) { 18850 imm8 = (Int)getUChar(delta+1); 18851 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18852 delta += 1+1; 18853 DIP( "blendps $%d, %s,%s\n", imm8, 18854 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18855 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18856 } else { 18857 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18858 1/* imm8 is 1 byte after the amode */ ); 18859 gen_SEGV_if_not_16_aligned( addr ); 18860 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18861 imm8 = (Int)getUChar(delta+alen); 18862 delta += alen+1; 18863 DIP( "blendpd $%d, %s,%s\n", 18864 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18865 } 18866 18867 putXMMReg( gregOfRexRM(pfx, modrm), 18868 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) ); 18869 goto decode_success; 18870 } 18871 break; 18872 18873 case 0x0D: 18874 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8 18875 Blend Packed Double Precision Floating-Point Values (XMM) */ 18876 if (have66noF2noF3(pfx) && sz == 2) { 18877 18878 Int imm8; 18879 IRTemp dst_vec = newTemp(Ity_V128); 18880 IRTemp src_vec = newTemp(Ity_V128); 18881 18882 modrm = getUChar(delta); 18883 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18884 18885 if ( epartIsReg( modrm ) ) { 18886 imm8 = (Int)getUChar(delta+1); 18887 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18888 delta += 1+1; 18889 DIP( "blendpd $%d, %s,%s\n", imm8, 18890 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18891 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18892 } else { 18893 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18894 1/* imm8 is 1 byte after the amode */ ); 18895 gen_SEGV_if_not_16_aligned( addr ); 18896 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18897 imm8 = (Int)getUChar(delta+alen); 18898 delta += alen+1; 18899 DIP( "blendpd $%d, %s,%s\n", 18900 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18901 } 18902 18903 putXMMReg( gregOfRexRM(pfx, modrm), 18904 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) ); 18905 goto decode_success; 18906 } 18907 break; 18908 18909 case 0x0E: 18910 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8 18911 Blend Packed Words (XMM) */ 18912 if (have66noF2noF3(pfx) && sz == 2) { 18913 18914 Int imm8; 18915 IRTemp dst_vec = 
newTemp(Ity_V128); 18916 IRTemp src_vec = newTemp(Ity_V128); 18917 18918 modrm = getUChar(delta); 18919 18920 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 18921 18922 if ( epartIsReg( modrm ) ) { 18923 imm8 = (Int)getUChar(delta+1); 18924 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 18925 delta += 1+1; 18926 DIP( "pblendw $%d, %s,%s\n", imm8, 18927 nameXMMReg( eregOfRexRM(pfx, modrm) ), 18928 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18929 } else { 18930 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 18931 1/* imm8 is 1 byte after the amode */ ); 18932 gen_SEGV_if_not_16_aligned( addr ); 18933 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 18934 imm8 = (Int)getUChar(delta+alen); 18935 delta += alen+1; 18936 DIP( "pblendw $%d, %s,%s\n", 18937 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 18938 } 18939 18940 putXMMReg( gregOfRexRM(pfx, modrm), 18941 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) ); 18942 goto decode_success; 18943 } 18944 break; 18945 18946 case 0x14: 18947 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8 18948 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. 18949 (XMM) */ 18950 if (have66noF2noF3(pfx) && sz == 2) { 18951 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 18952 goto decode_success; 18953 } 18954 break; 18955 18956 case 0x15: 18957 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8 18958 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. 18959 (XMM) */ 18960 if (have66noF2noF3(pfx) && sz == 2) { 18961 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ ); 18962 goto decode_success; 18963 } 18964 break; 18965 18966 case 0x16: 18967 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8 18968 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM) 18969 Note that this insn has the same opcodes as PEXTRQ, but 18970 here the REX.W bit is _not_ present */ 18971 if (have66noF2noF3(pfx) 18972 && sz == 2 /* REX.W is _not_ present */) { 18973 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ ); 18974 goto decode_success; 18975 } 18976 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8 18977 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM) 18978 Note that this insn has the same opcodes as PEXTRD, but 18979 here the REX.W bit is present */ 18980 if (have66noF2noF3(pfx) 18981 && sz == 8 /* REX.W is present */) { 18982 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/); 18983 goto decode_success; 18984 } 18985 break; 18986 18987 case 0x17: 18988 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract 18989 float from xmm reg and store in gen.reg or mem. This is 18990 identical to PEXTRD, except that REX.W appears to be ignored. 
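         (Only imm8[1:0] is consulted when selecting the source lane;
         dis_EXTRACTPS masks the immediate with 3 accordingly.)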
18991 */ 18992 if (have66noF2noF3(pfx) 18993 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 18994 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ ); 18995 goto decode_success; 18996 } 18997 break; 18998 18999 case 0x20: 19000 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 19001 Extract byte from r32/m8 and insert into xmm1 */ 19002 if (have66noF2noF3(pfx) && sz == 2) { 19003 Int imm8; 19004 IRTemp new8 = newTemp(Ity_I8); 19005 modrm = getUChar(delta); 19006 UInt rG = gregOfRexRM(pfx, modrm); 19007 if ( epartIsReg( modrm ) ) { 19008 UInt rE = eregOfRexRM(pfx,modrm); 19009 imm8 = (Int)(getUChar(delta+1) & 0xF); 19010 assign( new8, unop(Iop_32to8, getIReg32(rE)) ); 19011 delta += 1+1; 19012 DIP( "pinsrb $%d,%s,%s\n", imm8, 19013 nameIReg32(rE), nameXMMReg(rG) ); 19014 } else { 19015 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19016 imm8 = (Int)(getUChar(delta+alen) & 0xF); 19017 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) ); 19018 delta += alen+1; 19019 DIP( "pinsrb $%d,%s,%s\n", 19020 imm8, dis_buf, nameXMMReg(rG) ); 19021 } 19022 IRTemp src_vec = newTemp(Ity_V128); 19023 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) )); 19024 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 ); 19025 putXMMReg( rG, mkexpr(res) ); 19026 goto decode_success; 19027 } 19028 break; 19029 19030 case 0x21: 19031 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1 19032 Insert Packed Single Precision Floating-Point Value (XMM) */ 19033 if (have66noF2noF3(pfx) && sz == 2) { 19034 UInt imm8; 19035 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 19036 const IRTemp inval = IRTemp_INVALID; 19037 19038 modrm = getUChar(delta); 19039 UInt rG = gregOfRexRM(pfx, modrm); 19040 19041 if ( epartIsReg( modrm ) ) { 19042 UInt rE = eregOfRexRM(pfx, modrm); 19043 IRTemp vE = newTemp(Ity_V128); 19044 assign( vE, getXMMReg(rE) ); 19045 IRTemp dsE[4] = { inval, inval, inval, inval }; 19046 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 19047 imm8 = getUChar(delta+1); 19048 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 19049 delta += 1+1; 19050 DIP( "insertps $%u, %s,%s\n", 19051 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19052 } else { 19053 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19054 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 19055 imm8 = getUChar(delta+alen); 19056 delta += alen+1; 19057 DIP( "insertps $%u, %s,%s\n", 19058 imm8, dis_buf, nameXMMReg(rG) ); 19059 } 19060 19061 IRTemp vG = newTemp(Ity_V128); 19062 assign( vG, getXMMReg(rG) ); 19063 19064 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) ); 19065 goto decode_success; 19066 } 19067 break; 19068 19069 case 0x22: 19070 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 19071 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 19072 if (have66noF2noF3(pfx) 19073 && sz == 2 /* REX.W is NOT present */) { 19074 Int imm8_10; 19075 IRTemp src_u32 = newTemp(Ity_I32); 19076 modrm = getUChar(delta); 19077 UInt rG = gregOfRexRM(pfx, modrm); 19078 19079 if ( epartIsReg( modrm ) ) { 19080 UInt rE = eregOfRexRM(pfx,modrm); 19081 imm8_10 = (Int)(getUChar(delta+1) & 3); 19082 assign( src_u32, getIReg32( rE ) ); 19083 delta += 1+1; 19084 DIP( "pinsrd $%d, %s,%s\n", 19085 imm8_10, nameIReg32(rE), nameXMMReg(rG) ); 19086 } else { 19087 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19088 imm8_10 = (Int)(getUChar(delta+alen) & 3); 19089 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 19090 delta += alen+1; 19091 DIP( "pinsrd $%d, %s,%s\n", 19092 imm8_10, dis_buf, 
nameXMMReg(rG) ); 19093 } 19094 19095 IRTemp src_vec = newTemp(Ity_V128); 19096 assign(src_vec, getXMMReg( rG )); 19097 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 19098 putXMMReg( rG, mkexpr(res_vec) ); 19099 goto decode_success; 19100 } 19101 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 19102 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 19103 if (have66noF2noF3(pfx) 19104 && sz == 8 /* REX.W is present */) { 19105 Int imm8_0; 19106 IRTemp src_u64 = newTemp(Ity_I64); 19107 modrm = getUChar(delta); 19108 UInt rG = gregOfRexRM(pfx, modrm); 19109 19110 if ( epartIsReg( modrm ) ) { 19111 UInt rE = eregOfRexRM(pfx,modrm); 19112 imm8_0 = (Int)(getUChar(delta+1) & 1); 19113 assign( src_u64, getIReg64( rE ) ); 19114 delta += 1+1; 19115 DIP( "pinsrq $%d, %s,%s\n", 19116 imm8_0, nameIReg64(rE), nameXMMReg(rG) ); 19117 } else { 19118 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19119 imm8_0 = (Int)(getUChar(delta+alen) & 1); 19120 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 19121 delta += alen+1; 19122 DIP( "pinsrq $%d, %s,%s\n", 19123 imm8_0, dis_buf, nameXMMReg(rG) ); 19124 } 19125 19126 IRTemp src_vec = newTemp(Ity_V128); 19127 assign(src_vec, getXMMReg( rG )); 19128 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 19129 putXMMReg( rG, mkexpr(res_vec) ); 19130 goto decode_success; 19131 } 19132 break; 19133 19134 case 0x40: 19135 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 19136 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 19137 if (have66noF2noF3(pfx) && sz == 2) { 19138 modrm = getUChar(delta); 19139 Int imm8; 19140 IRTemp src_vec = newTemp(Ity_V128); 19141 IRTemp dst_vec = newTemp(Ity_V128); 19142 UInt rG = gregOfRexRM(pfx, modrm); 19143 assign( dst_vec, getXMMReg( rG ) ); 19144 if ( epartIsReg( modrm ) ) { 19145 UInt rE = eregOfRexRM(pfx, modrm); 19146 imm8 = (Int)getUChar(delta+1); 19147 assign( src_vec, getXMMReg(rE) ); 19148 delta += 1+1; 19149 DIP( "dpps $%d, %s,%s\n", 19150 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19151 } else { 19152 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19153 1/* imm8 is 1 byte after the amode */ ); 19154 gen_SEGV_if_not_16_aligned( addr ); 19155 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19156 imm8 = (Int)getUChar(delta+alen); 19157 delta += alen+1; 19158 DIP( "dpps $%d, %s,%s\n", 19159 imm8, dis_buf, nameXMMReg(rG) ); 19160 } 19161 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 ); 19162 putXMMReg( rG, mkexpr(res) ); 19163 goto decode_success; 19164 } 19165 break; 19166 19167 case 0x41: 19168 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 19169 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 19170 if (have66noF2noF3(pfx) && sz == 2) { 19171 modrm = getUChar(delta); 19172 Int imm8; 19173 IRTemp src_vec = newTemp(Ity_V128); 19174 IRTemp dst_vec = newTemp(Ity_V128); 19175 UInt rG = gregOfRexRM(pfx, modrm); 19176 assign( dst_vec, getXMMReg( rG ) ); 19177 if ( epartIsReg( modrm ) ) { 19178 UInt rE = eregOfRexRM(pfx, modrm); 19179 imm8 = (Int)getUChar(delta+1); 19180 assign( src_vec, getXMMReg(rE) ); 19181 delta += 1+1; 19182 DIP( "dppd $%d, %s,%s\n", 19183 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19184 } else { 19185 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19186 1/* imm8 is 1 byte after the amode */ ); 19187 gen_SEGV_if_not_16_aligned( addr ); 19188 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19189 imm8 = (Int)getUChar(delta+alen); 19190 delta += alen+1; 19191 DIP( "dppd $%d, %s,%s\n", 19192 imm8, 
dis_buf, nameXMMReg(rG) );
19193 }
19194 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19195 putXMMReg( rG, mkexpr(res) );
19196 goto decode_success;
19197 }
19198 break;
19199
19200 case 0x42:
19201 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19202 Multiple Packed Sums of Absolute Difference (XMM) */
19203 if (have66noF2noF3(pfx) && sz == 2) {
19204 Int imm8;
19205 IRTemp src_vec = newTemp(Ity_V128);
19206 IRTemp dst_vec = newTemp(Ity_V128);
19207 modrm = getUChar(delta);
19208 UInt rG = gregOfRexRM(pfx, modrm);
19209
19210 assign( dst_vec, getXMMReg(rG) );
19211
19212 if ( epartIsReg( modrm ) ) {
19213 UInt rE = eregOfRexRM(pfx, modrm);
19214
19215 imm8 = (Int)getUChar(delta+1);
19216 assign( src_vec, getXMMReg(rE) );
19217 delta += 1+1;
19218 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19219 nameXMMReg(rE), nameXMMReg(rG) );
19220 } else {
19221 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19222 1/* imm8 is 1 byte after the amode */ );
19223 gen_SEGV_if_not_16_aligned( addr );
19224 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19225 imm8 = (Int)getUChar(delta+alen);
19226 delta += alen+1;
19227 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19228 }
19229
19230 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19231 goto decode_success;
19232 }
19233 break;
19234
19235 case 0x44:
19236 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19237 * Carry-less multiplication of selected XMM quadwords into XMM
19238 * registers (a.k.a. multiplication of polynomials over GF(2))
19239 */
19240 if (have66noF2noF3(pfx) && sz == 2) {
19241
19242 Int imm8;
19243 IRTemp svec = newTemp(Ity_V128);
19244 IRTemp dvec = newTemp(Ity_V128);
19245 modrm = getUChar(delta);
19246 UInt rG = gregOfRexRM(pfx, modrm);
19247
19248 assign( dvec, getXMMReg(rG) );
19249
19250 if ( epartIsReg( modrm ) ) {
19251 UInt rE = eregOfRexRM(pfx, modrm);
19252 imm8 = (Int)getUChar(delta+1);
19253 assign( svec, getXMMReg(rE) );
19254 delta += 1+1;
19255 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19256 nameXMMReg(rE), nameXMMReg(rG) );
19257 } else {
19258 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19259 1/* imm8 is 1 byte after the amode */ );
19260 gen_SEGV_if_not_16_aligned( addr );
19261 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19262 imm8 = (Int)getUChar(delta+alen);
19263 delta += alen+1;
19264 DIP( "pclmulqdq $%d, %s,%s\n",
19265 imm8, dis_buf, nameXMMReg(rG) );
19266 }
19267
19268 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19269 goto decode_success;
19270 }
19271 break;
19272
19273 case 0x60:
19274 case 0x61:
19275 case 0x62:
19276 case 0x63:
19277 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19278 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19279 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19280 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19281 (selected special cases that actually occur in glibc,
19282 not by any means a complete implementation.)
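         dis_PCMPxSTRx only accepts imm8 control bytes for which its
         helper has been verified (see the whitelist in that function);
         for anything else it returns delta unchanged and we fall
         through to decode_failure.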
19283 */ 19284 if (have66noF2noF3(pfx) && sz == 2) { 19285 Long delta0 = delta; 19286 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc ); 19287 if (delta > delta0) goto decode_success; 19288 /* else fall though; dis_PCMPxSTRx failed to decode it */ 19289 } 19290 break; 19291 19292 case 0xDF: 19293 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */ 19294 if (have66noF2noF3(pfx) && sz == 2) { 19295 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ ); 19296 goto decode_success; 19297 } 19298 break; 19299 19300 default: 19301 break; 19302 19303 } 19304 19305 decode_failure: 19306 *decode_OK = False; 19307 return deltaIN; 19308 19309 decode_success: 19310 *decode_OK = True; 19311 return delta; 19312 } 19313 19314 19315 /*------------------------------------------------------------*/ 19316 /*--- ---*/ 19317 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/ 19318 /*--- ---*/ 19319 /*------------------------------------------------------------*/ 19320 19321 __attribute__((noinline)) 19322 static 19323 Long dis_ESC_NONE ( 19324 /*MB_OUT*/DisResult* dres, 19325 /*MB_OUT*/Bool* expect_CAS, 19326 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 19327 Bool resteerCisOk, 19328 void* callback_opaque, 19329 VexArchInfo* archinfo, 19330 VexAbiInfo* vbi, 19331 Prefix pfx, Int sz, Long deltaIN 19332 ) 19333 { 19334 Long d64 = 0; 19335 UChar abyte = 0; 19336 IRTemp addr = IRTemp_INVALID; 19337 IRTemp t1 = IRTemp_INVALID; 19338 IRTemp t2 = IRTemp_INVALID; 19339 IRTemp t3 = IRTemp_INVALID; 19340 IRTemp t4 = IRTemp_INVALID; 19341 IRTemp t5 = IRTemp_INVALID; 19342 IRType ty = Ity_INVALID; 19343 UChar modrm = 0; 19344 Int am_sz = 0; 19345 Int d_sz = 0; 19346 Int alen = 0; 19347 HChar dis_buf[50]; 19348 19349 Long delta = deltaIN; 19350 UChar opc = getUChar(delta); delta++; 19351 19352 /* delta now points at the modrm byte. In most of the cases that 19353 follow, neither the F2 nor F3 prefixes are allowed. However, 19354 for some basic arithmetic operations we have to allow F2/XACQ or 19355 F3/XREL in the case where the destination is memory and the LOCK 19356 prefix is also present. Do this check by looking at the modrm 19357 byte but not advancing delta over it. */ 19358 /* By default, F2 and F3 are not allowed, so let's start off with 19359 that setting. */ 19360 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 19361 { UChar tmp_modrm = getUChar(delta); 19362 switch (opc) { 19363 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */ 19364 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */ 19365 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */ 19366 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */ 19367 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */ 19368 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */ 19369 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */ 19370 if (!epartIsReg(tmp_modrm) 19371 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 19372 /* dst is mem, and we have F2 or F3 but not both */ 19373 validF2orF3 = True; 19374 } 19375 break; 19376 default: 19377 break; 19378 } 19379 } 19380 19381 /* Now, in the switch below, for the opc values examined by the 19382 switch above, use validF2orF3 rather than looking at pfx 19383 directly. 
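      For example, an XACQUIRE/LOCK-prefixed "add %eax,(%rdi)" (F2 plus
      F0 prefixes, memory destination) must be accepted here, whereas the
      same F2 prefix on a register-destination add must still be
      rejected.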
*/ 19384 switch (opc) { 19385 19386 case 0x00: /* ADD Gb,Eb */ 19387 if (!validF2orF3) goto decode_failure; 19388 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19389 return delta; 19390 case 0x01: /* ADD Gv,Ev */ 19391 if (!validF2orF3) goto decode_failure; 19392 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19393 return delta; 19394 19395 case 0x02: /* ADD Eb,Gb */ 19396 if (haveF2orF3(pfx)) goto decode_failure; 19397 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19398 return delta; 19399 case 0x03: /* ADD Ev,Gv */ 19400 if (haveF2orF3(pfx)) goto decode_failure; 19401 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19402 return delta; 19403 19404 case 0x04: /* ADD Ib, AL */ 19405 if (haveF2orF3(pfx)) goto decode_failure; 19406 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 19407 return delta; 19408 case 0x05: /* ADD Iv, eAX */ 19409 if (haveF2orF3(pfx)) goto decode_failure; 19410 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); 19411 return delta; 19412 19413 case 0x08: /* OR Gb,Eb */ 19414 if (!validF2orF3) goto decode_failure; 19415 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19416 return delta; 19417 case 0x09: /* OR Gv,Ev */ 19418 if (!validF2orF3) goto decode_failure; 19419 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19420 return delta; 19421 19422 case 0x0A: /* OR Eb,Gb */ 19423 if (haveF2orF3(pfx)) goto decode_failure; 19424 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19425 return delta; 19426 case 0x0B: /* OR Ev,Gv */ 19427 if (haveF2orF3(pfx)) goto decode_failure; 19428 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19429 return delta; 19430 19431 case 0x0C: /* OR Ib, AL */ 19432 if (haveF2orF3(pfx)) goto decode_failure; 19433 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 19434 return delta; 19435 case 0x0D: /* OR Iv, eAX */ 19436 if (haveF2orF3(pfx)) goto decode_failure; 19437 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 19438 return delta; 19439 19440 case 0x10: /* ADC Gb,Eb */ 19441 if (!validF2orF3) goto decode_failure; 19442 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19443 return delta; 19444 case 0x11: /* ADC Gv,Ev */ 19445 if (!validF2orF3) goto decode_failure; 19446 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19447 return delta; 19448 19449 case 0x12: /* ADC Eb,Gb */ 19450 if (haveF2orF3(pfx)) goto decode_failure; 19451 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19452 return delta; 19453 case 0x13: /* ADC Ev,Gv */ 19454 if (haveF2orF3(pfx)) goto decode_failure; 19455 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19456 return delta; 19457 19458 case 0x14: /* ADC Ib, AL */ 19459 if (haveF2orF3(pfx)) goto decode_failure; 19460 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 19461 return delta; 19462 case 0x15: /* ADC Iv, eAX */ 19463 if (haveF2orF3(pfx)) goto decode_failure; 19464 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 19465 return delta; 19466 19467 case 0x18: /* SBB Gb,Eb */ 19468 if (!validF2orF3) goto decode_failure; 19469 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19470 return delta; 19471 case 0x19: /* SBB Gv,Ev */ 19472 if (!validF2orF3) goto decode_failure; 19473 delta = dis_op2_G_E ( vbi, pfx, 
True, Iop_Sub8, True, sz, delta, "sbb" ); 19474 return delta; 19475 19476 case 0x1A: /* SBB Eb,Gb */ 19477 if (haveF2orF3(pfx)) goto decode_failure; 19478 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19479 return delta; 19480 case 0x1B: /* SBB Ev,Gv */ 19481 if (haveF2orF3(pfx)) goto decode_failure; 19482 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 19483 return delta; 19484 19485 case 0x1C: /* SBB Ib, AL */ 19486 if (haveF2orF3(pfx)) goto decode_failure; 19487 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 19488 return delta; 19489 case 0x1D: /* SBB Iv, eAX */ 19490 if (haveF2orF3(pfx)) goto decode_failure; 19491 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 19492 return delta; 19493 19494 case 0x20: /* AND Gb,Eb */ 19495 if (!validF2orF3) goto decode_failure; 19496 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19497 return delta; 19498 case 0x21: /* AND Gv,Ev */ 19499 if (!validF2orF3) goto decode_failure; 19500 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19501 return delta; 19502 19503 case 0x22: /* AND Eb,Gb */ 19504 if (haveF2orF3(pfx)) goto decode_failure; 19505 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19506 return delta; 19507 case 0x23: /* AND Ev,Gv */ 19508 if (haveF2orF3(pfx)) goto decode_failure; 19509 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19510 return delta; 19511 19512 case 0x24: /* AND Ib, AL */ 19513 if (haveF2orF3(pfx)) goto decode_failure; 19514 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 19515 return delta; 19516 case 0x25: /* AND Iv, eAX */ 19517 if (haveF2orF3(pfx)) goto decode_failure; 19518 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 19519 return delta; 19520 19521 case 0x28: /* SUB Gb,Eb */ 19522 if (!validF2orF3) goto decode_failure; 19523 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 19524 return delta; 19525 case 0x29: /* SUB Gv,Ev */ 19526 if (!validF2orF3) goto decode_failure; 19527 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 19528 return delta; 19529 19530 case 0x2A: /* SUB Eb,Gb */ 19531 if (haveF2orF3(pfx)) goto decode_failure; 19532 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 19533 return delta; 19534 case 0x2B: /* SUB Ev,Gv */ 19535 if (haveF2orF3(pfx)) goto decode_failure; 19536 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 19537 return delta; 19538 19539 case 0x2C: /* SUB Ib, AL */ 19540 if (haveF2orF3(pfx)) goto decode_failure; 19541 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 19542 return delta; 19543 case 0x2D: /* SUB Iv, eAX */ 19544 if (haveF2orF3(pfx)) goto decode_failure; 19545 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 19546 return delta; 19547 19548 case 0x30: /* XOR Gb,Eb */ 19549 if (!validF2orF3) goto decode_failure; 19550 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 19551 return delta; 19552 case 0x31: /* XOR Gv,Ev */ 19553 if (!validF2orF3) goto decode_failure; 19554 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 19555 return delta; 19556 19557 case 0x32: /* XOR Eb,Gb */ 19558 if (haveF2orF3(pfx)) goto decode_failure; 19559 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 19560 return delta; 19561 case 0x33: /* XOR Ev,Gv */ 19562 if 
(haveF2orF3(pfx)) goto decode_failure; 19563 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 19564 return delta; 19565 19566 case 0x34: /* XOR Ib, AL */ 19567 if (haveF2orF3(pfx)) goto decode_failure; 19568 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 19569 return delta; 19570 case 0x35: /* XOR Iv, eAX */ 19571 if (haveF2orF3(pfx)) goto decode_failure; 19572 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 19573 return delta; 19574 19575 case 0x38: /* CMP Gb,Eb */ 19576 if (haveF2orF3(pfx)) goto decode_failure; 19577 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 19578 return delta; 19579 case 0x39: /* CMP Gv,Ev */ 19580 if (haveF2orF3(pfx)) goto decode_failure; 19581 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 19582 return delta; 19583 19584 case 0x3A: /* CMP Eb,Gb */ 19585 if (haveF2orF3(pfx)) goto decode_failure; 19586 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 19587 return delta; 19588 case 0x3B: /* CMP Ev,Gv */ 19589 if (haveF2orF3(pfx)) goto decode_failure; 19590 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 19591 return delta; 19592 19593 case 0x3C: /* CMP Ib, AL */ 19594 if (haveF2orF3(pfx)) goto decode_failure; 19595 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 19596 return delta; 19597 case 0x3D: /* CMP Iv, eAX */ 19598 if (haveF2orF3(pfx)) goto decode_failure; 19599 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 19600 return delta; 19601 19602 case 0x50: /* PUSH eAX */ 19603 case 0x51: /* PUSH eCX */ 19604 case 0x52: /* PUSH eDX */ 19605 case 0x53: /* PUSH eBX */ 19606 case 0x55: /* PUSH eBP */ 19607 case 0x56: /* PUSH eSI */ 19608 case 0x57: /* PUSH eDI */ 19609 case 0x54: /* PUSH eSP */ 19610 /* This is the Right Way, in that the value to be pushed is 19611 established before %rsp is changed, so that pushq %rsp 19612 correctly pushes the old value. */ 19613 if (haveF2orF3(pfx)) goto decode_failure; 19614 vassert(sz == 2 || sz == 4 || sz == 8); 19615 if (sz == 4) 19616 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ 19617 ty = sz==2 ? 
Ity_I16 : Ity_I64; 19618 t1 = newTemp(ty); 19619 t2 = newTemp(Ity_I64); 19620 assign(t1, getIRegRexB(sz, pfx, opc-0x50)); 19621 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); 19622 putIReg64(R_RSP, mkexpr(t2) ); 19623 storeLE(mkexpr(t2),mkexpr(t1)); 19624 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); 19625 return delta; 19626 19627 case 0x58: /* POP eAX */ 19628 case 0x59: /* POP eCX */ 19629 case 0x5A: /* POP eDX */ 19630 case 0x5B: /* POP eBX */ 19631 case 0x5D: /* POP eBP */ 19632 case 0x5E: /* POP eSI */ 19633 case 0x5F: /* POP eDI */ 19634 case 0x5C: /* POP eSP */ 19635 if (haveF2orF3(pfx)) goto decode_failure; 19636 vassert(sz == 2 || sz == 4 || sz == 8); 19637 if (sz == 4) 19638 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 19639 t1 = newTemp(szToITy(sz)); 19640 t2 = newTemp(Ity_I64); 19641 assign(t2, getIReg64(R_RSP)); 19642 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 19643 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 19644 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 19645 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 19646 return delta; 19647 19648 case 0x63: /* MOVSX */ 19649 if (haveF2orF3(pfx)) goto decode_failure; 19650 if (haveREX(pfx) && 1==getRexW(pfx)) { 19651 vassert(sz == 8); 19652 /* movsx r/m32 to r64 */ 19653 modrm = getUChar(delta); 19654 if (epartIsReg(modrm)) { 19655 delta++; 19656 putIRegG(8, pfx, modrm, 19657 unop(Iop_32Sto64, 19658 getIRegE(4, pfx, modrm))); 19659 DIP("movslq %s,%s\n", 19660 nameIRegE(4, pfx, modrm), 19661 nameIRegG(8, pfx, modrm)); 19662 return delta; 19663 } else { 19664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19665 delta += alen; 19666 putIRegG(8, pfx, modrm, 19667 unop(Iop_32Sto64, 19668 loadLE(Ity_I32, mkexpr(addr)))); 19669 DIP("movslq %s,%s\n", dis_buf, 19670 nameIRegG(8, pfx, modrm)); 19671 return delta; 19672 } 19673 } else { 19674 goto decode_failure; 19675 } 19676 19677 case 0x68: /* PUSH Iv */ 19678 if (haveF2orF3(pfx)) goto decode_failure; 19679 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 19680 if (sz == 4) sz = 8; 19681 d64 = getSDisp(imin(4,sz),delta); 19682 delta += imin(4,sz); 19683 goto do_push_I; 19684 19685 case 0x69: /* IMUL Iv, Ev, Gv */ 19686 if (haveF2orF3(pfx)) goto decode_failure; 19687 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 19688 return delta; 19689 19690 case 0x6A: /* PUSH Ib, sign-extended to sz */ 19691 if (haveF2orF3(pfx)) goto decode_failure; 19692 /* Note, sz==4 is not possible in 64-bit mode. Hence ... 
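      (the operand size of a push defaults to 64 bits in 64-bit mode and
      can only be overridden to 16 bits, so sz==4 merely means no
      operand-size prefix was seen)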
*/ 19693 if (sz == 4) sz = 8; 19694 d64 = getSDisp8(delta); delta += 1; 19695 goto do_push_I; 19696 do_push_I: 19697 ty = szToITy(sz); 19698 t1 = newTemp(Ity_I64); 19699 t2 = newTemp(ty); 19700 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 19701 putIReg64(R_RSP, mkexpr(t1) ); 19702 /* stop mkU16 asserting if d32 is a negative 16-bit number 19703 (bug #132813) */ 19704 if (ty == Ity_I16) 19705 d64 &= 0xFFFF; 19706 storeLE( mkexpr(t1), mkU(ty,d64) ); 19707 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 19708 return delta; 19709 19710 case 0x6B: /* IMUL Ib, Ev, Gv */ 19711 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 19712 return delta; 19713 19714 case 0x70: 19715 case 0x71: 19716 case 0x72: /* JBb/JNAEb (jump below) */ 19717 case 0x73: /* JNBb/JAEb (jump not below) */ 19718 case 0x74: /* JZb/JEb (jump zero) */ 19719 case 0x75: /* JNZb/JNEb (jump not zero) */ 19720 case 0x76: /* JBEb/JNAb (jump below or equal) */ 19721 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 19722 case 0x78: /* JSb (jump negative) */ 19723 case 0x79: /* JSb (jump not negative) */ 19724 case 0x7A: /* JP (jump parity even) */ 19725 case 0x7B: /* JNP/JPO (jump parity odd) */ 19726 case 0x7C: /* JLb/JNGEb (jump less) */ 19727 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 19728 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 19729 case 0x7F: { /* JGb/JNLEb (jump greater) */ 19730 Long jmpDelta; 19731 const HChar* comment = ""; 19732 if (haveF3(pfx)) goto decode_failure; 19733 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 19734 jmpDelta = getSDisp8(delta); 19735 vassert(-128 <= jmpDelta && jmpDelta < 128); 19736 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; 19737 delta++; 19738 if (resteerCisOk 19739 && vex_control.guest_chase_cond 19740 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19741 && jmpDelta < 0 19742 && resteerOkFn( callback_opaque, d64) ) { 19743 /* Speculation: assume this backward branch is taken. So we 19744 need to emit a side-exit to the insn following this one, 19745 on the negation of the condition, and continue at the 19746 branch target address (d64). If we wind up back at the 19747 first instruction of the trace, just stop; it's better to 19748 let the IR loop unroller handle that case. */ 19749 stmt( IRStmt_Exit( 19750 mk_amd64g_calculate_condition( 19751 (AMD64Condcode)(1 ^ (opc - 0x70))), 19752 Ijk_Boring, 19753 IRConst_U64(guest_RIP_bbstart+delta), 19754 OFFB_RIP ) ); 19755 dres->whatNext = Dis_ResteerC; 19756 dres->continueAt = d64; 19757 comment = "(assumed taken)"; 19758 } 19759 else 19760 if (resteerCisOk 19761 && vex_control.guest_chase_cond 19762 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 19763 && jmpDelta >= 0 19764 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 19765 /* Speculation: assume this forward branch is not taken. So 19766 we need to emit a side-exit to d64 (the dest) and continue 19767 disassembling at the insn immediately following this 19768 one. */ 19769 stmt( IRStmt_Exit( 19770 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 19771 Ijk_Boring, 19772 IRConst_U64(d64), 19773 OFFB_RIP ) ); 19774 dres->whatNext = Dis_ResteerC; 19775 dres->continueAt = guest_RIP_bbstart+delta; 19776 comment = "(assumed not taken)"; 19777 } 19778 else { 19779 /* Conservative default translation - end the block at this 19780 point. 
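            That is, let jcc_01 end the block with a two-way conditional
            exit (one arm as a side-exit, the other as the block's final
            jump), rather than speculating on the branch direction.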
*/ 19781 jcc_01( dres, (AMD64Condcode)(opc - 0x70), 19782 guest_RIP_bbstart+delta, d64 ); 19783 vassert(dres->whatNext == Dis_StopHere); 19784 } 19785 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment); 19786 return delta; 19787 } 19788 19789 case 0x80: /* Grp1 Ib,Eb */ 19790 modrm = getUChar(delta); 19791 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow 19792 just one for the mem case and also require LOCK in this case. 19793 Note that this erroneously allows XACQ/XREL on CMP since we 19794 don't check the subopcode here. No big deal. */ 19795 if (epartIsReg(modrm) && haveF2orF3(pfx)) 19796 goto decode_failure; 19797 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 19798 goto decode_failure; 19799 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 19800 goto decode_failure; 19801 am_sz = lengthAMode(pfx,delta); 19802 sz = 1; 19803 d_sz = 1; 19804 d64 = getSDisp8(delta + am_sz); 19805 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19806 return delta; 19807 19808 case 0x81: /* Grp1 Iv,Ev */ 19809 modrm = getUChar(delta); 19810 /* Same comment as for case 0x80 just above. */ 19811 if (epartIsReg(modrm) && haveF2orF3(pfx)) 19812 goto decode_failure; 19813 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 19814 goto decode_failure; 19815 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 19816 goto decode_failure; 19817 am_sz = lengthAMode(pfx,delta); 19818 d_sz = imin(sz,4); 19819 d64 = getSDisp(d_sz, delta + am_sz); 19820 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19821 return delta; 19822 19823 case 0x83: /* Grp1 Ib,Ev */ 19824 if (haveF2orF3(pfx)) goto decode_failure; 19825 modrm = getUChar(delta); 19826 am_sz = lengthAMode(pfx,delta); 19827 d_sz = 1; 19828 d64 = getSDisp8(delta + am_sz); 19829 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 19830 return delta; 19831 19832 case 0x84: /* TEST Eb,Gb */ 19833 if (haveF2orF3(pfx)) goto decode_failure; 19834 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 19835 return delta; 19836 19837 case 0x85: /* TEST Ev,Gv */ 19838 if (haveF2orF3(pfx)) goto decode_failure; 19839 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 19840 return delta; 19841 19842 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 19843 prefix. Therefore, generate CAS regardless of the presence or 19844 otherwise of a LOCK prefix. */ 19845 case 0x86: /* XCHG Gb,Eb */ 19846 sz = 1; 19847 /* Fall through ... */ 19848 case 0x87: /* XCHG Gv,Ev */ 19849 modrm = getUChar(delta); 19850 /* Check whether F2 or F3 are allowable. For the mem case, one 19851 or the othter but not both are. We don't care about the 19852 presence of LOCK in this case -- XCHG is unusual in this 19853 respect. 
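         (XCHG with a memory operand is implicitly locked, so an
         XACQUIRE/XRELEASE hint is meaningful here even without an
         explicit LOCK prefix.)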
*/ 19854 if (haveF2orF3(pfx)) { 19855 if (epartIsReg(modrm)) { 19856 goto decode_failure; 19857 } else { 19858 if (haveF2andF3(pfx)) 19859 goto decode_failure; 19860 } 19861 } 19862 ty = szToITy(sz); 19863 t1 = newTemp(ty); t2 = newTemp(ty); 19864 if (epartIsReg(modrm)) { 19865 assign(t1, getIRegE(sz, pfx, modrm)); 19866 assign(t2, getIRegG(sz, pfx, modrm)); 19867 putIRegG(sz, pfx, modrm, mkexpr(t1)); 19868 putIRegE(sz, pfx, modrm, mkexpr(t2)); 19869 delta++; 19870 DIP("xchg%c %s, %s\n", 19871 nameISize(sz), nameIRegG(sz, pfx, modrm), 19872 nameIRegE(sz, pfx, modrm)); 19873 } else { 19874 *expect_CAS = True; 19875 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 19876 assign( t1, loadLE(ty, mkexpr(addr)) ); 19877 assign( t2, getIRegG(sz, pfx, modrm) ); 19878 casLE( mkexpr(addr), 19879 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 19880 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 19881 delta += alen; 19882 DIP("xchg%c %s, %s\n", nameISize(sz), 19883 nameIRegG(sz, pfx, modrm), dis_buf); 19884 } 19885 return delta; 19886 19887 case 0x88: { /* MOV Gb,Eb */ 19888 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */ 19889 Bool ok = True; 19890 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok); 19891 if (!ok) goto decode_failure; 19892 return delta; 19893 } 19894 19895 case 0x89: { /* MOV Gv,Ev */ 19896 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */ 19897 Bool ok = True; 19898 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok); 19899 if (!ok) goto decode_failure; 19900 return delta; 19901 } 19902 19903 case 0x8A: /* MOV Eb,Gb */ 19904 if (haveF2orF3(pfx)) goto decode_failure; 19905 delta = dis_mov_E_G(vbi, pfx, 1, delta); 19906 return delta; 19907 19908 case 0x8B: /* MOV Ev,Gv */ 19909 if (haveF2orF3(pfx)) goto decode_failure; 19910 delta = dis_mov_E_G(vbi, pfx, sz, delta); 19911 return delta; 19912 19913 case 0x8D: /* LEA M,Gv */ 19914 if (haveF2orF3(pfx)) goto decode_failure; 19915 if (sz != 4 && sz != 8) 19916 goto decode_failure; 19917 modrm = getUChar(delta); 19918 if (epartIsReg(modrm)) 19919 goto decode_failure; 19920 /* NOTE! this is the one place where a segment override prefix 19921 has no effect on the address calculation. Therefore we clear 19922 any segment override bits in pfx. */ 19923 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 19924 delta += alen; 19925 /* This is a hack. But it isn't clear that really doing the 19926 calculation at 32 bits is really worth it. Hence for leal, 19927 do the full 64-bit calculation and then truncate it. */ 19928 putIRegG( sz, pfx, modrm, 19929 sz == 4 19930 ? unop(Iop_64to32, mkexpr(addr)) 19931 : mkexpr(addr) 19932 ); 19933 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 19934 nameIRegG(sz,pfx,modrm)); 19935 return delta; 19936 19937 case 0x8F: { /* POPQ m64 / POPW m16 */ 19938 Int len; 19939 UChar rm; 19940 /* There is no encoding for 32-bit pop in 64-bit mode. 19941 So sz==4 actually means sz==8. 
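         (sz arrives as 4 when no operand-size prefix is present, as 2
         with a 66 prefix, and as 8 with a redundant REX.W; the first and
         last both denote a 64-bit pop.)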
*/ 19942 if (haveF2orF3(pfx)) goto decode_failure; 19943 vassert(sz == 2 || sz == 4 19944 || /* tolerate redundant REX.W, see #210481 */ sz == 8); 19945 if (sz == 4) sz = 8; 19946 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 19947 19948 rm = getUChar(delta); 19949 19950 /* make sure this instruction is correct POP */ 19951 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) 19952 goto decode_failure; 19953 /* and has correct size */ 19954 vassert(sz == 8); 19955 19956 t1 = newTemp(Ity_I64); 19957 t3 = newTemp(Ity_I64); 19958 assign( t1, getIReg64(R_RSP) ); 19959 assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); 19960 19961 /* Increase RSP; must be done before the STORE. Intel manual 19962 says: If the RSP register is used as a base register for 19963 addressing a destination operand in memory, the POP 19964 instruction computes the effective address of the operand 19965 after it increments the RSP register. */ 19966 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); 19967 19968 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 19969 storeLE( mkexpr(addr), mkexpr(t3) ); 19970 19971 DIP("popl %s\n", dis_buf); 19972 19973 delta += len; 19974 return delta; 19975 } 19976 19977 case 0x90: /* XCHG eAX,eAX */ 19978 /* detect and handle F3 90 (rep nop) specially */ 19979 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 19980 DIP("rep nop (P4 pause)\n"); 19981 /* "observe" the hint. The Vex client needs to be careful not 19982 to cause very long delays as a result, though. */ 19983 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta); 19984 vassert(dres->whatNext == Dis_StopHere); 19985 return delta; 19986 } 19987 /* detect and handle NOPs specially */ 19988 if (/* F2/F3 probably change meaning completely */ 19989 !haveF2orF3(pfx) 19990 /* If REX.B is 1, we're not exchanging rAX with itself */ 19991 && getRexB(pfx)==0 ) { 19992 DIP("nop\n"); 19993 return delta; 19994 } 19995 /* else fall through to normal case. */ 19996 case 0x91: /* XCHG rAX,rCX */ 19997 case 0x92: /* XCHG rAX,rDX */ 19998 case 0x93: /* XCHG rAX,rBX */ 19999 case 0x94: /* XCHG rAX,rSP */ 20000 case 0x95: /* XCHG rAX,rBP */ 20001 case 0x96: /* XCHG rAX,rSI */ 20002 case 0x97: /* XCHG rAX,rDI */ 20003 /* guard against mutancy */ 20004 if (haveF2orF3(pfx)) goto decode_failure; 20005 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 20006 return delta; 20007 20008 case 0x98: /* CBW */ 20009 if (haveF2orF3(pfx)) goto decode_failure; 20010 if (sz == 8) { 20011 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 20012 DIP(/*"cdqe\n"*/"cltq"); 20013 return delta; 20014 } 20015 if (sz == 4) { 20016 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 20017 DIP("cwtl\n"); 20018 return delta; 20019 } 20020 if (sz == 2) { 20021 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 20022 DIP("cbw\n"); 20023 return delta; 20024 } 20025 goto decode_failure; 20026 20027 case 0x99: /* CWD/CDQ/CQO */ 20028 if (haveF2orF3(pfx)) goto decode_failure; 20029 vassert(sz == 2 || sz == 4 || sz == 8); 20030 ty = szToITy(sz); 20031 putIRegRDX( sz, 20032 binop(mkSizedOp(ty,Iop_Sar8), 20033 getIRegRAX(sz), 20034 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 20035 DIP(sz == 2 ? "cwd\n" 20036 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 20037 : "cqo\n")); 20038 return delta; 20039 20040 case 0x9B: /* FWAIT (X87 insn) */ 20041 /* ignore? */ 20042 DIP("fwait\n"); 20043 return delta; 20044 20045 case 0x9C: /* PUSHF */ { 20046 /* Note. There is no encoding for a 32-bit pushf in 64-bit 20047 mode. So sz==4 actually means sz==8. 
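         The value pushed is the rflags image rebuilt from the flags
         thunk, with the D, ID and AC bits patched back in from their
         separate guest-state fields -- see the sequence below.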
*/ 20048 /* 24 July 06: has also been seen with a redundant REX prefix, 20049 so must also allow sz==8. */ 20050 if (haveF2orF3(pfx)) goto decode_failure; 20051 vassert(sz == 2 || sz == 4 || sz == 8); 20052 if (sz == 4) sz = 8; 20053 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20054 20055 t1 = newTemp(Ity_I64); 20056 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 20057 putIReg64(R_RSP, mkexpr(t1) ); 20058 20059 t2 = newTemp(Ity_I64); 20060 assign( t2, mk_amd64g_calculate_rflags_all() ); 20061 20062 /* Patch in the D flag. This can simply be a copy of bit 10 of 20063 baseBlock[OFFB_DFLAG]. */ 20064 t3 = newTemp(Ity_I64); 20065 assign( t3, binop(Iop_Or64, 20066 mkexpr(t2), 20067 binop(Iop_And64, 20068 IRExpr_Get(OFFB_DFLAG,Ity_I64), 20069 mkU64(1<<10))) 20070 ); 20071 20072 /* And patch in the ID flag. */ 20073 t4 = newTemp(Ity_I64); 20074 assign( t4, binop(Iop_Or64, 20075 mkexpr(t3), 20076 binop(Iop_And64, 20077 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 20078 mkU8(21)), 20079 mkU64(1<<21))) 20080 ); 20081 20082 /* And patch in the AC flag too. */ 20083 t5 = newTemp(Ity_I64); 20084 assign( t5, binop(Iop_Or64, 20085 mkexpr(t4), 20086 binop(Iop_And64, 20087 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 20088 mkU8(18)), 20089 mkU64(1<<18))) 20090 ); 20091 20092 /* if sz==2, the stored value needs to be narrowed. */ 20093 if (sz == 2) 20094 storeLE( mkexpr(t1), unop(Iop_32to16, 20095 unop(Iop_64to32,mkexpr(t5))) ); 20096 else 20097 storeLE( mkexpr(t1), mkexpr(t5) ); 20098 20099 DIP("pushf%c\n", nameISize(sz)); 20100 return delta; 20101 } 20102 20103 case 0x9D: /* POPF */ 20104 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 20105 So sz==4 actually means sz==8. */ 20106 if (haveF2orF3(pfx)) goto decode_failure; 20107 vassert(sz == 2 || sz == 4); 20108 if (sz == 4) sz = 8; 20109 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20110 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 20111 assign(t2, getIReg64(R_RSP)); 20112 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 20113 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 20114 /* t1 is the flag word. Mask out everything except OSZACP and 20115 set the flags thunk to AMD64G_CC_OP_COPY. */ 20116 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20117 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20118 stmt( IRStmt_Put( OFFB_CC_DEP1, 20119 binop(Iop_And64, 20120 mkexpr(t1), 20121 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 20122 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 20123 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 20124 ) 20125 ) 20126 ); 20127 20128 /* Also need to set the D flag, which is held in bit 10 of t1. 20129 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. 
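In other words, roughly

     OFFB_DFLAG = ((t1 >> 10) & 1) ? -1 : +1

re-encoding the architectural DF bit into the +1/-1 stride convention used by
the string instructions.  The ID and AC bits handled just after get the
plainer 0/1 encoding.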
*/ 20130 stmt( IRStmt_Put( 20131 OFFB_DFLAG, 20132 IRExpr_ITE( 20133 unop(Iop_64to1, 20134 binop(Iop_And64, 20135 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 20136 mkU64(1))), 20137 mkU64(0xFFFFFFFFFFFFFFFFULL), 20138 mkU64(1))) 20139 ); 20140 20141 /* And set the ID flag */ 20142 stmt( IRStmt_Put( 20143 OFFB_IDFLAG, 20144 IRExpr_ITE( 20145 unop(Iop_64to1, 20146 binop(Iop_And64, 20147 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 20148 mkU64(1))), 20149 mkU64(1), 20150 mkU64(0))) 20151 ); 20152 20153 /* And set the AC flag too */ 20154 stmt( IRStmt_Put( 20155 OFFB_ACFLAG, 20156 IRExpr_ITE( 20157 unop(Iop_64to1, 20158 binop(Iop_And64, 20159 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 20160 mkU64(1))), 20161 mkU64(1), 20162 mkU64(0))) 20163 ); 20164 20165 DIP("popf%c\n", nameISize(sz)); 20166 return delta; 20167 20168 case 0x9E: /* SAHF */ 20169 codegen_SAHF(); 20170 DIP("sahf\n"); 20171 return delta; 20172 20173 case 0x9F: /* LAHF */ 20174 codegen_LAHF(); 20175 DIP("lahf\n"); 20176 return delta; 20177 20178 case 0xA0: /* MOV Ob,AL */ 20179 if (have66orF2orF3(pfx)) goto decode_failure; 20180 sz = 1; 20181 /* Fall through ... */ 20182 case 0xA1: /* MOV Ov,eAX */ 20183 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20184 goto decode_failure; 20185 d64 = getDisp64(delta); 20186 delta += 8; 20187 ty = szToITy(sz); 20188 addr = newTemp(Ity_I64); 20189 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20190 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 20191 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 20192 segRegTxt(pfx), d64, 20193 nameIRegRAX(sz)); 20194 return delta; 20195 20196 case 0xA2: /* MOV AL,Ob */ 20197 if (have66orF2orF3(pfx)) goto decode_failure; 20198 sz = 1; 20199 /* Fall through ... */ 20200 case 0xA3: /* MOV eAX,Ov */ 20201 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20202 goto decode_failure; 20203 d64 = getDisp64(delta); 20204 delta += 8; 20205 ty = szToITy(sz); 20206 addr = newTemp(Ity_I64); 20207 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20208 storeLE( mkexpr(addr), getIRegRAX(sz) ); 20209 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 20210 segRegTxt(pfx), d64); 20211 return delta; 20212 20213 case 0xA4: 20214 case 0xA5: 20215 /* F3 A4: rep movsb */ 20216 if (haveF3(pfx) && !haveF2(pfx)) { 20217 if (opc == 0xA4) 20218 sz = 1; 20219 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz, 20220 guest_RIP_curr_instr, 20221 guest_RIP_bbstart+delta, "rep movs", pfx ); 20222 dres->whatNext = Dis_StopHere; 20223 return delta; 20224 } 20225 /* A4: movsb */ 20226 if (!haveF3(pfx) && !haveF2(pfx)) { 20227 if (opc == 0xA4) 20228 sz = 1; 20229 dis_string_op( dis_MOVS, sz, "movs", pfx ); 20230 return delta; 20231 } 20232 goto decode_failure; 20233 20234 case 0xA6: 20235 case 0xA7: 20236 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 20237 if (haveF3(pfx) && !haveF2(pfx)) { 20238 if (opc == 0xA6) 20239 sz = 1; 20240 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz, 20241 guest_RIP_curr_instr, 20242 guest_RIP_bbstart+delta, "repe cmps", pfx ); 20243 dres->whatNext = Dis_StopHere; 20244 return delta; 20245 } 20246 goto decode_failure; 20247 20248 case 0xAA: 20249 case 0xAB: 20250 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 20251 if (haveF3(pfx) && !haveF2(pfx)) { 20252 if (opc == 0xAA) 20253 sz = 1; 20254 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz, 20255 guest_RIP_curr_instr, 20256 guest_RIP_bbstart+delta, "rep stos", pfx ); 20257 vassert(dres->whatNext == Dis_StopHere); 20258 return delta; 20259 } 20260 /* AA/AB: stosb/stos{w,l,q} */ 20261 if (!haveF3(pfx) && !haveF2(pfx)) { 
20262 if (opc == 0xAA) 20263 sz = 1; 20264 dis_string_op( dis_STOS, sz, "stos", pfx ); 20265 return delta; 20266 } 20267 goto decode_failure; 20268 20269 case 0xA8: /* TEST Ib, AL */ 20270 if (haveF2orF3(pfx)) goto decode_failure; 20271 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 20272 return delta; 20273 case 0xA9: /* TEST Iv, eAX */ 20274 if (haveF2orF3(pfx)) goto decode_failure; 20275 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 20276 return delta; 20277 20278 case 0xAC: /* LODS, no REP prefix */ 20279 case 0xAD: 20280 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx ); 20281 return delta; 20282 20283 case 0xAE: 20284 case 0xAF: 20285 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 20286 if (haveF2(pfx) && !haveF3(pfx)) { 20287 if (opc == 0xAE) 20288 sz = 1; 20289 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz, 20290 guest_RIP_curr_instr, 20291 guest_RIP_bbstart+delta, "repne scas", pfx ); 20292 vassert(dres->whatNext == Dis_StopHere); 20293 return delta; 20294 } 20295 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 20296 if (!haveF2(pfx) && haveF3(pfx)) { 20297 if (opc == 0xAE) 20298 sz = 1; 20299 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz, 20300 guest_RIP_curr_instr, 20301 guest_RIP_bbstart+delta, "repe scas", pfx ); 20302 vassert(dres->whatNext == Dis_StopHere); 20303 return delta; 20304 } 20305 /* AE/AF: scasb/scas{w,l,q} */ 20306 if (!haveF2(pfx) && !haveF3(pfx)) { 20307 if (opc == 0xAE) 20308 sz = 1; 20309 dis_string_op( dis_SCAS, sz, "scas", pfx ); 20310 return delta; 20311 } 20312 goto decode_failure; 20313 20314 /* XXXX be careful here with moves to AH/BH/CH/DH */ 20315 case 0xB0: /* MOV imm,AL */ 20316 case 0xB1: /* MOV imm,CL */ 20317 case 0xB2: /* MOV imm,DL */ 20318 case 0xB3: /* MOV imm,BL */ 20319 case 0xB4: /* MOV imm,AH */ 20320 case 0xB5: /* MOV imm,CH */ 20321 case 0xB6: /* MOV imm,DH */ 20322 case 0xB7: /* MOV imm,BH */ 20323 if (haveF2orF3(pfx)) goto decode_failure; 20324 d64 = getUChar(delta); 20325 delta += 1; 20326 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 20327 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 20328 return delta; 20329 20330 case 0xB8: /* MOV imm,eAX */ 20331 case 0xB9: /* MOV imm,eCX */ 20332 case 0xBA: /* MOV imm,eDX */ 20333 case 0xBB: /* MOV imm,eBX */ 20334 case 0xBC: /* MOV imm,eSP */ 20335 case 0xBD: /* MOV imm,eBP */ 20336 case 0xBE: /* MOV imm,eSI */ 20337 case 0xBF: /* MOV imm,eDI */ 20338 /* This is the one-and-only place where 64-bit literals are 20339 allowed in the instruction stream. 
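For instance (illustrative encoding, little-endian immediate):

     48 B8 88 77 66 55 44 33 22 11    movabsq $0x1122334455667788, %rax

Without REX.W the same opcode bytes take at most a 32-bit immediate, which is
why the non-8-byte path below fetches imin(4,sz) bytes and masks the result
to the operand width.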
*/ 20340 if (haveF2orF3(pfx)) goto decode_failure; 20341 if (sz == 8) { 20342 d64 = getDisp64(delta); 20343 delta += 8; 20344 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 20345 DIP("movabsq $%lld,%s\n", (Long)d64, 20346 nameIRegRexB(8,pfx,opc-0xB8)); 20347 } else { 20348 d64 = getSDisp(imin(4,sz),delta); 20349 delta += imin(4,sz); 20350 putIRegRexB(sz, pfx, opc-0xB8, 20351 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20352 DIP("mov%c $%lld,%s\n", nameISize(sz), 20353 (Long)d64, 20354 nameIRegRexB(sz,pfx,opc-0xB8)); 20355 } 20356 return delta; 20357 20358 case 0xC0: { /* Grp2 Ib,Eb */ 20359 Bool decode_OK = True; 20360 if (haveF2orF3(pfx)) goto decode_failure; 20361 modrm = getUChar(delta); 20362 am_sz = lengthAMode(pfx,delta); 20363 d_sz = 1; 20364 d64 = getUChar(delta + am_sz); 20365 sz = 1; 20366 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20367 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20368 if (!decode_OK) goto decode_failure; 20369 return delta; 20370 } 20371 20372 case 0xC1: { /* Grp2 Ib,Ev */ 20373 Bool decode_OK = True; 20374 if (haveF2orF3(pfx)) goto decode_failure; 20375 modrm = getUChar(delta); 20376 am_sz = lengthAMode(pfx,delta); 20377 d_sz = 1; 20378 d64 = getUChar(delta + am_sz); 20379 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20380 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20381 if (!decode_OK) goto decode_failure; 20382 return delta; 20383 } 20384 20385 case 0xC2: /* RET imm16 */ 20386 if (have66orF3(pfx)) goto decode_failure; 20387 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20388 d64 = getUDisp16(delta); 20389 delta += 2; 20390 dis_ret(dres, vbi, d64); 20391 DIP("ret $%lld\n", d64); 20392 return delta; 20393 20394 case 0xC3: /* RET */ 20395 if (have66(pfx)) goto decode_failure; 20396 /* F3 is acceptable on AMD. */ 20397 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20398 dis_ret(dres, vbi, 0); 20399 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 20400 return delta; 20401 20402 case 0xC6: /* C6 /0 = MOV Ib,Eb */ 20403 sz = 1; 20404 goto maybe_do_Mov_I_E; 20405 case 0xC7: /* C7 /0 = MOV Iv,Ev */ 20406 goto maybe_do_Mov_I_E; 20407 maybe_do_Mov_I_E: 20408 modrm = getUChar(delta); 20409 if (gregLO3ofRM(modrm) == 0) { 20410 if (epartIsReg(modrm)) { 20411 /* Neither F2 nor F3 are allowable. */ 20412 if (haveF2orF3(pfx)) goto decode_failure; 20413 delta++; /* mod/rm byte */ 20414 d64 = getSDisp(imin(4,sz),delta); 20415 delta += imin(4,sz); 20416 putIRegE(sz, pfx, modrm, 20417 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20418 DIP("mov%c $%lld, %s\n", nameISize(sz), 20419 (Long)d64, 20420 nameIRegE(sz,pfx,modrm)); 20421 } else { 20422 if (haveF2(pfx)) goto decode_failure; 20423 /* F3(XRELEASE) is allowable here */ 20424 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 20425 /*xtra*/imin(4,sz) ); 20426 delta += alen; 20427 d64 = getSDisp(imin(4,sz),delta); 20428 delta += imin(4,sz); 20429 storeLE(mkexpr(addr), 20430 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20431 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 20432 } 20433 return delta; 20434 } 20435 /* BEGIN HACKY SUPPORT FOR xbegin */ 20436 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4 20437 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20438 delta++; /* mod/rm byte */ 20439 d64 = getSDisp(4,delta); 20440 delta += 4; 20441 guest_RIP_next_mustcheck = True; 20442 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 20443 Addr64 failAddr = guest_RIP_bbstart + delta + d64; 20444 /* EAX contains the failure status code. 
Bit 3 is "Set if an 20445 internal buffer overflowed", which seems like the 20446 least-bogus choice we can make here. */ 20447 putIRegRAX(4, mkU32(1<<3)); 20448 /* And jump to the fail address. */ 20449 jmp_lit(dres, Ijk_Boring, failAddr); 20450 vassert(dres->whatNext == Dis_StopHere); 20451 DIP("xbeginq 0x%llx\n", failAddr); 20452 return delta; 20453 } 20454 /* END HACKY SUPPORT FOR xbegin */ 20455 /* BEGIN HACKY SUPPORT FOR xabort */ 20456 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1 20457 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20458 delta++; /* mod/rm byte */ 20459 abyte = getUChar(delta); delta++; 20460 /* There is never a real transaction in progress, so do nothing. */ 20461 DIP("xabort $%d", (Int)abyte); 20462 return delta; 20463 } 20464 /* END HACKY SUPPORT FOR xabort */ 20465 goto decode_failure; 20466 20467 case 0xC8: /* ENTER */ 20468 /* Same comments re operand size as for LEAVE below apply. 20469 Also, only handles the case "enter $imm16, $0"; other cases 20470 for the second operand (nesting depth) are not handled. */ 20471 if (sz != 4) 20472 goto decode_failure; 20473 d64 = getUDisp16(delta); 20474 delta += 2; 20475 vassert(d64 >= 0 && d64 <= 0xFFFF); 20476 if (getUChar(delta) != 0) 20477 goto decode_failure; 20478 delta++; 20479 /* Intel docs seem to suggest: 20480 push rbp 20481 temp = rsp 20482 rbp = temp 20483 rsp = rsp - imm16 20484 */ 20485 t1 = newTemp(Ity_I64); 20486 assign(t1, getIReg64(R_RBP)); 20487 t2 = newTemp(Ity_I64); 20488 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 20489 putIReg64(R_RSP, mkexpr(t2)); 20490 storeLE(mkexpr(t2), mkexpr(t1)); 20491 putIReg64(R_RBP, mkexpr(t2)); 20492 if (d64 > 0) { 20493 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64))); 20494 } 20495 DIP("enter $%u, $0\n", (UInt)d64); 20496 return delta; 20497 20498 case 0xC9: /* LEAVE */ 20499 /* In 64-bit mode this defaults to a 64-bit operand size. There 20500 is no way to encode a 32-bit variant. Hence sz==4 but we do 20501 it as if sz=8. */ 20502 if (sz != 4) 20503 goto decode_failure; 20504 t1 = newTemp(Ity_I64); 20505 t2 = newTemp(Ity_I64); 20506 assign(t1, getIReg64(R_RBP)); 20507 /* First PUT RSP looks redundant, but need it because RSP must 20508 always be up-to-date for Memcheck to work... 
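The IR generated below corresponds to the usual expansion

     rsp = rbp
     rbp = load64(rsp)
     rsp = rsp + 8

with RSP deliberately written twice: once before the load of the saved RBP
(the Memcheck requirement just mentioned) and once with the final value.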
*/ 20509 putIReg64(R_RSP, mkexpr(t1)); 20510 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 20511 putIReg64(R_RBP, mkexpr(t2)); 20512 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 20513 DIP("leave\n"); 20514 return delta; 20515 20516 case 0xCC: /* INT 3 */ 20517 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta); 20518 vassert(dres->whatNext == Dis_StopHere); 20519 DIP("int $0x3\n"); 20520 return delta; 20521 20522 case 0xD0: { /* Grp2 1,Eb */ 20523 Bool decode_OK = True; 20524 if (haveF2orF3(pfx)) goto decode_failure; 20525 modrm = getUChar(delta); 20526 am_sz = lengthAMode(pfx,delta); 20527 d_sz = 0; 20528 d64 = 1; 20529 sz = 1; 20530 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20531 mkU8(d64), NULL, &decode_OK ); 20532 if (!decode_OK) goto decode_failure; 20533 return delta; 20534 } 20535 20536 case 0xD1: { /* Grp2 1,Ev */ 20537 Bool decode_OK = True; 20538 if (haveF2orF3(pfx)) goto decode_failure; 20539 modrm = getUChar(delta); 20540 am_sz = lengthAMode(pfx,delta); 20541 d_sz = 0; 20542 d64 = 1; 20543 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20544 mkU8(d64), NULL, &decode_OK ); 20545 if (!decode_OK) goto decode_failure; 20546 return delta; 20547 } 20548 20549 case 0xD2: { /* Grp2 CL,Eb */ 20550 Bool decode_OK = True; 20551 if (haveF2orF3(pfx)) goto decode_failure; 20552 modrm = getUChar(delta); 20553 am_sz = lengthAMode(pfx,delta); 20554 d_sz = 0; 20555 sz = 1; 20556 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20557 getIRegCL(), "%cl", &decode_OK ); 20558 if (!decode_OK) goto decode_failure; 20559 return delta; 20560 } 20561 20562 case 0xD3: { /* Grp2 CL,Ev */ 20563 Bool decode_OK = True; 20564 if (haveF2orF3(pfx)) goto decode_failure; 20565 modrm = getUChar(delta); 20566 am_sz = lengthAMode(pfx,delta); 20567 d_sz = 0; 20568 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20569 getIRegCL(), "%cl", &decode_OK ); 20570 if (!decode_OK) goto decode_failure; 20571 return delta; 20572 } 20573 20574 case 0xD8: /* X87 instructions */ 20575 case 0xD9: 20576 case 0xDA: 20577 case 0xDB: 20578 case 0xDC: 20579 case 0xDD: 20580 case 0xDE: 20581 case 0xDF: { 20582 Bool redundantREXWok = False; 20583 20584 if (haveF2orF3(pfx)) 20585 goto decode_failure; 20586 20587 /* kludge to tolerate redundant rex.w prefixes (should do this 20588 properly one day) */ 20589 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 20590 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 20591 redundantREXWok = True; 20592 20593 Bool size_OK = False; 20594 if ( sz == 4 ) 20595 size_OK = True; 20596 else if ( sz == 8 ) 20597 size_OK = redundantREXWok; 20598 else if ( sz == 2 ) { 20599 int mod_rm = getUChar(delta+0); 20600 int reg = gregLO3ofRM(mod_rm); 20601 /* The HotSpot JVM uses these */ 20602 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ || 20603 reg == 4 /* FNSAVE */ || 20604 reg == 6 /* FRSTOR */ ) ) 20605 size_OK = True; 20606 } 20607 /* AMD manual says 0x66 size override is ignored, except where 20608 it is meaningful */ 20609 if (!size_OK) 20610 goto decode_failure; 20611 20612 Bool decode_OK = False; 20613 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 20614 if (!decode_OK) 20615 goto decode_failure; 20616 20617 return delta; 20618 } 20619 20620 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */ 20621 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 20622 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 20623 { /* The docs say this uses rCX as a count depending 
on the 20624 address size override, not the operand one. */ 20625 IRExpr* zbit = NULL; 20626 IRExpr* count = NULL; 20627 IRExpr* cond = NULL; 20628 const HChar* xtra = NULL; 20629 20630 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 20631 /* So at this point we've rejected any variants which appear to 20632 be governed by the usual operand-size modifiers. Hence only 20633 the address size prefix can have an effect. It changes the 20634 size from 64 (default) to 32. */ 20635 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 20636 delta++; 20637 if (haveASO(pfx)) { 20638 /* 64to32 of 64-bit get is merely a get-put improvement 20639 trick. */ 20640 putIReg32(R_RCX, binop(Iop_Sub32, 20641 unop(Iop_64to32, getIReg64(R_RCX)), 20642 mkU32(1))); 20643 } else { 20644 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 20645 } 20646 20647 /* This is correct, both for 32- and 64-bit versions. If we're 20648 doing a 32-bit dec and the result is zero then the default 20649 zero extension rule will cause the upper 32 bits to be zero 20650 too. Hence a 64-bit check against zero is OK. */ 20651 count = getIReg64(R_RCX); 20652 cond = binop(Iop_CmpNE64, count, mkU64(0)); 20653 switch (opc) { 20654 case 0xE2: 20655 xtra = ""; 20656 break; 20657 case 0xE1: 20658 xtra = "e"; 20659 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 20660 cond = mkAnd1(cond, zbit); 20661 break; 20662 case 0xE0: 20663 xtra = "ne"; 20664 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 20665 cond = mkAnd1(cond, zbit); 20666 break; 20667 default: 20668 vassert(0); 20669 } 20670 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) ); 20671 20672 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64); 20673 return delta; 20674 } 20675 20676 case 0xE3: 20677 /* JRCXZ or JECXZ, depending address size override. 
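That is, a 0x67 prefix makes the test use ECX, otherwise the full RCX is
tested; no flags are read or written.  Roughly:

     jecxz dst :  if ((ECX zero-extended to 64 bits) == 0) goto dst
     jrcxz dst :  if (RCX == 0) goto dst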
*/ 20678 if (have66orF2orF3(pfx)) goto decode_failure; 20679 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 20680 delta++; 20681 if (haveASO(pfx)) { 20682 /* 32-bit */ 20683 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 20684 unop(Iop_32Uto64, getIReg32(R_RCX)), 20685 mkU64(0)), 20686 Ijk_Boring, 20687 IRConst_U64(d64), 20688 OFFB_RIP 20689 )); 20690 DIP("jecxz 0x%llx\n", d64); 20691 } else { 20692 /* 64-bit */ 20693 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 20694 getIReg64(R_RCX), 20695 mkU64(0)), 20696 Ijk_Boring, 20697 IRConst_U64(d64), 20698 OFFB_RIP 20699 )); 20700 DIP("jrcxz 0x%llx\n", d64); 20701 } 20702 return delta; 20703 20704 case 0xE4: /* IN imm8, AL */ 20705 sz = 1; 20706 t1 = newTemp(Ity_I64); 20707 abyte = getUChar(delta); delta++; 20708 assign(t1, mkU64( abyte & 0xFF )); 20709 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 20710 goto do_IN; 20711 case 0xE5: /* IN imm8, eAX */ 20712 if (!(sz == 2 || sz == 4)) goto decode_failure; 20713 t1 = newTemp(Ity_I64); 20714 abyte = getUChar(delta); delta++; 20715 assign(t1, mkU64( abyte & 0xFF )); 20716 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 20717 goto do_IN; 20718 case 0xEC: /* IN %DX, AL */ 20719 sz = 1; 20720 t1 = newTemp(Ity_I64); 20721 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 20722 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 20723 nameIRegRAX(sz)); 20724 goto do_IN; 20725 case 0xED: /* IN %DX, eAX */ 20726 if (!(sz == 2 || sz == 4)) goto decode_failure; 20727 t1 = newTemp(Ity_I64); 20728 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 20729 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 20730 nameIRegRAX(sz)); 20731 goto do_IN; 20732 do_IN: { 20733 /* At this point, sz indicates the width, and t1 is a 64-bit 20734 value giving port number. */ 20735 IRDirty* d; 20736 if (haveF2orF3(pfx)) goto decode_failure; 20737 vassert(sz == 1 || sz == 2 || sz == 4); 20738 ty = szToITy(sz); 20739 t2 = newTemp(Ity_I64); 20740 d = unsafeIRDirty_1_N( 20741 t2, 20742 0/*regparms*/, 20743 "amd64g_dirtyhelper_IN", 20744 &amd64g_dirtyhelper_IN, 20745 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 20746 ); 20747 /* do the call, dumping the result in t2. */ 20748 stmt( IRStmt_Dirty(d) ); 20749 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 20750 return delta; 20751 } 20752 20753 case 0xE6: /* OUT AL, imm8 */ 20754 sz = 1; 20755 t1 = newTemp(Ity_I64); 20756 abyte = getUChar(delta); delta++; 20757 assign( t1, mkU64( abyte & 0xFF ) ); 20758 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 20759 goto do_OUT; 20760 case 0xE7: /* OUT eAX, imm8 */ 20761 if (!(sz == 2 || sz == 4)) goto decode_failure; 20762 t1 = newTemp(Ity_I64); 20763 abyte = getUChar(delta); delta++; 20764 assign( t1, mkU64( abyte & 0xFF ) ); 20765 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 20766 goto do_OUT; 20767 case 0xEE: /* OUT AL, %DX */ 20768 sz = 1; 20769 t1 = newTemp(Ity_I64); 20770 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 20771 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 20772 nameIRegRDX(2)); 20773 goto do_OUT; 20774 case 0xEF: /* OUT eAX, %DX */ 20775 if (!(sz == 2 || sz == 4)) goto decode_failure; 20776 t1 = newTemp(Ity_I64); 20777 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 20778 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 20779 nameIRegRDX(2)); 20780 goto do_OUT; 20781 do_OUT: { 20782 /* At this point, sz indicates the width, and t1 is a 64-bit 20783 value giving port number. 
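The port write itself cannot be expressed in IR, so it is handed to a dirty
helper.  The call constructed below has the shape (a summary of the
mkIRExprVec_3 arguments, not a new interface):

     amd64g_dirtyhelper_OUT( port, widenUto64(rAX[sz]), sz )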
*/ 20784 IRDirty* d; 20785 if (haveF2orF3(pfx)) goto decode_failure; 20786 vassert(sz == 1 || sz == 2 || sz == 4); 20787 ty = szToITy(sz); 20788 d = unsafeIRDirty_0_N( 20789 0/*regparms*/, 20790 "amd64g_dirtyhelper_OUT", 20791 &amd64g_dirtyhelper_OUT, 20792 mkIRExprVec_3( mkexpr(t1), 20793 widenUto64( getIRegRAX(sz) ), 20794 mkU64(sz) ) 20795 ); 20796 stmt( IRStmt_Dirty(d) ); 20797 return delta; 20798 } 20799 20800 case 0xE8: /* CALL J4 */ 20801 if (haveF3(pfx)) goto decode_failure; 20802 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20803 d64 = getSDisp32(delta); delta += 4; 20804 d64 += (guest_RIP_bbstart+delta); 20805 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 20806 t1 = newTemp(Ity_I64); 20807 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 20808 putIReg64(R_RSP, mkexpr(t1)); 20809 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 20810 t2 = newTemp(Ity_I64); 20811 assign(t2, mkU64((Addr64)d64)); 20812 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 20813 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 20814 /* follow into the call target. */ 20815 dres->whatNext = Dis_ResteerU; 20816 dres->continueAt = d64; 20817 } else { 20818 jmp_lit(dres, Ijk_Call, d64); 20819 vassert(dres->whatNext == Dis_StopHere); 20820 } 20821 DIP("call 0x%llx\n",d64); 20822 return delta; 20823 20824 case 0xE9: /* Jv (jump, 16/32 offset) */ 20825 if (haveF3(pfx)) goto decode_failure; 20826 if (sz != 4) 20827 goto decode_failure; /* JRS added 2004 July 11 */ 20828 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20829 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); 20830 delta += sz; 20831 if (resteerOkFn(callback_opaque,d64)) { 20832 dres->whatNext = Dis_ResteerU; 20833 dres->continueAt = d64; 20834 } else { 20835 jmp_lit(dres, Ijk_Boring, d64); 20836 vassert(dres->whatNext == Dis_StopHere); 20837 } 20838 DIP("jmp 0x%llx\n", d64); 20839 return delta; 20840 20841 case 0xEB: /* Jb (jump, byte offset) */ 20842 if (haveF3(pfx)) goto decode_failure; 20843 if (sz != 4) 20844 goto decode_failure; /* JRS added 2004 July 11 */ 20845 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20846 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 20847 delta++; 20848 if (resteerOkFn(callback_opaque,d64)) { 20849 dres->whatNext = Dis_ResteerU; 20850 dres->continueAt = d64; 20851 } else { 20852 jmp_lit(dres, Ijk_Boring, d64); 20853 vassert(dres->whatNext == Dis_StopHere); 20854 } 20855 DIP("jmp-8 0x%llx\n", d64); 20856 return delta; 20857 20858 case 0xF5: /* CMC */ 20859 case 0xF8: /* CLC */ 20860 case 0xF9: /* STC */ 20861 t1 = newTemp(Ity_I64); 20862 t2 = newTemp(Ity_I64); 20863 assign( t1, mk_amd64g_calculate_rflags_all() ); 20864 switch (opc) { 20865 case 0xF5: 20866 assign( t2, binop(Iop_Xor64, mkexpr(t1), 20867 mkU64(AMD64G_CC_MASK_C))); 20868 DIP("cmc\n"); 20869 break; 20870 case 0xF8: 20871 assign( t2, binop(Iop_And64, mkexpr(t1), 20872 mkU64(~AMD64G_CC_MASK_C))); 20873 DIP("clc\n"); 20874 break; 20875 case 0xF9: 20876 assign( t2, binop(Iop_Or64, mkexpr(t1), 20877 mkU64(AMD64G_CC_MASK_C))); 20878 DIP("stc\n"); 20879 break; 20880 default: 20881 vpanic("disInstr(x64)(cmc/clc/stc)"); 20882 } 20883 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20884 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20885 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) )); 20886 /* Set NDEP even though it isn't used. This makes redundant-PUT 20887 elimination of previous stores to this field work better. 
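Under AMD64G_CC_OP_COPY, DEP1 is taken to hold the rflags value itself in its
architectural bit positions, so the three cases above amount to, roughly,

     cmc:  DEP1 = rflags ^  AMD64G_CC_MASK_C
     clc:  DEP1 = rflags & ~AMD64G_CC_MASK_C
     stc:  DEP1 = rflags |  AMD64G_CC_MASK_C

DEP2 and NDEP are not consulted for COPY; they are written anyway, NDEP for
the reason just given.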
*/ 20888 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 20889 return delta; 20890 20891 case 0xF6: { /* Grp3 Eb */ 20892 Bool decode_OK = True; 20893 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20894 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 20895 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 20896 if (!decode_OK) goto decode_failure; 20897 return delta; 20898 } 20899 20900 case 0xF7: { /* Grp3 Ev */ 20901 Bool decode_OK = True; 20902 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20903 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 20904 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 20905 if (!decode_OK) goto decode_failure; 20906 return delta; 20907 } 20908 20909 case 0xFC: /* CLD */ 20910 if (haveF2orF3(pfx)) goto decode_failure; 20911 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); 20912 DIP("cld\n"); 20913 return delta; 20914 20915 case 0xFD: /* STD */ 20916 if (haveF2orF3(pfx)) goto decode_failure; 20917 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); 20918 DIP("std\n"); 20919 return delta; 20920 20921 case 0xFE: { /* Grp4 Eb */ 20922 Bool decode_OK = True; 20923 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20924 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */ 20925 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 20926 if (!decode_OK) goto decode_failure; 20927 return delta; 20928 } 20929 20930 case 0xFF: { /* Grp5 Ev */ 20931 Bool decode_OK = True; 20932 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 20933 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */ 20934 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK ); 20935 if (!decode_OK) goto decode_failure; 20936 return delta; 20937 } 20938 20939 default: 20940 break; 20941 20942 } 20943 20944 decode_failure: 20945 return deltaIN; /* fail */ 20946 } 20947 20948 20949 /*------------------------------------------------------------*/ 20950 /*--- ---*/ 20951 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/ 20952 /*--- ---*/ 20953 /*------------------------------------------------------------*/ 20954 20955 static IRTemp math_BSWAP ( IRTemp t1, IRType ty ) 20956 { 20957 IRTemp t2 = newTemp(ty); 20958 if (ty == Ity_I64) { 20959 IRTemp m8 = newTemp(Ity_I64); 20960 IRTemp s8 = newTemp(Ity_I64); 20961 IRTemp m16 = newTemp(Ity_I64); 20962 IRTemp s16 = newTemp(Ity_I64); 20963 IRTemp m32 = newTemp(Ity_I64); 20964 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) ); 20965 assign( s8, 20966 binop(Iop_Or64, 20967 binop(Iop_Shr64, 20968 binop(Iop_And64,mkexpr(t1),mkexpr(m8)), 20969 mkU8(8)), 20970 binop(Iop_And64, 20971 binop(Iop_Shl64,mkexpr(t1),mkU8(8)), 20972 mkexpr(m8)) 20973 ) 20974 ); 20975 20976 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) ); 20977 assign( s16, 20978 binop(Iop_Or64, 20979 binop(Iop_Shr64, 20980 binop(Iop_And64,mkexpr(s8),mkexpr(m16)), 20981 mkU8(16)), 20982 binop(Iop_And64, 20983 binop(Iop_Shl64,mkexpr(s8),mkU8(16)), 20984 mkexpr(m16)) 20985 ) 20986 ); 20987 20988 assign( m32, mkU64(0xFFFFFFFF00000000ULL) ); 20989 assign( t2, 20990 binop(Iop_Or64, 20991 binop(Iop_Shr64, 20992 binop(Iop_And64,mkexpr(s16),mkexpr(m32)), 20993 mkU8(32)), 20994 binop(Iop_And64, 20995 binop(Iop_Shl64,mkexpr(s16),mkU8(32)), 20996 mkexpr(m32)) 20997 ) 20998 ); 20999 return t2; 21000 } 21001 if (ty == Ity_I32) { 21002 assign( t2, 21003 binop( 21004 Iop_Or32, 21005 binop(Iop_Shl32, mkexpr(t1), mkU8(24)), 21006 binop( 21007 Iop_Or32, 21008 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 21009 mkU32(0x00FF0000)), 21010 
binop(Iop_Or32, 21011 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)), 21012 mkU32(0x0000FF00)), 21013 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)), 21014 mkU32(0x000000FF) ) 21015 ))) 21016 ); 21017 return t2; 21018 } 21019 if (ty == Ity_I16) { 21020 assign(t2, 21021 binop(Iop_Or16, 21022 binop(Iop_Shl16, mkexpr(t1), mkU8(8)), 21023 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) )); 21024 return t2; 21025 } 21026 vassert(0); 21027 /*NOTREACHED*/ 21028 return IRTemp_INVALID; 21029 } 21030 21031 21032 __attribute__((noinline)) 21033 static 21034 Long dis_ESC_0F ( 21035 /*MB_OUT*/DisResult* dres, 21036 /*MB_OUT*/Bool* expect_CAS, 21037 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 21038 Bool resteerCisOk, 21039 void* callback_opaque, 21040 VexArchInfo* archinfo, 21041 VexAbiInfo* vbi, 21042 Prefix pfx, Int sz, Long deltaIN 21043 ) 21044 { 21045 Long d64 = 0; 21046 IRTemp addr = IRTemp_INVALID; 21047 IRTemp t1 = IRTemp_INVALID; 21048 IRTemp t2 = IRTemp_INVALID; 21049 UChar modrm = 0; 21050 Int am_sz = 0; 21051 Int alen = 0; 21052 HChar dis_buf[50]; 21053 21054 /* In the first switch, look for ordinary integer insns. */ 21055 Long delta = deltaIN; 21056 UChar opc = getUChar(delta); 21057 delta++; 21058 switch (opc) { /* first switch */ 21059 21060 case 0x01: 21061 { 21062 modrm = getUChar(delta); 21063 /* 0F 01 /0 -- SGDT */ 21064 /* 0F 01 /1 -- SIDT */ 21065 if (!epartIsReg(modrm) 21066 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) { 21067 /* This is really revolting, but ... since each processor 21068 (core) only has one IDT and one GDT, just let the guest 21069 see it (pass-through semantics). I can't see any way to 21070 construct a faked-up value, so don't bother to try. */ 21071 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21072 delta += alen; 21073 switch (gregLO3ofRM(modrm)) { 21074 case 0: DIP("sgdt %s\n", dis_buf); break; 21075 case 1: DIP("sidt %s\n", dis_buf); break; 21076 default: vassert(0); /*NOTREACHED*/ 21077 } 21078 IRDirty* d = unsafeIRDirty_0_N ( 21079 0/*regparms*/, 21080 "amd64g_dirtyhelper_SxDT", 21081 &amd64g_dirtyhelper_SxDT, 21082 mkIRExprVec_2( mkexpr(addr), 21083 mkU64(gregLO3ofRM(modrm)) ) 21084 ); 21085 /* declare we're writing memory */ 21086 d->mFx = Ifx_Write; 21087 d->mAddr = mkexpr(addr); 21088 d->mSize = 6; 21089 stmt( IRStmt_Dirty(d) ); 21090 return delta; 21091 } 21092 /* 0F 01 D0 = XGETBV */ 21093 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21094 delta += 1; 21095 DIP("xgetbv\n"); 21096 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I 21097 am not sure if that translates in to SEGV or to something 21098 else, in user space. */ 21099 t1 = newTemp(Ity_I32); 21100 assign( t1, getIReg32(R_RCX) ); 21101 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)), 21102 Ijk_SigSEGV, 21103 IRConst_U64(guest_RIP_curr_instr), 21104 OFFB_RIP 21105 )); 21106 putIRegRAX(4, mkU32(7)); 21107 putIRegRDX(4, mkU32(0)); 21108 return delta; 21109 } 21110 /* BEGIN HACKY SUPPORT FOR xtest */ 21111 /* 0F 01 D6 = XTEST */ 21112 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21113 /* Sets ZF because there never is a transaction, and all 21114 CF, OF, SF, PF and AF are always cleared by xtest. */ 21115 delta += 1; 21116 DIP("xtest\n"); 21117 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21118 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21119 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) )); 21120 /* Set NDEP even though it isn't used. 
This makes redundant-PUT 21121 elimination of previous stores to this field work better. */ 21122 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21123 return delta; 21124 } 21125 /* END HACKY SUPPORT FOR xtest */ 21126 /* 0F 01 F9 = RDTSCP */ 21127 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) { 21128 delta += 1; 21129 /* Uses dirty helper: 21130 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* ) 21131 declared to wr rax, rcx, rdx 21132 */ 21133 const HChar* fName = "amd64g_dirtyhelper_RDTSCP"; 21134 void* fAddr = &amd64g_dirtyhelper_RDTSCP; 21135 IRDirty* d 21136 = unsafeIRDirty_0_N ( 0/*regparms*/, 21137 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21138 /* declare guest state effects */ 21139 d->nFxState = 3; 21140 vex_bzero(&d->fxState, sizeof(d->fxState)); 21141 d->fxState[0].fx = Ifx_Write; 21142 d->fxState[0].offset = OFFB_RAX; 21143 d->fxState[0].size = 8; 21144 d->fxState[1].fx = Ifx_Write; 21145 d->fxState[1].offset = OFFB_RCX; 21146 d->fxState[1].size = 8; 21147 d->fxState[2].fx = Ifx_Write; 21148 d->fxState[2].offset = OFFB_RDX; 21149 d->fxState[2].size = 8; 21150 /* execute the dirty call, side-effecting guest state */ 21151 stmt( IRStmt_Dirty(d) ); 21152 /* RDTSCP is a serialising insn. So, just in case someone is 21153 using it as a memory fence ... */ 21154 stmt( IRStmt_MBE(Imbe_Fence) ); 21155 DIP("rdtscp\n"); 21156 return delta; 21157 } 21158 /* else decode failed */ 21159 break; 21160 } 21161 21162 case 0x05: /* SYSCALL */ 21163 guest_RIP_next_mustcheck = True; 21164 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 21165 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) ); 21166 /* It's important that all guest state is up-to-date 21167 at this point. So we declare an end-of-block here, which 21168 forces any cached guest state to be flushed. */ 21169 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed); 21170 vassert(dres->whatNext == Dis_StopHere); 21171 DIP("syscall\n"); 21172 return delta; 21173 21174 case 0x0B: /* UD2 */ 21175 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 21176 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr); 21177 vassert(dres->whatNext == Dis_StopHere); 21178 DIP("ud2\n"); 21179 return delta; 21180 21181 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ 21182 /* 0F 0D /1 -- prefetchw mem8 */ 21183 if (have66orF2orF3(pfx)) goto decode_failure; 21184 modrm = getUChar(delta); 21185 if (epartIsReg(modrm)) goto decode_failure; 21186 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 21187 goto decode_failure; 21188 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21189 delta += alen; 21190 switch (gregLO3ofRM(modrm)) { 21191 case 0: DIP("prefetch %s\n", dis_buf); break; 21192 case 1: DIP("prefetchw %s\n", dis_buf); break; 21193 default: vassert(0); /*NOTREACHED*/ 21194 } 21195 return delta; 21196 21197 case 0x1F: 21198 if (haveF2orF3(pfx)) goto decode_failure; 21199 modrm = getUChar(delta); 21200 if (epartIsReg(modrm)) goto decode_failure; 21201 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21202 delta += alen; 21203 DIP("nop%c %s\n", nameISize(sz), dis_buf); 21204 return delta; 21205 21206 case 0x31: { /* RDTSC */ 21207 IRTemp val = newTemp(Ity_I64); 21208 IRExpr** args = mkIRExprVec_0(); 21209 IRDirty* d = unsafeIRDirty_1_N ( 21210 val, 21211 0/*regparms*/, 21212 "amd64g_dirtyhelper_RDTSC", 21213 &amd64g_dirtyhelper_RDTSC, 21214 args 21215 ); 21216 if (have66orF2orF3(pfx)) goto decode_failure; 21217 /* execute the dirty call, dumping the result in val. 
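The helper returns the whole 64-bit timestamp counter in val; the two PUTs
below split it as the architecture requires, roughly

     EDX = high 32 bits of val
     EAX = low  32 bits of val

with the usual rule that a 32-bit register write zeroes the upper halves of
RDX and RAX.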
*/ 21218 stmt( IRStmt_Dirty(d) ); 21219 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); 21220 putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); 21221 DIP("rdtsc\n"); 21222 return delta; 21223 } 21224 21225 case 0x40: 21226 case 0x41: 21227 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 21228 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 21229 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 21230 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 21231 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 21232 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 21233 case 0x48: /* CMOVSb (cmov negative) */ 21234 case 0x49: /* CMOVSb (cmov not negative) */ 21235 case 0x4A: /* CMOVP (cmov parity even) */ 21236 case 0x4B: /* CMOVNP (cmov parity odd) */ 21237 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 21238 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 21239 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 21240 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 21241 if (haveF2orF3(pfx)) goto decode_failure; 21242 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); 21243 return delta; 21244 21245 case 0x80: 21246 case 0x81: 21247 case 0x82: /* JBb/JNAEb (jump below) */ 21248 case 0x83: /* JNBb/JAEb (jump not below) */ 21249 case 0x84: /* JZb/JEb (jump zero) */ 21250 case 0x85: /* JNZb/JNEb (jump not zero) */ 21251 case 0x86: /* JBEb/JNAb (jump below or equal) */ 21252 case 0x87: /* JNBEb/JAb (jump not below or equal) */ 21253 case 0x88: /* JSb (jump negative) */ 21254 case 0x89: /* JSb (jump not negative) */ 21255 case 0x8A: /* JP (jump parity even) */ 21256 case 0x8B: /* JNP/JPO (jump parity odd) */ 21257 case 0x8C: /* JLb/JNGEb (jump less) */ 21258 case 0x8D: /* JGEb/JNLb (jump greater or equal) */ 21259 case 0x8E: /* JLEb/JNGb (jump less or equal) */ 21260 case 0x8F: { /* JGb/JNLEb (jump greater) */ 21261 Long jmpDelta; 21262 const HChar* comment = ""; 21263 if (haveF3(pfx)) goto decode_failure; 21264 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 21265 jmpDelta = getSDisp32(delta); 21266 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta; 21267 delta += 4; 21268 if (resteerCisOk 21269 && vex_control.guest_chase_cond 21270 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21271 && jmpDelta < 0 21272 && resteerOkFn( callback_opaque, d64) ) { 21273 /* Speculation: assume this backward branch is taken. So 21274 we need to emit a side-exit to the insn following this 21275 one, on the negation of the condition, and continue at 21276 the branch target address (d64). If we wind up back at 21277 the first instruction of the trace, just stop; it's 21278 better to let the IR loop unroller handle that case. */ 21279 stmt( IRStmt_Exit( 21280 mk_amd64g_calculate_condition( 21281 (AMD64Condcode)(1 ^ (opc - 0x80))), 21282 Ijk_Boring, 21283 IRConst_U64(guest_RIP_bbstart+delta), 21284 OFFB_RIP 21285 )); 21286 dres->whatNext = Dis_ResteerC; 21287 dres->continueAt = d64; 21288 comment = "(assumed taken)"; 21289 } 21290 else 21291 if (resteerCisOk 21292 && vex_control.guest_chase_cond 21293 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21294 && jmpDelta >= 0 21295 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 21296 /* Speculation: assume this forward branch is not taken. 21297 So we need to emit a side-exit to d64 (the dest) and 21298 continue disassembling at the insn immediately 21299 following this one. 
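Note that the negation used in the backward-branch case relies on the amd64
condition codes coming in complementary even/odd pairs, so
(1 ^ (opc - 0x80)) names exactly the opposite condition.  Schematically the
two speculative translations are:

     backward, assumed taken:     if (!cond) exit to next insn; resteer to target
     forward, assumed not taken:  if (cond)  exit to target;    resteer to next insn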
*/ 21300 stmt( IRStmt_Exit( 21301 mk_amd64g_calculate_condition((AMD64Condcode) 21302 (opc - 0x80)), 21303 Ijk_Boring, 21304 IRConst_U64(d64), 21305 OFFB_RIP 21306 )); 21307 dres->whatNext = Dis_ResteerC; 21308 dres->continueAt = guest_RIP_bbstart+delta; 21309 comment = "(assumed not taken)"; 21310 } 21311 else { 21312 /* Conservative default translation - end the block at 21313 this point. */ 21314 jcc_01( dres, (AMD64Condcode)(opc - 0x80), 21315 guest_RIP_bbstart+delta, d64 ); 21316 vassert(dres->whatNext == Dis_StopHere); 21317 } 21318 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment); 21319 return delta; 21320 } 21321 21322 case 0x90: 21323 case 0x91: 21324 case 0x92: /* set-Bb/set-NAEb (set if below) */ 21325 case 0x93: /* set-NBb/set-AEb (set if not below) */ 21326 case 0x94: /* set-Zb/set-Eb (set if zero) */ 21327 case 0x95: /* set-NZb/set-NEb (set if not zero) */ 21328 case 0x96: /* set-BEb/set-NAb (set if below or equal) */ 21329 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */ 21330 case 0x98: /* set-Sb (set if negative) */ 21331 case 0x99: /* set-Sb (set if not negative) */ 21332 case 0x9A: /* set-P (set if parity even) */ 21333 case 0x9B: /* set-NP (set if parity odd) */ 21334 case 0x9C: /* set-Lb/set-NGEb (set if less) */ 21335 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */ 21336 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */ 21337 case 0x9F: /* set-Gb/set-NLEb (set if greater) */ 21338 if (haveF2orF3(pfx)) goto decode_failure; 21339 t1 = newTemp(Ity_I8); 21340 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) ); 21341 modrm = getUChar(delta); 21342 if (epartIsReg(modrm)) { 21343 delta++; 21344 putIRegE(1, pfx, modrm, mkexpr(t1)); 21345 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), 21346 nameIRegE(1,pfx,modrm)); 21347 } else { 21348 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21349 delta += alen; 21350 storeLE( mkexpr(addr), mkexpr(t1) ); 21351 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf); 21352 } 21353 return delta; 21354 21355 case 0x1A: 21356 case 0x1B: { /* Future MPX instructions, currently NOPs. 21357 BNDMK b, m F3 0F 1B 21358 BNDCL b, r/m F3 0F 1A 21359 BNDCU b, r/m F2 0F 1A 21360 BNDCN b, r/m F2 0F 1B 21361 BNDMOV b, b/m 66 0F 1A 21362 BNDMOV b/m, b 66 0F 1B 21363 BNDLDX b, mib 0F 1A 21364 BNDSTX mib, b 0F 1B */ 21365 21366 /* All instructions have two operands. One operand is always the 21367 bnd register number (bnd0-bnd3, other register numbers are 21368 ignored when MPX isn't enabled, but should generate an 21369 exception if MPX is enabled) given by gregOfRexRM. The other 21370 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded 21371 address, all of which can be decoded by using either 21372 eregOfRexRM or disAMode. 
*/ 21373 21374 modrm = getUChar(delta); 21375 int bnd = gregOfRexRM(pfx,modrm); 21376 const HChar *oper; 21377 if (epartIsReg(modrm)) { 21378 oper = nameIReg64 (eregOfRexRM(pfx,modrm)); 21379 delta += 1; 21380 } else { 21381 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21382 delta += alen; 21383 oper = dis_buf; 21384 } 21385 21386 if (haveF3no66noF2 (pfx)) { 21387 if (opc == 0x1B) { 21388 DIP ("bndmk %s, %%bnd%d\n", oper, bnd); 21389 } else /* opc == 0x1A */ { 21390 DIP ("bndcl %s, %%bnd%d\n", oper, bnd); 21391 } 21392 } else if (haveF2no66noF3 (pfx)) { 21393 if (opc == 0x1A) { 21394 DIP ("bndcu %s, %%bnd%d\n", oper, bnd); 21395 } else /* opc == 0x1B */ { 21396 DIP ("bndcn %s, %%bnd%d\n", oper, bnd); 21397 } 21398 } else if (have66noF2noF3 (pfx)) { 21399 if (opc == 0x1A) { 21400 DIP ("bndmov %s, %%bnd%d\n", oper, bnd); 21401 } else /* opc == 0x1B */ { 21402 DIP ("bndmov %%bnd%d, %s\n", bnd, oper); 21403 } 21404 } else if (haveNo66noF2noF3 (pfx)) { 21405 if (opc == 0x1A) { 21406 DIP ("bndldx %s, %%bnd%d\n", oper, bnd); 21407 } else /* opc == 0x1B */ { 21408 DIP ("bndstx %%bnd%d, %s\n", bnd, oper); 21409 } 21410 } else goto decode_failure; 21411 21412 return delta; 21413 } 21414 21415 case 0xA2: { /* CPUID */ 21416 /* Uses dirty helper: 21417 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) 21418 declared to mod rax, wr rbx, rcx, rdx 21419 */ 21420 IRDirty* d = NULL; 21421 const HChar* fName = NULL; 21422 void* fAddr = NULL; 21423 if (haveF2orF3(pfx)) goto decode_failure; 21424 /* This isn't entirely correct, CPUID should depend on the VEX 21425 capabilities, not on the underlying CPU. See bug #324882. */ 21426 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21427 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) && 21428 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21429 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16"; 21430 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16; 21431 /* This is a Core-i5-2300-like machine */ 21432 } 21433 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21434 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) { 21435 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; 21436 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; 21437 /* This is a Core-i5-670-like machine */ 21438 } 21439 else { 21440 /* Give a CPUID for at least a baseline machine, SSE2 21441 only, and no CX16 */ 21442 fName = "amd64g_dirtyhelper_CPUID_baseline"; 21443 fAddr = &amd64g_dirtyhelper_CPUID_baseline; 21444 } 21445 21446 vassert(fName); vassert(fAddr); 21447 d = unsafeIRDirty_0_N ( 0/*regparms*/, 21448 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21449 /* declare guest state effects */ 21450 d->nFxState = 4; 21451 vex_bzero(&d->fxState, sizeof(d->fxState)); 21452 d->fxState[0].fx = Ifx_Modify; 21453 d->fxState[0].offset = OFFB_RAX; 21454 d->fxState[0].size = 8; 21455 d->fxState[1].fx = Ifx_Write; 21456 d->fxState[1].offset = OFFB_RBX; 21457 d->fxState[1].size = 8; 21458 d->fxState[2].fx = Ifx_Modify; 21459 d->fxState[2].offset = OFFB_RCX; 21460 d->fxState[2].size = 8; 21461 d->fxState[3].fx = Ifx_Write; 21462 d->fxState[3].offset = OFFB_RDX; 21463 d->fxState[3].size = 8; 21464 /* execute the dirty call, side-effecting guest state */ 21465 stmt( IRStmt_Dirty(d) ); 21466 /* CPUID is a serialising insn. So, just in case someone is 21467 using it as a memory fence ... */ 21468 stmt( IRStmt_MBE(Imbe_Fence) ); 21469 DIP("cpuid\n"); 21470 return delta; 21471 } 21472 21473 case 0xA3: { /* BT Gv,Ev */ 21474 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. 
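(Beyond the prefix question, note that architecturally the register form of
bt takes its bit index modulo the operand width, whereas the memory form may
select a bit outside the word the address names; both behaviours are left to
dis_bt_G_E.)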
*/ 21475 Bool ok = True; 21476 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21477 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok ); 21478 if (!ok) goto decode_failure; 21479 return delta; 21480 } 21481 21482 case 0xA4: /* SHLDv imm8,Gv,Ev */ 21483 modrm = getUChar(delta); 21484 d64 = delta + lengthAMode(pfx, delta); 21485 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 21486 delta = dis_SHLRD_Gv_Ev ( 21487 vbi, pfx, delta, modrm, sz, 21488 mkU8(getUChar(d64)), True, /* literal */ 21489 dis_buf, True /* left */ ); 21490 return delta; 21491 21492 case 0xA5: /* SHLDv %cl,Gv,Ev */ 21493 modrm = getUChar(delta); 21494 delta = dis_SHLRD_Gv_Ev ( 21495 vbi, pfx, delta, modrm, sz, 21496 getIRegCL(), False, /* not literal */ 21497 "%cl", True /* left */ ); 21498 return delta; 21499 21500 case 0xAB: { /* BTS Gv,Ev */ 21501 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21502 Bool ok = True; 21503 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21504 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok ); 21505 if (!ok) goto decode_failure; 21506 return delta; 21507 } 21508 21509 case 0xAC: /* SHRDv imm8,Gv,Ev */ 21510 modrm = getUChar(delta); 21511 d64 = delta + lengthAMode(pfx, delta); 21512 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 21513 delta = dis_SHLRD_Gv_Ev ( 21514 vbi, pfx, delta, modrm, sz, 21515 mkU8(getUChar(d64)), True, /* literal */ 21516 dis_buf, False /* right */ ); 21517 return delta; 21518 21519 case 0xAD: /* SHRDv %cl,Gv,Ev */ 21520 modrm = getUChar(delta); 21521 delta = dis_SHLRD_Gv_Ev ( 21522 vbi, pfx, delta, modrm, sz, 21523 getIRegCL(), False, /* not literal */ 21524 "%cl", False /* right */); 21525 return delta; 21526 21527 case 0xAF: /* IMUL Ev, Gv */ 21528 if (haveF2orF3(pfx)) goto decode_failure; 21529 delta = dis_mul_E_G ( vbi, pfx, sz, delta ); 21530 return delta; 21531 21532 case 0xB0: { /* CMPXCHG Gb,Eb */ 21533 Bool ok = True; 21534 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */ 21535 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); 21536 if (!ok) goto decode_failure; 21537 return delta; 21538 } 21539 21540 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */ 21541 Bool ok = True; 21542 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */ 21543 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; 21544 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); 21545 if (!ok) goto decode_failure; 21546 return delta; 21547 } 21548 21549 case 0xB3: { /* BTR Gv,Ev */ 21550 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21551 Bool ok = True; 21552 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21553 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok ); 21554 if (!ok) goto decode_failure; 21555 return delta; 21556 } 21557 21558 case 0xB6: /* MOVZXb Eb,Gv */ 21559 if (haveF2orF3(pfx)) goto decode_failure; 21560 if (sz != 2 && sz != 4 && sz != 8) 21561 goto decode_failure; 21562 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False ); 21563 return delta; 21564 21565 case 0xB7: /* MOVZXw Ew,Gv */ 21566 if (haveF2orF3(pfx)) goto decode_failure; 21567 if (sz != 4 && sz != 8) 21568 goto decode_failure; 21569 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False ); 21570 return delta; 21571 21572 case 0xBA: { /* Grp8 Ib,Ev */ 21573 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. 
*/ 21574 Bool decode_OK = False; 21575 modrm = getUChar(delta); 21576 am_sz = lengthAMode(pfx,delta); 21577 d64 = getSDisp8(delta + am_sz); 21578 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 21579 &decode_OK ); 21580 if (!decode_OK) 21581 goto decode_failure; 21582 return delta; 21583 } 21584 21585 case 0xBB: { /* BTC Gv,Ev */ 21586 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21587 Bool ok = False; 21588 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21589 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok ); 21590 if (!ok) goto decode_failure; 21591 return delta; 21592 } 21593 21594 case 0xBC: /* BSF Gv,Ev */ 21595 if (!haveF2orF3(pfx) 21596 || (haveF3noF2(pfx) 21597 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) { 21598 /* no-F2 no-F3 0F BC = BSF 21599 or F3 0F BC = REP; BSF on older CPUs. */ 21600 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 21601 return delta; 21602 } 21603 /* Fall through, since F3 0F BC is TZCNT, and needs to 21604 be handled by dis_ESC_0F__SSE4. */ 21605 break; 21606 21607 case 0xBD: /* BSR Gv,Ev */ 21608 if (!haveF2orF3(pfx) 21609 || (haveF3noF2(pfx) 21610 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) { 21611 /* no-F2 no-F3 0F BD = BSR 21612 or F3 0F BD = REP; BSR on older CPUs. */ 21613 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 21614 return delta; 21615 } 21616 /* Fall through, since F3 0F BD is LZCNT, and needs to 21617 be handled by dis_ESC_0F__SSE4. */ 21618 break; 21619 21620 case 0xBE: /* MOVSXb Eb,Gv */ 21621 if (haveF2orF3(pfx)) goto decode_failure; 21622 if (sz != 2 && sz != 4 && sz != 8) 21623 goto decode_failure; 21624 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True ); 21625 return delta; 21626 21627 case 0xBF: /* MOVSXw Ew,Gv */ 21628 if (haveF2orF3(pfx)) goto decode_failure; 21629 if (sz != 4 && sz != 8) 21630 goto decode_failure; 21631 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True ); 21632 return delta; 21633 21634 case 0xC0: { /* XADD Gb,Eb */ 21635 Bool decode_OK = False; 21636 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta ); 21637 if (!decode_OK) 21638 goto decode_failure; 21639 return delta; 21640 } 21641 21642 case 0xC1: { /* XADD Gv,Ev */ 21643 Bool decode_OK = False; 21644 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); 21645 if (!decode_OK) 21646 goto decode_failure; 21647 return delta; 21648 } 21649 21650 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ 21651 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; 21652 IRTemp expdHi = newTemp(elemTy); 21653 IRTemp expdLo = newTemp(elemTy); 21654 IRTemp dataHi = newTemp(elemTy); 21655 IRTemp dataLo = newTemp(elemTy); 21656 IRTemp oldHi = newTemp(elemTy); 21657 IRTemp oldLo = newTemp(elemTy); 21658 IRTemp flags_old = newTemp(Ity_I64); 21659 IRTemp flags_new = newTemp(Ity_I64); 21660 IRTemp success = newTemp(Ity_I1); 21661 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; 21662 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; 21663 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; 21664 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0); 21665 IRTemp expdHi64 = newTemp(Ity_I64); 21666 IRTemp expdLo64 = newTemp(Ity_I64); 21667 21668 /* Translate this using a DCAS, even if there is no LOCK 21669 prefix. Life is too short to bother with generating two 21670 different translations for the with/without-LOCK-prefix 21671 cases. */ 21672 *expect_CAS = True; 21673 21674 /* Decode, and generate address. 
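The architectural behaviour being implemented is, roughly (shown for the
8-byte form; the 16-byte form is the same with RDX:RAX, RCX:RBX and a
16-byte, 16-aligned memory operand):

     if (EDX:EAX == m64) { ZF = 1; m64 = ECX:EBX; }
     else                { ZF = 0; EDX:EAX = m64; }

with the other arithmetic flags left unchanged, which is what the flag-thunk
fixup at the end of this case arranges.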
*/ 21675 if (have66(pfx)) goto decode_failure; 21676 if (sz != 4 && sz != 8) goto decode_failure; 21677 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 21678 goto decode_failure; 21679 modrm = getUChar(delta); 21680 if (epartIsReg(modrm)) goto decode_failure; 21681 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 21682 if (haveF2orF3(pfx)) { 21683 /* Since the e-part is memory only, F2 or F3 (one or the 21684 other) is acceptable if LOCK is also present. But only 21685 for cmpxchg8b. */ 21686 if (sz == 8) goto decode_failure; 21687 if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure; 21688 } 21689 21690 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21691 delta += alen; 21692 21693 /* cmpxchg16b requires an alignment check. */ 21694 if (sz == 8) 21695 gen_SEGV_if_not_16_aligned( addr ); 21696 21697 /* Get the expected and new values. */ 21698 assign( expdHi64, getIReg64(R_RDX) ); 21699 assign( expdLo64, getIReg64(R_RAX) ); 21700 21701 /* These are the correctly-sized expected and new values. 21702 However, we also get expdHi64/expdLo64 above as 64-bits 21703 regardless, because we will need them later in the 32-bit 21704 case (paradoxically). */ 21705 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) 21706 : mkexpr(expdHi64) ); 21707 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 21708 : mkexpr(expdLo64) ); 21709 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 21710 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) ); 21711 21712 /* Do the DCAS */ 21713 stmt( IRStmt_CAS( 21714 mkIRCAS( oldHi, oldLo, 21715 Iend_LE, mkexpr(addr), 21716 mkexpr(expdHi), mkexpr(expdLo), 21717 mkexpr(dataHi), mkexpr(dataLo) 21718 ))); 21719 21720 /* success when oldHi:oldLo == expdHi:expdLo */ 21721 assign( success, 21722 binop(opCasCmpEQ, 21723 binop(opOR, 21724 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 21725 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 21726 ), 21727 zero 21728 )); 21729 21730 /* If the DCAS is successful, that is to say oldHi:oldLo == 21731 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 21732 which is where they came from originally. Both the actual 21733 contents of these two regs, and any shadow values, are 21734 unchanged. If the DCAS fails then we're putting into 21735 RDX:RAX the value seen in memory. */ 21736 /* Now of course there's a complication in the 32-bit case 21737 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 21738 unchanged; but if we use the same scheme as in the 64-bit 21739 case, we get hit by the standard rule that a write to the 21740 bottom 32 bits of an integer register zeros the upper 32 21741 bits. And so the upper halves of RDX and RAX mysteriously 21742 become zero. So we have to stuff back in the original 21743 64-bit values which we previously stashed in 21744 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 21745 /* It's just _so_ much fun ... */ 21746 putIRegRDX( 8, 21747 IRExpr_ITE( mkexpr(success), 21748 mkexpr(expdHi64), 21749 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 21750 : mkexpr(oldHi) 21751 )); 21752 putIRegRAX( 8, 21753 IRExpr_ITE( mkexpr(success), 21754 mkexpr(expdLo64), 21755 sz == 4 ? 
unop(Iop_32Uto64, mkexpr(oldLo)) 21756 : mkexpr(oldLo) 21757 )); 21758 21759 /* Copy the success bit into the Z flag and leave the others 21760 unchanged */ 21761 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); 21762 assign( 21763 flags_new, 21764 binop(Iop_Or64, 21765 binop(Iop_And64, mkexpr(flags_old), 21766 mkU64(~AMD64G_CC_MASK_Z)), 21767 binop(Iop_Shl64, 21768 binop(Iop_And64, 21769 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), 21770 mkU8(AMD64G_CC_SHIFT_Z)) )); 21771 21772 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21773 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 21774 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21775 /* Set NDEP even though it isn't used. This makes 21776 redundant-PUT elimination of previous stores to this field 21777 work better. */ 21778 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21779 21780 /* Sheesh. Aren't you glad it was me and not you that had to 21781 write and validate all this grunge? */ 21782 21783 DIP("cmpxchg8b %s\n", dis_buf); 21784 return delta; 21785 } 21786 21787 case 0xC8: /* BSWAP %eax */ 21788 case 0xC9: 21789 case 0xCA: 21790 case 0xCB: 21791 case 0xCC: 21792 case 0xCD: 21793 case 0xCE: 21794 case 0xCF: /* BSWAP %edi */ 21795 if (haveF2orF3(pfx)) goto decode_failure; 21796 /* According to the AMD64 docs, this insn can have size 4 or 21797 8. */ 21798 if (sz == 4) { 21799 t1 = newTemp(Ity_I32); 21800 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 21801 t2 = math_BSWAP( t1, Ity_I32 ); 21802 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 21803 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 21804 return delta; 21805 } 21806 if (sz == 8) { 21807 t1 = newTemp(Ity_I64); 21808 t2 = newTemp(Ity_I64); 21809 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 21810 t2 = math_BSWAP( t1, Ity_I64 ); 21811 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 21812 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 21813 return delta; 21814 } 21815 goto decode_failure; 21816 21817 default: 21818 break; 21819 21820 } /* first switch */ 21821 21822 21823 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */ 21824 /* In the second switch, pick off MMX insns. */ 21825 21826 if (!have66orF2orF3(pfx)) { 21827 /* So there's no SIMD prefix. 
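In that case any candidate MMX opcode below is simply handed to dis_MMX, which examines the modrm byte itself and reports through decode_OK whether it accepted the instruction.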
*/ 21828 21829 vassert(sz == 4 || sz == 8); 21830 21831 switch (opc) { /* second switch */ 21832 21833 case 0x71: 21834 case 0x72: 21835 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 21836 21837 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 21838 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 21839 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 21840 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 21841 21842 case 0xFC: 21843 case 0xFD: 21844 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 21845 21846 case 0xEC: 21847 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21848 21849 case 0xDC: 21850 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21851 21852 case 0xF8: 21853 case 0xF9: 21854 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 21855 21856 case 0xE8: 21857 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21858 21859 case 0xD8: 21860 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 21861 21862 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 21863 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 21864 21865 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 21866 21867 case 0x74: 21868 case 0x75: 21869 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 21870 21871 case 0x64: 21872 case 0x65: 21873 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 21874 21875 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 21876 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 21877 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 21878 21879 case 0x68: 21880 case 0x69: 21881 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 21882 21883 case 0x60: 21884 case 0x61: 21885 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21886 21887 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 21888 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 21889 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 21890 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 21891 21892 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21893 case 0xF2: 21894 case 0xF3: 21895 21896 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 21897 case 0xD2: 21898 case 0xD3: 21899 21900 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 21901 case 0xE2: { 21902 Bool decode_OK = False; 21903 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN ); 21904 if (decode_OK) 21905 return delta; 21906 goto decode_failure; 21907 } 21908 21909 default: 21910 break; 21911 } /* second switch */ 21912 21913 } 21914 21915 /* A couple of MMX corner cases */ 21916 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) { 21917 if (sz != 4) 21918 goto decode_failure; 21919 do_EMMS_preamble(); 21920 DIP("{f}emms\n"); 21921 return delta; 21922 } 21923 21924 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */ 21925 /* Perhaps it's an SSE or SSE2 instruction. We can try this 21926 without checking the guest hwcaps because SSE2 is a baseline 21927 facility in 64 bit mode. */ 21928 { 21929 Bool decode_OK = False; 21930 delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres ); 21931 if (decode_OK) 21932 return delta; 21933 } 21934 21935 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */ 21936 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps 21937 first. 
*/ 21938 { 21939 Bool decode_OK = False; 21940 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 21941 if (decode_OK) 21942 return delta; 21943 } 21944 21945 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 21946 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps 21947 first. */ 21948 { 21949 Bool decode_OK = False; 21950 delta = dis_ESC_0F__SSE4 ( &decode_OK, 21951 archinfo, vbi, pfx, sz, deltaIN ); 21952 if (decode_OK) 21953 return delta; 21954 } 21955 21956 decode_failure: 21957 return deltaIN; /* fail */ 21958 } 21959 21960 21961 /*------------------------------------------------------------*/ 21962 /*--- ---*/ 21963 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/ 21964 /*--- ---*/ 21965 /*------------------------------------------------------------*/ 21966 21967 __attribute__((noinline)) 21968 static 21969 Long dis_ESC_0F38 ( 21970 /*MB_OUT*/DisResult* dres, 21971 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 21972 Bool resteerCisOk, 21973 void* callback_opaque, 21974 VexArchInfo* archinfo, 21975 VexAbiInfo* vbi, 21976 Prefix pfx, Int sz, Long deltaIN 21977 ) 21978 { 21979 Long delta = deltaIN; 21980 UChar opc = getUChar(delta); 21981 delta++; 21982 switch (opc) { 21983 21984 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */ 21985 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */ 21986 if (!haveF2orF3(pfx) && !haveVEX(pfx) 21987 && (sz == 2 || sz == 4 || sz == 8)) { 21988 IRTemp addr = IRTemp_INVALID; 21989 UChar modrm = 0; 21990 Int alen = 0; 21991 HChar dis_buf[50]; 21992 modrm = getUChar(delta); 21993 if (epartIsReg(modrm)) break; 21994 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21995 delta += alen; 21996 IRType ty = szToITy(sz); 21997 IRTemp src = newTemp(ty); 21998 if (opc == 0xF0) { /* LOAD */ 21999 assign(src, loadLE(ty, mkexpr(addr))); 22000 IRTemp dst = math_BSWAP(src, ty); 22001 putIRegG(sz, pfx, modrm, mkexpr(dst)); 22002 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm)); 22003 } else { /* STORE */ 22004 assign(src, getIRegG(sz, pfx, modrm)); 22005 IRTemp dst = math_BSWAP(src, ty); 22006 storeLE(mkexpr(addr), mkexpr(dst)); 22007 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf); 22008 } 22009 return delta; 22010 } 22011 /* else fall through; maybe one of the decoders below knows what 22012 it is. */ 22013 break; 22014 } 22015 22016 default: 22017 break; 22018 22019 } 22020 22021 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */ 22022 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps 22023 rather than proceeding indiscriminately. */ 22024 { 22025 Bool decode_OK = False; 22026 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 22027 if (decode_OK) 22028 return delta; 22029 } 22030 22031 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 22032 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps 22033 rather than proceeding indiscriminately. 
*/ 22034 { 22035 Bool decode_OK = False; 22036 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN ); 22037 if (decode_OK) 22038 return delta; 22039 } 22040 22041 /*decode_failure:*/ 22042 return deltaIN; /* fail */ 22043 } 22044 22045 22046 /*------------------------------------------------------------*/ 22047 /*--- ---*/ 22048 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/ 22049 /*--- ---*/ 22050 /*------------------------------------------------------------*/ 22051 22052 __attribute__((noinline)) 22053 static 22054 Long dis_ESC_0F3A ( 22055 /*MB_OUT*/DisResult* dres, 22056 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 22057 Bool resteerCisOk, 22058 void* callback_opaque, 22059 VexArchInfo* archinfo, 22060 VexAbiInfo* vbi, 22061 Prefix pfx, Int sz, Long deltaIN 22062 ) 22063 { 22064 Long delta = deltaIN; 22065 UChar opc = getUChar(delta); 22066 delta++; 22067 switch (opc) { 22068 22069 default: 22070 break; 22071 22072 } 22073 22074 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */ 22075 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps 22076 rather than proceeding indiscriminately. */ 22077 { 22078 Bool decode_OK = False; 22079 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN ); 22080 if (decode_OK) 22081 return delta; 22082 } 22083 22084 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */ 22085 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps 22086 rather than proceeding indiscriminately. */ 22087 { 22088 Bool decode_OK = False; 22089 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN ); 22090 if (decode_OK) 22091 return delta; 22092 } 22093 22094 return deltaIN; /* fail */ 22095 } 22096 22097 22098 /*------------------------------------------------------------*/ 22099 /*--- ---*/ 22100 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/ 22101 /*--- ---*/ 22102 /*------------------------------------------------------------*/ 22103 22104 /* FIXME: common up with the _256_ version below? */ 22105 static 22106 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG ( 22107 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 22108 Prefix pfx, Long delta, const HChar* name, 22109 /* The actual operation. Use either 'op' or 'opfn', 22110 but not both. */ 22111 IROp op, IRTemp(*opFn)(IRTemp,IRTemp), 22112 Bool invertLeftArg, 22113 Bool swapArgs 22114 ) 22115 { 22116 UChar modrm = getUChar(delta); 22117 UInt rD = gregOfRexRM(pfx, modrm); 22118 UInt rSL = getVexNvvvv(pfx); 22119 IRTemp tSL = newTemp(Ity_V128); 22120 IRTemp tSR = newTemp(Ity_V128); 22121 IRTemp addr = IRTemp_INVALID; 22122 HChar dis_buf[50]; 22123 Int alen = 0; 22124 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/); 22125 22126 assign(tSL, invertLeftArg ? 
unop(Iop_NotV128, getXMMReg(rSL)) 22127 : getXMMReg(rSL)); 22128 22129 if (epartIsReg(modrm)) { 22130 UInt rSR = eregOfRexRM(pfx, modrm); 22131 delta += 1; 22132 assign(tSR, getXMMReg(rSR)); 22133 DIP("%s %s,%s,%s\n", 22134 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD)); 22135 } else { 22136 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 22137 delta += alen; 22138 assign(tSR, loadLE(Ity_V128, mkexpr(addr))); 22139 DIP("%s %s,%s,%s\n", 22140 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD)); 22141 } 22142 22143 IRTemp res = IRTemp_INVALID; 22144 if (op != Iop_INVALID) { 22145 vassert(opFn == NULL); 22146 res = newTemp(Ity_V128); 22147 if (requiresRMode(op)) { 22148 IRTemp rm = newTemp(Ity_I32); 22149 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */ 22150 assign(res, swapArgs 22151 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL)) 22152 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR))); 22153 } else { 22154 assign(res, swapArgs 22155 ? binop(op, mkexpr(tSR), mkexpr(tSL)) 22156 : binop(op, mkexpr(tSL), mkexpr(tSR))); 22157 } 22158 } else { 22159 vassert(opFn != NULL); 22160 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); 22161 } 22162 22163 putYMMRegLoAndZU(rD, mkexpr(res)); 22164 22165 *uses_vvvv = True; 22166 return delta; 22167 } 22168 22169 22170 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp 22171 for the operation, no inversion of the left arg, and no swapping of 22172 args. */ 22173 static 22174 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple ( 22175 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 22176 Prefix pfx, Long delta, const HChar* name, 22177 IROp op 22178 ) 22179 { 22180 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22181 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False); 22182 } 22183 22184 22185 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR 22186 generator to compute the result, no inversion of the left 22187 arg, and no swapping of args. */ 22188 static 22189 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex ( 22190 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 22191 Prefix pfx, Long delta, const HChar* name, 22192 IRTemp(*opFn)(IRTemp,IRTemp) 22193 ) 22194 { 22195 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22196 uses_vvvv, vbi, pfx, delta, name, 22197 Iop_INVALID, opFn, False, False ); 22198 } 22199 22200 22201 /* Vector by scalar shift of V by the amount specified at the bottom 22202 of E. 
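The amount is the low 64 bits of E. Out-of-range counts behave as on the hardware: for logical shifts a count >= the lane size gives an all-zero result, and for arithmetic right shifts the count is clamped to lane-size minus 1. For example, assuming op is Iop_ShrN32x4 and the amount is 40, the ITE below selects mkV128(0) rather than doing the shift.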
*/ 22203 static ULong dis_AVX128_shiftV_byE ( VexAbiInfo* vbi, 22204 Prefix pfx, Long delta, 22205 const HChar* opname, IROp op ) 22206 { 22207 HChar dis_buf[50]; 22208 Int alen, size; 22209 IRTemp addr; 22210 Bool shl, shr, sar; 22211 UChar modrm = getUChar(delta); 22212 UInt rG = gregOfRexRM(pfx,modrm); 22213 UInt rV = getVexNvvvv(pfx);; 22214 IRTemp g0 = newTemp(Ity_V128); 22215 IRTemp g1 = newTemp(Ity_V128); 22216 IRTemp amt = newTemp(Ity_I64); 22217 IRTemp amt8 = newTemp(Ity_I8); 22218 if (epartIsReg(modrm)) { 22219 UInt rE = eregOfRexRM(pfx,modrm); 22220 assign( amt, getXMMRegLane64(rE, 0) ); 22221 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22222 nameXMMReg(rV), nameXMMReg(rG) ); 22223 delta++; 22224 } else { 22225 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22226 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22227 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 22228 delta += alen; 22229 } 22230 assign( g0, getXMMReg(rV) ); 22231 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22232 22233 shl = shr = sar = False; 22234 size = 0; 22235 switch (op) { 22236 case Iop_ShlN16x8: shl = True; size = 32; break; 22237 case Iop_ShlN32x4: shl = True; size = 32; break; 22238 case Iop_ShlN64x2: shl = True; size = 64; break; 22239 case Iop_SarN16x8: sar = True; size = 16; break; 22240 case Iop_SarN32x4: sar = True; size = 32; break; 22241 case Iop_ShrN16x8: shr = True; size = 16; break; 22242 case Iop_ShrN32x4: shr = True; size = 32; break; 22243 case Iop_ShrN64x2: shr = True; size = 64; break; 22244 default: vassert(0); 22245 } 22246 22247 if (shl || shr) { 22248 assign( 22249 g1, 22250 IRExpr_ITE( 22251 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22252 binop(op, mkexpr(g0), mkexpr(amt8)), 22253 mkV128(0x0000) 22254 ) 22255 ); 22256 } else 22257 if (sar) { 22258 assign( 22259 g1, 22260 IRExpr_ITE( 22261 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22262 binop(op, mkexpr(g0), mkexpr(amt8)), 22263 binop(op, mkexpr(g0), mkU8(size-1)) 22264 ) 22265 ); 22266 } else { 22267 vassert(0); 22268 } 22269 22270 putYMMRegLoAndZU( rG, mkexpr(g1) ); 22271 return delta; 22272 } 22273 22274 22275 /* Vector by scalar shift of V by the amount specified at the bottom 22276 of E. 
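This is the 256-bit analogue of the function above: the shift amount still comes from the low 64 bits of an XMM register or a 64-bit memory operand, and the all-zero result is built with V128HLtoV256(0, 0).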
*/ 22277 static ULong dis_AVX256_shiftV_byE ( VexAbiInfo* vbi, 22278 Prefix pfx, Long delta, 22279 const HChar* opname, IROp op ) 22280 { 22281 HChar dis_buf[50]; 22282 Int alen, size; 22283 IRTemp addr; 22284 Bool shl, shr, sar; 22285 UChar modrm = getUChar(delta); 22286 UInt rG = gregOfRexRM(pfx,modrm); 22287 UInt rV = getVexNvvvv(pfx);; 22288 IRTemp g0 = newTemp(Ity_V256); 22289 IRTemp g1 = newTemp(Ity_V256); 22290 IRTemp amt = newTemp(Ity_I64); 22291 IRTemp amt8 = newTemp(Ity_I8); 22292 if (epartIsReg(modrm)) { 22293 UInt rE = eregOfRexRM(pfx,modrm); 22294 assign( amt, getXMMRegLane64(rE, 0) ); 22295 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22296 nameYMMReg(rV), nameYMMReg(rG) ); 22297 delta++; 22298 } else { 22299 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22300 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22301 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 22302 delta += alen; 22303 } 22304 assign( g0, getYMMReg(rV) ); 22305 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22306 22307 shl = shr = sar = False; 22308 size = 0; 22309 switch (op) { 22310 case Iop_ShlN16x16: shl = True; size = 32; break; 22311 case Iop_ShlN32x8: shl = True; size = 32; break; 22312 case Iop_ShlN64x4: shl = True; size = 64; break; 22313 case Iop_SarN16x16: sar = True; size = 16; break; 22314 case Iop_SarN32x8: sar = True; size = 32; break; 22315 case Iop_ShrN16x16: shr = True; size = 16; break; 22316 case Iop_ShrN32x8: shr = True; size = 32; break; 22317 case Iop_ShrN64x4: shr = True; size = 64; break; 22318 default: vassert(0); 22319 } 22320 22321 if (shl || shr) { 22322 assign( 22323 g1, 22324 IRExpr_ITE( 22325 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22326 binop(op, mkexpr(g0), mkexpr(amt8)), 22327 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0)) 22328 ) 22329 ); 22330 } else 22331 if (sar) { 22332 assign( 22333 g1, 22334 IRExpr_ITE( 22335 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22336 binop(op, mkexpr(g0), mkexpr(amt8)), 22337 binop(op, mkexpr(g0), mkU8(size-1)) 22338 ) 22339 ); 22340 } else { 22341 vassert(0); 22342 } 22343 22344 putYMMReg( rG, mkexpr(g1) ); 22345 return delta; 22346 } 22347 22348 22349 /* Vector by vector shift of V by the amount specified at the bottom 22350 of E. Vector by vector shifts are defined for all shift amounts, 22351 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts 22352 anyway). */ 22353 static ULong dis_AVX_var_shiftV_byE ( VexAbiInfo* vbi, 22354 Prefix pfx, Long delta, 22355 const HChar* opname, IROp op, Bool isYMM ) 22356 { 22357 HChar dis_buf[50]; 22358 Int alen, size, i; 22359 IRTemp addr; 22360 UChar modrm = getUChar(delta); 22361 UInt rG = gregOfRexRM(pfx,modrm); 22362 UInt rV = getVexNvvvv(pfx);; 22363 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128); 22364 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128); 22365 IRTemp amts[8], sVs[8], res[8]; 22366 if (epartIsReg(modrm)) { 22367 UInt rE = eregOfRexRM(pfx,modrm); 22368 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) ); 22369 if (isYMM) { 22370 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE), 22371 nameYMMReg(rV), nameYMMReg(rG) ); 22372 } else { 22373 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22374 nameXMMReg(rV), nameXMMReg(rG) ); 22375 } 22376 delta++; 22377 } else { 22378 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22379 assign( amt, loadLE(isYMM ? 
Ity_V256 : Ity_V128, mkexpr(addr)) ); 22380 if (isYMM) { 22381 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), 22382 nameYMMReg(rG) ); 22383 } else { 22384 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), 22385 nameXMMReg(rG) ); 22386 } 22387 delta += alen; 22388 } 22389 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) ); 22390 22391 size = 0; 22392 switch (op) { 22393 case Iop_Shl32: size = 32; break; 22394 case Iop_Shl64: size = 64; break; 22395 case Iop_Sar32: size = 32; break; 22396 case Iop_Shr32: size = 32; break; 22397 case Iop_Shr64: size = 64; break; 22398 default: vassert(0); 22399 } 22400 22401 for (i = 0; i < 8; i++) { 22402 sVs[i] = IRTemp_INVALID; 22403 amts[i] = IRTemp_INVALID; 22404 } 22405 switch (size) { 22406 case 32: 22407 if (isYMM) { 22408 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4], 22409 &sVs[3], &sVs[2], &sVs[1], &sVs[0] ); 22410 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4], 22411 &amts[3], &amts[2], &amts[1], &amts[0] ); 22412 } else { 22413 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] ); 22414 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] ); 22415 } 22416 break; 22417 case 64: 22418 if (isYMM) { 22419 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] ); 22420 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] ); 22421 } else { 22422 breakupV128to64s( sV, &sVs[1], &sVs[0] ); 22423 breakupV128to64s( amt, &amts[1], &amts[0] ); 22424 } 22425 break; 22426 default: vassert(0); 22427 } 22428 for (i = 0; i < 8; i++) 22429 if (sVs[i] != IRTemp_INVALID) { 22430 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64); 22431 assign( res[i], 22432 IRExpr_ITE( 22433 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U, 22434 mkexpr(amts[i]), 22435 size == 32 ? mkU32(size) : mkU64(size)), 22436 binop(op, mkexpr(sVs[i]), 22437 unop(size == 32 ? Iop_32to8 : Iop_64to8, 22438 mkexpr(amts[i]))), 22439 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1)) 22440 : size == 32 ? mkU32(0) : mkU64(0) 22441 )); 22442 } 22443 switch (size) { 22444 case 32: 22445 for (i = 0; i < 8; i++) 22446 putYMMRegLane32( rG, i, (i < 4 || isYMM) 22447 ? mkexpr(res[i]) : mkU32(0) ); 22448 break; 22449 case 64: 22450 for (i = 0; i < 4; i++) 22451 putYMMRegLane64( rG, i, (i < 2 || isYMM) 22452 ? mkexpr(res[i]) : mkU64(0) ); 22453 break; 22454 default: vassert(0); 22455 } 22456 22457 return delta; 22458 } 22459 22460 22461 /* Vector by scalar shift of E into V, by an immediate byte. Modified 22462 version of dis_SSE_shiftE_imm. 
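For these immediate forms the destination is the register named by VEX.vvvv, while the modrm reg field carries the /2, /4 or /6 sub-opcode. Out-of-range immediates are resolved at translation time, e.g. a logical shift by an immediate >= the lane size becomes a constant zero.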
*/ 22463 static 22464 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx, 22465 Long delta, const HChar* opname, IROp op ) 22466 { 22467 Bool shl, shr, sar; 22468 UChar rm = getUChar(delta); 22469 IRTemp e0 = newTemp(Ity_V128); 22470 IRTemp e1 = newTemp(Ity_V128); 22471 UInt rD = getVexNvvvv(pfx); 22472 UChar amt, size; 22473 vassert(epartIsReg(rm)); 22474 vassert(gregLO3ofRM(rm) == 2 22475 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 22476 amt = getUChar(delta+1); 22477 delta += 2; 22478 DIP("%s $%d,%s,%s\n", opname, 22479 (Int)amt, 22480 nameXMMReg(eregOfRexRM(pfx,rm)), 22481 nameXMMReg(rD)); 22482 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 22483 22484 shl = shr = sar = False; 22485 size = 0; 22486 switch (op) { 22487 case Iop_ShlN16x8: shl = True; size = 16; break; 22488 case Iop_ShlN32x4: shl = True; size = 32; break; 22489 case Iop_ShlN64x2: shl = True; size = 64; break; 22490 case Iop_SarN16x8: sar = True; size = 16; break; 22491 case Iop_SarN32x4: sar = True; size = 32; break; 22492 case Iop_ShrN16x8: shr = True; size = 16; break; 22493 case Iop_ShrN32x4: shr = True; size = 32; break; 22494 case Iop_ShrN64x2: shr = True; size = 64; break; 22495 default: vassert(0); 22496 } 22497 22498 if (shl || shr) { 22499 assign( e1, amt >= size 22500 ? mkV128(0x0000) 22501 : binop(op, mkexpr(e0), mkU8(amt)) 22502 ); 22503 } else 22504 if (sar) { 22505 assign( e1, amt >= size 22506 ? binop(op, mkexpr(e0), mkU8(size-1)) 22507 : binop(op, mkexpr(e0), mkU8(amt)) 22508 ); 22509 } else { 22510 vassert(0); 22511 } 22512 22513 putYMMRegLoAndZU( rD, mkexpr(e1) ); 22514 return delta; 22515 } 22516 22517 22518 /* Vector by scalar shift of E into V, by an immediate byte. Modified 22519 version of dis_AVX128_shiftE_to_V_imm. */ 22520 static 22521 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx, 22522 Long delta, const HChar* opname, IROp op ) 22523 { 22524 Bool shl, shr, sar; 22525 UChar rm = getUChar(delta); 22526 IRTemp e0 = newTemp(Ity_V256); 22527 IRTemp e1 = newTemp(Ity_V256); 22528 UInt rD = getVexNvvvv(pfx); 22529 UChar amt, size; 22530 vassert(epartIsReg(rm)); 22531 vassert(gregLO3ofRM(rm) == 2 22532 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 22533 amt = getUChar(delta+1); 22534 delta += 2; 22535 DIP("%s $%d,%s,%s\n", opname, 22536 (Int)amt, 22537 nameYMMReg(eregOfRexRM(pfx,rm)), 22538 nameYMMReg(rD)); 22539 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) ); 22540 22541 shl = shr = sar = False; 22542 size = 0; 22543 switch (op) { 22544 case Iop_ShlN16x16: shl = True; size = 16; break; 22545 case Iop_ShlN32x8: shl = True; size = 32; break; 22546 case Iop_ShlN64x4: shl = True; size = 64; break; 22547 case Iop_SarN16x16: sar = True; size = 16; break; 22548 case Iop_SarN32x8: sar = True; size = 32; break; 22549 case Iop_ShrN16x16: shr = True; size = 16; break; 22550 case Iop_ShrN32x8: shr = True; size = 32; break; 22551 case Iop_ShrN64x4: shr = True; size = 64; break; 22552 default: vassert(0); 22553 } 22554 22555 22556 if (shl || shr) { 22557 assign( e1, amt >= size 22558 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0)) 22559 : binop(op, mkexpr(e0), mkU8(amt)) 22560 ); 22561 } else 22562 if (sar) { 22563 assign( e1, amt >= size 22564 ? binop(op, mkexpr(e0), mkU8(size-1)) 22565 : binop(op, mkexpr(e0), mkU8(amt)) 22566 ); 22567 } else { 22568 vassert(0); 22569 } 22570 22571 putYMMReg( rD, mkexpr(e1) ); 22572 return delta; 22573 } 22574 22575 22576 /* Lower 64-bit lane only AVX128 binary operation: 22577 G[63:0] = V[63:0] `op` E[63:0] 22578 G[127:64] = V[127:64] 22579 G[255:128] = 0. 
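In the memory form only the low 64 bits of E are read and the upper half of the E operand is filled with zeroes, which is harmless given the constraint below.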
22580 The specified op must be of the 64F0x2 kind, so that it 22581 copies the upper half of the left operand to the result. 22582 */ 22583 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv, 22584 VexAbiInfo* vbi, 22585 Prefix pfx, Long delta, 22586 const HChar* opname, IROp op ) 22587 { 22588 HChar dis_buf[50]; 22589 Int alen; 22590 IRTemp addr; 22591 UChar rm = getUChar(delta); 22592 UInt rG = gregOfRexRM(pfx,rm); 22593 UInt rV = getVexNvvvv(pfx); 22594 IRExpr* vpart = getXMMReg(rV); 22595 if (epartIsReg(rm)) { 22596 UInt rE = eregOfRexRM(pfx,rm); 22597 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) ); 22598 DIP("%s %s,%s,%s\n", opname, 22599 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22600 delta = delta+1; 22601 } else { 22602 /* We can only do a 64-bit memory read, so the upper half of the 22603 E operand needs to be made simply of zeroes. */ 22604 IRTemp epart = newTemp(Ity_V128); 22605 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22606 assign( epart, unop( Iop_64UtoV128, 22607 loadLE(Ity_I64, mkexpr(addr))) ); 22608 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) ); 22609 DIP("%s %s,%s,%s\n", opname, 22610 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 22611 delta = delta+alen; 22612 } 22613 putYMMRegLane128( rG, 1, mkV128(0) ); 22614 *uses_vvvv = True; 22615 return delta; 22616 } 22617 22618 22619 /* Lower 64-bit lane only AVX128 unary operation: 22620 G[63:0] = op(E[63:0]) 22621 G[127:64] = V[127:64] 22622 G[255:128] = 0 22623 The specified op must be of the 64F0x2 kind, so that it 22624 copies the upper half of the operand to the result. 22625 */ 22626 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv, 22627 VexAbiInfo* vbi, 22628 Prefix pfx, Long delta, 22629 const HChar* opname, IROp op ) 22630 { 22631 HChar dis_buf[50]; 22632 Int alen; 22633 IRTemp addr; 22634 UChar rm = getUChar(delta); 22635 UInt rG = gregOfRexRM(pfx,rm); 22636 UInt rV = getVexNvvvv(pfx); 22637 IRTemp e64 = newTemp(Ity_I64); 22638 22639 /* Fetch E[63:0] */ 22640 if (epartIsReg(rm)) { 22641 UInt rE = eregOfRexRM(pfx,rm); 22642 assign(e64, getXMMRegLane64(rE, 0)); 22643 DIP("%s %s,%s,%s\n", opname, 22644 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22645 delta += 1; 22646 } else { 22647 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22648 assign(e64, loadLE(Ity_I64, mkexpr(addr))); 22649 DIP("%s %s,%s,%s\n", opname, 22650 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 22651 delta += alen; 22652 } 22653 22654 /* Create a value 'arg' as V[127:64]++E[63:0] */ 22655 IRTemp arg = newTemp(Ity_V128); 22656 assign(arg, 22657 binop(Iop_SetV128lo64, 22658 getXMMReg(rV), mkexpr(e64))); 22659 /* and apply op to it */ 22660 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 22661 *uses_vvvv = True; 22662 return delta; 22663 } 22664 22665 22666 /* Lower 32-bit lane only AVX128 unary operation: 22667 G[31:0] = op(E[31:0]) 22668 G[127:32] = V[127:32] 22669 G[255:128] = 0 22670 The specified op must be of the 32F0x4 kind, so that it 22671 copies the upper 3/4 of the operand to the result. 
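For example, assuming op is a 32F0x4-style square-root operation, only lane 0 of the assembled V[127:32]++E[31:0] value is actually transformed; lanes 1..3 are V's own upper lanes passed through.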
22672 */ 22673 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv, 22674 VexAbiInfo* vbi, 22675 Prefix pfx, Long delta, 22676 const HChar* opname, IROp op ) 22677 { 22678 HChar dis_buf[50]; 22679 Int alen; 22680 IRTemp addr; 22681 UChar rm = getUChar(delta); 22682 UInt rG = gregOfRexRM(pfx,rm); 22683 UInt rV = getVexNvvvv(pfx); 22684 IRTemp e32 = newTemp(Ity_I32); 22685 22686 /* Fetch E[31:0] */ 22687 if (epartIsReg(rm)) { 22688 UInt rE = eregOfRexRM(pfx,rm); 22689 assign(e32, getXMMRegLane32(rE, 0)); 22690 DIP("%s %s,%s,%s\n", opname, 22691 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22692 delta += 1; 22693 } else { 22694 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22695 assign(e32, loadLE(Ity_I32, mkexpr(addr))); 22696 DIP("%s %s,%s,%s\n", opname, 22697 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 22698 delta += alen; 22699 } 22700 22701 /* Create a value 'arg' as V[127:32]++E[31:0] */ 22702 IRTemp arg = newTemp(Ity_V128); 22703 assign(arg, 22704 binop(Iop_SetV128lo32, 22705 getXMMReg(rV), mkexpr(e32))); 22706 /* and apply op to it */ 22707 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 22708 *uses_vvvv = True; 22709 return delta; 22710 } 22711 22712 22713 /* Lower 32-bit lane only AVX128 binary operation: 22714 G[31:0] = V[31:0] `op` E[31:0] 22715 G[127:32] = V[127:32] 22716 G[255:128] = 0. 22717 The specified op must be of the 32F0x4 kind, so that it 22718 copies the upper 3/4 of the left operand to the result. 22719 */ 22720 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv, 22721 VexAbiInfo* vbi, 22722 Prefix pfx, Long delta, 22723 const HChar* opname, IROp op ) 22724 { 22725 HChar dis_buf[50]; 22726 Int alen; 22727 IRTemp addr; 22728 UChar rm = getUChar(delta); 22729 UInt rG = gregOfRexRM(pfx,rm); 22730 UInt rV = getVexNvvvv(pfx); 22731 IRExpr* vpart = getXMMReg(rV); 22732 if (epartIsReg(rm)) { 22733 UInt rE = eregOfRexRM(pfx,rm); 22734 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) ); 22735 DIP("%s %s,%s,%s\n", opname, 22736 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22737 delta = delta+1; 22738 } else { 22739 /* We can only do a 32-bit memory read, so the upper 3/4 of the 22740 E operand needs to be made simply of zeroes. */ 22741 IRTemp epart = newTemp(Ity_V128); 22742 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22743 assign( epart, unop( Iop_32UtoV128, 22744 loadLE(Ity_I32, mkexpr(addr))) ); 22745 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) ); 22746 DIP("%s %s,%s,%s\n", opname, 22747 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 22748 delta = delta+alen; 22749 } 22750 putYMMRegLane128( rG, 1, mkV128(0) ); 22751 *uses_vvvv = True; 22752 return delta; 22753 } 22754 22755 22756 /* All-lanes AVX128 binary operation: 22757 G[127:0] = V[127:0] `op` E[127:0] 22758 G[255:128] = 0. 22759 */ 22760 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv, 22761 VexAbiInfo* vbi, 22762 Prefix pfx, Long delta, 22763 const HChar* opname, IROp op ) 22764 { 22765 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22766 uses_vvvv, vbi, pfx, delta, opname, op, 22767 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ 22768 ); 22769 } 22770 22771 22772 /* Handles AVX128 32F/64F comparisons. A derivative of 22773 dis_SSEcmp_E_to_G. It can fail, in which case it returns the 22774 original delta to indicate failure. 
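Failure means findSSECmpOp did not recognise the imm8 predicate. For the one-lane-only (scalar) predicates, the lanes above the compared one must end up holding argL, which requires extra masking when the arguments were pre-swapped.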
*/ 22775 static 22776 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv, 22777 VexAbiInfo* vbi, 22778 Prefix pfx, Long delta, 22779 const HChar* opname, Bool all_lanes, Int sz ) 22780 { 22781 vassert(sz == 4 || sz == 8); 22782 Long deltaIN = delta; 22783 HChar dis_buf[50]; 22784 Int alen; 22785 UInt imm8; 22786 IRTemp addr; 22787 Bool preSwap = False; 22788 IROp op = Iop_INVALID; 22789 Bool postNot = False; 22790 IRTemp plain = newTemp(Ity_V128); 22791 UChar rm = getUChar(delta); 22792 UInt rG = gregOfRexRM(pfx, rm); 22793 UInt rV = getVexNvvvv(pfx); 22794 IRTemp argL = newTemp(Ity_V128); 22795 IRTemp argR = newTemp(Ity_V128); 22796 22797 assign(argL, getXMMReg(rV)); 22798 if (epartIsReg(rm)) { 22799 imm8 = getUChar(delta+1); 22800 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 22801 if (!ok) return deltaIN; /* FAIL */ 22802 UInt rE = eregOfRexRM(pfx,rm); 22803 assign(argR, getXMMReg(rE)); 22804 delta += 1+1; 22805 DIP("%s $%d,%s,%s,%s\n", 22806 opname, (Int)imm8, 22807 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 22808 } else { 22809 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 22810 imm8 = getUChar(delta+alen); 22811 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 22812 if (!ok) return deltaIN; /* FAIL */ 22813 assign(argR, 22814 all_lanes ? loadLE(Ity_V128, mkexpr(addr)) 22815 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 22816 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))); 22817 delta += alen+1; 22818 DIP("%s $%d,%s,%s,%s\n", 22819 opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 22820 } 22821 22822 assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL)) 22823 : binop(op, mkexpr(argL), mkexpr(argR))); 22824 22825 if (all_lanes) { 22826 /* This is simple: just invert the result, if necessary, and 22827 have done. */ 22828 if (postNot) { 22829 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) ); 22830 } else { 22831 putYMMRegLoAndZU( rG, mkexpr(plain) ); 22832 } 22833 } 22834 else 22835 if (!preSwap) { 22836 /* More complex. It's a one-lane-only, hence need to possibly 22837 invert only that one lane. But at least the other lanes are 22838 correctly "in" the result, having been copied from the left 22839 operand (argL). */ 22840 if (postNot) { 22841 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF); 22842 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain), 22843 mask) ); 22844 } else { 22845 putYMMRegLoAndZU( rG, mkexpr(plain) ); 22846 } 22847 } 22848 else { 22849 /* This is the most complex case. One-lane-only, but the args 22850 were swapped. So we have to possibly invert the bottom lane, 22851 and (definitely) we have to copy the upper lane(s) from argL 22852 since, due to the swapping, what's currently there is from 22853 argR, which is not correct. */ 22854 IRTemp res = newTemp(Ity_V128); 22855 IRTemp mask = newTemp(Ity_V128); 22856 IRTemp notMask = newTemp(Ity_V128); 22857 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF)); 22858 assign(notMask, mkV128(sz==4 ? 
0xFFF0 : 0xFF00)); 22859 if (postNot) { 22860 assign(res, 22861 binop(Iop_OrV128, 22862 binop(Iop_AndV128, 22863 unop(Iop_NotV128, mkexpr(plain)), 22864 mkexpr(mask)), 22865 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask)))); 22866 } else { 22867 assign(res, 22868 binop(Iop_OrV128, 22869 binop(Iop_AndV128, 22870 mkexpr(plain), 22871 mkexpr(mask)), 22872 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask)))); 22873 } 22874 putYMMRegLoAndZU( rG, mkexpr(res) ); 22875 } 22876 22877 *uses_vvvv = True; 22878 return delta; 22879 } 22880 22881 22882 /* Handles AVX256 32F/64F comparisons. A derivative of 22883 dis_SSEcmp_E_to_G. It can fail, in which case it returns the 22884 original delta to indicate failure. */ 22885 static 22886 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv, 22887 VexAbiInfo* vbi, 22888 Prefix pfx, Long delta, 22889 const HChar* opname, Int sz ) 22890 { 22891 vassert(sz == 4 || sz == 8); 22892 Long deltaIN = delta; 22893 HChar dis_buf[50]; 22894 Int alen; 22895 UInt imm8; 22896 IRTemp addr; 22897 Bool preSwap = False; 22898 IROp op = Iop_INVALID; 22899 Bool postNot = False; 22900 IRTemp plain = newTemp(Ity_V256); 22901 UChar rm = getUChar(delta); 22902 UInt rG = gregOfRexRM(pfx, rm); 22903 UInt rV = getVexNvvvv(pfx); 22904 IRTemp argL = newTemp(Ity_V256); 22905 IRTemp argR = newTemp(Ity_V256); 22906 IRTemp argLhi = IRTemp_INVALID; 22907 IRTemp argLlo = IRTemp_INVALID; 22908 IRTemp argRhi = IRTemp_INVALID; 22909 IRTemp argRlo = IRTemp_INVALID; 22910 22911 assign(argL, getYMMReg(rV)); 22912 if (epartIsReg(rm)) { 22913 imm8 = getUChar(delta+1); 22914 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 22915 True/*all_lanes*/, sz); 22916 if (!ok) return deltaIN; /* FAIL */ 22917 UInt rE = eregOfRexRM(pfx,rm); 22918 assign(argR, getYMMReg(rE)); 22919 delta += 1+1; 22920 DIP("%s $%d,%s,%s,%s\n", 22921 opname, (Int)imm8, 22922 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 22923 } else { 22924 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 22925 imm8 = getUChar(delta+alen); 22926 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 22927 True/*all_lanes*/, sz); 22928 if (!ok) return deltaIN; /* FAIL */ 22929 assign(argR, loadLE(Ity_V256, mkexpr(addr)) ); 22930 delta += alen+1; 22931 DIP("%s $%d,%s,%s,%s\n", 22932 opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 22933 } 22934 22935 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo ); 22936 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo ); 22937 assign(plain, binop( Iop_V128HLtoV256, 22938 binop(op, mkexpr(argLhi), mkexpr(argRhi)), 22939 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) ); 22940 22941 /* This is simple: just invert the result, if necessary, and 22942 have done. */ 22943 if (postNot) { 22944 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) ); 22945 } else { 22946 putYMMReg( rG, mkexpr(plain) ); 22947 } 22948 22949 *uses_vvvv = True; 22950 return delta; 22951 } 22952 22953 22954 /* Handles AVX128 unary E-to-G all-lanes operations. 
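The result is produced by the supplied IR generator 'opFn'; the _all variant below does the same job but with a single IROp.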
*/ 22955 static 22956 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv, 22957 VexAbiInfo* vbi, 22958 Prefix pfx, Long delta, 22959 const HChar* opname, 22960 IRTemp (*opFn)(IRTemp) ) 22961 { 22962 HChar dis_buf[50]; 22963 Int alen; 22964 IRTemp addr; 22965 IRTemp res = newTemp(Ity_V128); 22966 IRTemp arg = newTemp(Ity_V128); 22967 UChar rm = getUChar(delta); 22968 UInt rG = gregOfRexRM(pfx, rm); 22969 if (epartIsReg(rm)) { 22970 UInt rE = eregOfRexRM(pfx,rm); 22971 assign(arg, getXMMReg(rE)); 22972 delta += 1; 22973 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 22974 } else { 22975 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22976 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 22977 delta += alen; 22978 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 22979 } 22980 res = opFn(arg); 22981 putYMMRegLoAndZU( rG, mkexpr(res) ); 22982 *uses_vvvv = False; 22983 return delta; 22984 } 22985 22986 22987 /* Handles AVX128 unary E-to-G all-lanes operations. */ 22988 static 22989 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 22990 VexAbiInfo* vbi, 22991 Prefix pfx, Long delta, 22992 const HChar* opname, IROp op ) 22993 { 22994 HChar dis_buf[50]; 22995 Int alen; 22996 IRTemp addr; 22997 IRTemp arg = newTemp(Ity_V128); 22998 UChar rm = getUChar(delta); 22999 UInt rG = gregOfRexRM(pfx, rm); 23000 if (epartIsReg(rm)) { 23001 UInt rE = eregOfRexRM(pfx,rm); 23002 assign(arg, getXMMReg(rE)); 23003 delta += 1; 23004 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 23005 } else { 23006 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23007 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 23008 delta += alen; 23009 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 23010 } 23011 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) ); 23012 *uses_vvvv = False; 23013 return delta; 23014 } 23015 23016 23017 /* FIXME: common up with the _128_ version above? */ 23018 static 23019 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG ( 23020 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 23021 Prefix pfx, Long delta, const HChar* name, 23022 /* The actual operation. Use either 'op' or 'opfn', 23023 but not both. */ 23024 IROp op, IRTemp(*opFn)(IRTemp,IRTemp), 23025 Bool invertLeftArg, 23026 Bool swapArgs 23027 ) 23028 { 23029 UChar modrm = getUChar(delta); 23030 UInt rD = gregOfRexRM(pfx, modrm); 23031 UInt rSL = getVexNvvvv(pfx); 23032 IRTemp tSL = newTemp(Ity_V256); 23033 IRTemp tSR = newTemp(Ity_V256); 23034 IRTemp addr = IRTemp_INVALID; 23035 HChar dis_buf[50]; 23036 Int alen = 0; 23037 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/); 23038 23039 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL)) 23040 : getYMMReg(rSL)); 23041 23042 if (epartIsReg(modrm)) { 23043 UInt rSR = eregOfRexRM(pfx, modrm); 23044 delta += 1; 23045 assign(tSR, getYMMReg(rSR)); 23046 DIP("%s %s,%s,%s\n", 23047 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD)); 23048 } else { 23049 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23050 delta += alen; 23051 assign(tSR, loadLE(Ity_V256, mkexpr(addr))); 23052 DIP("%s %s,%s,%s\n", 23053 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD)); 23054 } 23055 23056 IRTemp res = IRTemp_INVALID; 23057 if (op != Iop_INVALID) { 23058 vassert(opFn == NULL); 23059 res = newTemp(Ity_V256); 23060 if (requiresRMode(op)) { 23061 IRTemp rm = newTemp(Ity_I32); 23062 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */ 23063 assign(res, swapArgs 23064 ? 
triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL)) 23065 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR))); 23066 } else { 23067 assign(res, swapArgs 23068 ? binop(op, mkexpr(tSR), mkexpr(tSL)) 23069 : binop(op, mkexpr(tSL), mkexpr(tSR))); 23070 } 23071 } else { 23072 vassert(opFn != NULL); 23073 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); 23074 } 23075 23076 putYMMReg(rD, mkexpr(res)); 23077 23078 *uses_vvvv = True; 23079 return delta; 23080 } 23081 23082 23083 /* All-lanes AVX256 binary operation: 23084 G[255:0] = V[255:0] `op` E[255:0] 23085 */ 23086 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv, 23087 VexAbiInfo* vbi, 23088 Prefix pfx, Long delta, 23089 const HChar* opname, IROp op ) 23090 { 23091 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23092 uses_vvvv, vbi, pfx, delta, opname, op, 23093 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ 23094 ); 23095 } 23096 23097 23098 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp 23099 for the operation, no inversion of the left arg, and no swapping of 23100 args. */ 23101 static 23102 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple ( 23103 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 23104 Prefix pfx, Long delta, const HChar* name, 23105 IROp op 23106 ) 23107 { 23108 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23109 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False); 23110 } 23111 23112 23113 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR 23114 generator to compute the result, no inversion of the left 23115 arg, and no swapping of args. */ 23116 static 23117 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex ( 23118 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi, 23119 Prefix pfx, Long delta, const HChar* name, 23120 IRTemp(*opFn)(IRTemp,IRTemp) 23121 ) 23122 { 23123 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23124 uses_vvvv, vbi, pfx, delta, name, 23125 Iop_INVALID, opFn, False, False ); 23126 } 23127 23128 23129 /* Handles AVX256 unary E-to-G all-lanes operations. */ 23130 static 23131 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv, 23132 VexAbiInfo* vbi, 23133 Prefix pfx, Long delta, 23134 const HChar* opname, 23135 IRTemp (*opFn)(IRTemp) ) 23136 { 23137 HChar dis_buf[50]; 23138 Int alen; 23139 IRTemp addr; 23140 IRTemp res = newTemp(Ity_V256); 23141 IRTemp arg = newTemp(Ity_V256); 23142 UChar rm = getUChar(delta); 23143 UInt rG = gregOfRexRM(pfx, rm); 23144 if (epartIsReg(rm)) { 23145 UInt rE = eregOfRexRM(pfx,rm); 23146 assign(arg, getYMMReg(rE)); 23147 delta += 1; 23148 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG)); 23149 } else { 23150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23151 assign(arg, loadLE(Ity_V256, mkexpr(addr))); 23152 delta += alen; 23153 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG)); 23154 } 23155 res = opFn(arg); 23156 putYMMReg( rG, mkexpr(res) ); 23157 *uses_vvvv = False; 23158 return delta; 23159 } 23160 23161 23162 /* Handles AVX256 unary E-to-G all-lanes operations. 
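This is the single-IROp counterpart of dis_AVX256_E_to_G_unary above: the whole 256-bit E value is fetched and op is applied to it in one step.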
*/ 23163 static 23164 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 23165 VexAbiInfo* vbi, 23166 Prefix pfx, Long delta, 23167 const HChar* opname, IROp op ) 23168 { 23169 HChar dis_buf[50]; 23170 Int alen; 23171 IRTemp addr; 23172 IRTemp arg = newTemp(Ity_V256); 23173 UChar rm = getUChar(delta); 23174 UInt rG = gregOfRexRM(pfx, rm); 23175 if (epartIsReg(rm)) { 23176 UInt rE = eregOfRexRM(pfx,rm); 23177 assign(arg, getYMMReg(rE)); 23178 delta += 1; 23179 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG)); 23180 } else { 23181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23182 assign(arg, loadLE(Ity_V256, mkexpr(addr))); 23183 delta += alen; 23184 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG)); 23185 } 23186 putYMMReg( rG, unop(op, mkexpr(arg)) ); 23187 *uses_vvvv = False; 23188 return delta; 23189 } 23190 23191 23192 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we 23193 had a variant of Iop_64x4toV256 that took F64s as args instead. */ 23194 static Long dis_CVTDQ2PD_256 ( VexAbiInfo* vbi, Prefix pfx, 23195 Long delta ) 23196 { 23197 IRTemp addr = IRTemp_INVALID; 23198 Int alen = 0; 23199 HChar dis_buf[50]; 23200 UChar modrm = getUChar(delta); 23201 IRTemp sV = newTemp(Ity_V128); 23202 UInt rG = gregOfRexRM(pfx,modrm); 23203 if (epartIsReg(modrm)) { 23204 UInt rE = eregOfRexRM(pfx,modrm); 23205 assign( sV, getXMMReg(rE) ); 23206 delta += 1; 23207 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 23208 } else { 23209 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23210 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 23211 delta += alen; 23212 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) ); 23213 } 23214 IRTemp s3, s2, s1, s0; 23215 s3 = s2 = s1 = s0 = IRTemp_INVALID; 23216 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 23217 IRExpr* res 23218 = IRExpr_Qop( 23219 Iop_64x4toV256, 23220 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))), 23221 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))), 23222 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))), 23223 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0))) 23224 ); 23225 putYMMReg(rG, res); 23226 return delta; 23227 } 23228 23229 23230 static Long dis_CVTPD2PS_256 ( VexAbiInfo* vbi, Prefix pfx, 23231 Long delta ) 23232 { 23233 IRTemp addr = IRTemp_INVALID; 23234 Int alen = 0; 23235 HChar dis_buf[50]; 23236 UChar modrm = getUChar(delta); 23237 UInt rG = gregOfRexRM(pfx,modrm); 23238 IRTemp argV = newTemp(Ity_V256); 23239 IRTemp rmode = newTemp(Ity_I32); 23240 if (epartIsReg(modrm)) { 23241 UInt rE = eregOfRexRM(pfx,modrm); 23242 assign( argV, getYMMReg(rE) ); 23243 delta += 1; 23244 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG)); 23245 } else { 23246 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23247 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 23248 delta += alen; 23249 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) ); 23250 } 23251 23252 assign( rmode, get_sse_roundingmode() ); 23253 IRTemp t3, t2, t1, t0; 23254 t3 = t2 = t1 = t0 = IRTemp_INVALID; 23255 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 23256 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \ 23257 unop(Iop_ReinterpI64asF64, mkexpr(_t)) ) 23258 putXMMRegLane32F( rG, 3, CVT(t3) ); 23259 putXMMRegLane32F( rG, 2, CVT(t2) ); 23260 putXMMRegLane32F( rG, 1, CVT(t1) ); 23261 putXMMRegLane32F( rG, 0, CVT(t0) ); 23262 # undef CVT 23263 putYMMRegLane128( rG, 1, mkV128(0) ); 23264 return delta; 23265 } 23266 23267 23268 static IRTemp math_VPUNPCK_YMM ( 
IRTemp tL, IRTemp tR, IROp op ) 23269 { 23270 IRTemp tLhi, tLlo, tRhi, tRlo; 23271 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID; 23272 IRTemp res = newTemp(Ity_V256); 23273 breakupV256toV128s( tL, &tLhi, &tLlo ); 23274 breakupV256toV128s( tR, &tRhi, &tRlo ); 23275 assign( res, binop( Iop_V128HLtoV256, 23276 binop( op, mkexpr(tRhi), mkexpr(tLhi) ), 23277 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) ); 23278 return res; 23279 } 23280 23281 23282 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR ) 23283 { 23284 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 ); 23285 } 23286 23287 23288 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR ) 23289 { 23290 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 ); 23291 } 23292 23293 23294 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR ) 23295 { 23296 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 ); 23297 } 23298 23299 23300 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR ) 23301 { 23302 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 ); 23303 } 23304 23305 23306 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR ) 23307 { 23308 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 ); 23309 } 23310 23311 23312 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR ) 23313 { 23314 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 ); 23315 } 23316 23317 23318 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR ) 23319 { 23320 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 ); 23321 } 23322 23323 23324 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR ) 23325 { 23326 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 ); 23327 } 23328 23329 23330 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR ) 23331 { 23332 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 ); 23333 } 23334 23335 23336 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR ) 23337 { 23338 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 ); 23339 } 23340 23341 23342 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR ) 23343 { 23344 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 ); 23345 } 23346 23347 23348 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR ) 23349 { 23350 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 ); 23351 } 23352 23353 23354 __attribute__((noinline)) 23355 static 23356 Long dis_ESC_0F__VEX ( 23357 /*MB_OUT*/DisResult* dres, 23358 /*OUT*/ Bool* uses_vvvv, 23359 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 23360 Bool resteerCisOk, 23361 void* callback_opaque, 23362 VexArchInfo* archinfo, 23363 VexAbiInfo* vbi, 23364 Prefix pfx, Int sz, Long deltaIN 23365 ) 23366 { 23367 IRTemp addr = IRTemp_INVALID; 23368 Int alen = 0; 23369 HChar dis_buf[50]; 23370 Long delta = deltaIN; 23371 UChar opc = getUChar(delta); 23372 delta++; 23373 *uses_vvvv = False; 23374 23375 switch (opc) { 23376 23377 case 0x10: 23378 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 23379 /* Move 64 bits from E (mem only) to G (lo half xmm). 23380 Bits 255-64 of the dest are zeroed out. */ 23381 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 23382 UChar modrm = getUChar(delta); 23383 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23384 UInt rG = gregOfRexRM(pfx,modrm); 23385 IRTemp z128 = newTemp(Ity_V128); 23386 assign(z128, mkV128(0)); 23387 putXMMReg( rG, mkexpr(z128) ); 23388 /* FIXME: ALIGNMENT CHECK? 
*/ 23389 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 23390 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23391 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG)); 23392 delta += alen; 23393 goto decode_success; 23394 } 23395 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 23396 /* Reg form. */ 23397 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 23398 UChar modrm = getUChar(delta); 23399 UInt rG = gregOfRexRM(pfx, modrm); 23400 UInt rE = eregOfRexRM(pfx, modrm); 23401 UInt rV = getVexNvvvv(pfx); 23402 delta++; 23403 DIP("vmovsd %s,%s,%s\n", 23404 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23405 IRTemp res = newTemp(Ity_V128); 23406 assign(res, binop(Iop_64HLtoV128, 23407 getXMMRegLane64(rV, 1), 23408 getXMMRegLane64(rE, 0))); 23409 putYMMRegLoAndZU(rG, mkexpr(res)); 23410 *uses_vvvv = True; 23411 goto decode_success; 23412 } 23413 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23414 /* Move 32 bits from E (mem only) to G (lo half xmm). 23415 Bits 255-32 of the dest are zeroed out. */ 23416 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 23417 UChar modrm = getUChar(delta); 23418 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23419 UInt rG = gregOfRexRM(pfx,modrm); 23420 IRTemp z128 = newTemp(Ity_V128); 23421 assign(z128, mkV128(0)); 23422 putXMMReg( rG, mkexpr(z128) ); 23423 /* FIXME: ALIGNMENT CHECK? */ 23424 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) ); 23425 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23426 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG)); 23427 delta += alen; 23428 goto decode_success; 23429 } 23430 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23431 /* Reg form. */ 23432 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 23433 UChar modrm = getUChar(delta); 23434 UInt rG = gregOfRexRM(pfx, modrm); 23435 UInt rE = eregOfRexRM(pfx, modrm); 23436 UInt rV = getVexNvvvv(pfx); 23437 delta++; 23438 DIP("vmovss %s,%s,%s\n", 23439 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23440 IRTemp res = newTemp(Ity_V128); 23441 assign( res, binop( Iop_64HLtoV128, 23442 getXMMRegLane64(rV, 1), 23443 binop(Iop_32HLto64, 23444 getXMMRegLane32(rV, 1), 23445 getXMMRegLane32(rE, 0)) ) ); 23446 putYMMRegLoAndZU(rG, mkexpr(res)); 23447 *uses_vvvv = True; 23448 goto decode_success; 23449 } 23450 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */ 23451 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23452 UChar modrm = getUChar(delta); 23453 UInt rG = gregOfRexRM(pfx, modrm); 23454 if (epartIsReg(modrm)) { 23455 UInt rE = eregOfRexRM(pfx,modrm); 23456 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23457 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23458 delta += 1; 23459 } else { 23460 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23461 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23462 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG)); 23463 delta += alen; 23464 } 23465 goto decode_success; 23466 } 23467 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */ 23468 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23469 UChar modrm = getUChar(delta); 23470 UInt rG = gregOfRexRM(pfx, modrm); 23471 if (epartIsReg(modrm)) { 23472 UInt rE = eregOfRexRM(pfx,modrm); 23473 putYMMReg( rG, getYMMReg( rE )); 23474 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23475 delta += 1; 23476 } else { 23477 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23478 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23479 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG)); 23480 delta += alen; 
23481 } 23482 goto decode_success; 23483 } 23484 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */ 23485 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23486 UChar modrm = getUChar(delta); 23487 UInt rG = gregOfRexRM(pfx, modrm); 23488 if (epartIsReg(modrm)) { 23489 UInt rE = eregOfRexRM(pfx,modrm); 23490 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23491 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23492 delta += 1; 23493 } else { 23494 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23495 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23496 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG)); 23497 delta += alen; 23498 } 23499 goto decode_success; 23500 } 23501 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */ 23502 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23503 UChar modrm = getUChar(delta); 23504 UInt rG = gregOfRexRM(pfx, modrm); 23505 if (epartIsReg(modrm)) { 23506 UInt rE = eregOfRexRM(pfx,modrm); 23507 putYMMReg( rG, getYMMReg( rE )); 23508 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23509 delta += 1; 23510 } else { 23511 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23512 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23513 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG)); 23514 delta += alen; 23515 } 23516 goto decode_success; 23517 } 23518 break; 23519 23520 case 0x11: 23521 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */ 23522 /* Move 64 bits from G (low half xmm) to mem only. */ 23523 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 23524 UChar modrm = getUChar(delta); 23525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23526 UInt rG = gregOfRexRM(pfx,modrm); 23527 /* FIXME: ALIGNMENT CHECK? */ 23528 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0)); 23529 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf); 23530 delta += alen; 23531 goto decode_success; 23532 } 23533 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */ 23534 /* Reg form. */ 23535 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 23536 UChar modrm = getUChar(delta); 23537 UInt rG = gregOfRexRM(pfx, modrm); 23538 UInt rE = eregOfRexRM(pfx, modrm); 23539 UInt rV = getVexNvvvv(pfx); 23540 delta++; 23541 DIP("vmovsd %s,%s,%s\n", 23542 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23543 IRTemp res = newTemp(Ity_V128); 23544 assign(res, binop(Iop_64HLtoV128, 23545 getXMMRegLane64(rV, 1), 23546 getXMMRegLane64(rE, 0))); 23547 putYMMRegLoAndZU(rG, mkexpr(res)); 23548 *uses_vvvv = True; 23549 goto decode_success; 23550 } 23551 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */ 23552 /* Move 32 bits from G (low 1/4 xmm) to mem only. */ 23553 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 23554 UChar modrm = getUChar(delta); 23555 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23556 UInt rG = gregOfRexRM(pfx,modrm); 23557 /* FIXME: ALIGNMENT CHECK? */ 23558 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0)); 23559 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf); 23560 delta += alen; 23561 goto decode_success; 23562 } 23563 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */ 23564 /* Reg form. 
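The result is V[127:32] with E[31:0] as the new bottom lane, written to G and then zero-extended to 256 bits; in effect a three-operand register-to-register VMOVSS.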
*/ 23565 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 23566 UChar modrm = getUChar(delta); 23567 UInt rG = gregOfRexRM(pfx, modrm); 23568 UInt rE = eregOfRexRM(pfx, modrm); 23569 UInt rV = getVexNvvvv(pfx); 23570 delta++; 23571 DIP("vmovss %s,%s,%s\n", 23572 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23573 IRTemp res = newTemp(Ity_V128); 23574 assign( res, binop( Iop_64HLtoV128, 23575 getXMMRegLane64(rV, 1), 23576 binop(Iop_32HLto64, 23577 getXMMRegLane32(rV, 1), 23578 getXMMRegLane32(rE, 0)) ) ); 23579 putYMMRegLoAndZU(rG, mkexpr(res)); 23580 *uses_vvvv = True; 23581 goto decode_success; 23582 } 23583 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */ 23584 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23585 UChar modrm = getUChar(delta); 23586 UInt rG = gregOfRexRM(pfx,modrm); 23587 if (epartIsReg(modrm)) { 23588 UInt rE = eregOfRexRM(pfx,modrm); 23589 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 23590 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 23591 delta += 1; 23592 } else { 23593 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23594 storeLE( mkexpr(addr), getXMMReg(rG) ); 23595 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf); 23596 delta += alen; 23597 } 23598 goto decode_success; 23599 } 23600 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */ 23601 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23602 UChar modrm = getUChar(delta); 23603 UInt rG = gregOfRexRM(pfx,modrm); 23604 if (epartIsReg(modrm)) { 23605 UInt rE = eregOfRexRM(pfx,modrm); 23606 putYMMReg( rE, getYMMReg(rG) ); 23607 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 23608 delta += 1; 23609 } else { 23610 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23611 storeLE( mkexpr(addr), getYMMReg(rG) ); 23612 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf); 23613 delta += alen; 23614 } 23615 goto decode_success; 23616 } 23617 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */ 23618 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23619 UChar modrm = getUChar(delta); 23620 UInt rG = gregOfRexRM(pfx,modrm); 23621 if (epartIsReg(modrm)) { 23622 UInt rE = eregOfRexRM(pfx,modrm); 23623 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 23624 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 23625 delta += 1; 23626 } else { 23627 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23628 storeLE( mkexpr(addr), getXMMReg(rG) ); 23629 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf); 23630 delta += alen; 23631 } 23632 goto decode_success; 23633 } 23634 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */ 23635 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23636 UChar modrm = getUChar(delta); 23637 UInt rG = gregOfRexRM(pfx,modrm); 23638 if (epartIsReg(modrm)) { 23639 UInt rE = eregOfRexRM(pfx,modrm); 23640 putYMMReg( rE, getYMMReg(rG) ); 23641 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 23642 delta += 1; 23643 } else { 23644 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23645 storeLE( mkexpr(addr), getYMMReg(rG) ); 23646 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf); 23647 delta += alen; 23648 } 23649 goto decode_success; 23650 } 23651 break; 23652 23653 case 0x12: 23654 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */ 23655 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23656 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ ); 23657 goto decode_success; 23658 } 23659 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */ 23660 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23661 delta = 
dis_MOVDDUP_256( vbi, pfx, delta ); 23662 goto decode_success; 23663 } 23664 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */ 23665 /* Insn only exists in reg form */ 23666 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23667 && epartIsReg(getUChar(delta))) { 23668 UChar modrm = getUChar(delta); 23669 UInt rG = gregOfRexRM(pfx, modrm); 23670 UInt rE = eregOfRexRM(pfx, modrm); 23671 UInt rV = getVexNvvvv(pfx); 23672 delta++; 23673 DIP("vmovhlps %s,%s,%s\n", 23674 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23675 IRTemp res = newTemp(Ity_V128); 23676 assign(res, binop(Iop_64HLtoV128, 23677 getXMMRegLane64(rV, 1), 23678 getXMMRegLane64(rE, 1))); 23679 putYMMRegLoAndZU(rG, mkexpr(res)); 23680 *uses_vvvv = True; 23681 goto decode_success; 23682 } 23683 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */ 23684 /* Insn exists only in mem form, it appears. */ 23685 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */ 23686 /* Insn exists only in mem form, it appears. */ 23687 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23688 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23689 UChar modrm = getUChar(delta); 23690 UInt rG = gregOfRexRM(pfx, modrm); 23691 UInt rV = getVexNvvvv(pfx); 23692 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23693 delta += alen; 23694 DIP("vmovlpd %s,%s,%s\n", 23695 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23696 IRTemp res = newTemp(Ity_V128); 23697 assign(res, binop(Iop_64HLtoV128, 23698 getXMMRegLane64(rV, 1), 23699 loadLE(Ity_I64, mkexpr(addr)))); 23700 putYMMRegLoAndZU(rG, mkexpr(res)); 23701 *uses_vvvv = True; 23702 goto decode_success; 23703 } 23704 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */ 23705 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 23706 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 23707 True/*isL*/ ); 23708 goto decode_success; 23709 } 23710 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */ 23711 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 23712 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ ); 23713 goto decode_success; 23714 } 23715 break; 23716 23717 case 0x13: 23718 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */ 23719 /* Insn exists only in mem form, it appears. */ 23720 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */ 23721 /* Insn exists only in mem form, it appears. */ 23722 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23723 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23724 UChar modrm = getUChar(delta); 23725 UInt rG = gregOfRexRM(pfx, modrm); 23726 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23727 delta += alen; 23728 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0)); 23729 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf); 23730 goto decode_success; 23731 } 23732 break; 23733 23734 case 0x14: 23735 case 0x15: 23736 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */ 23737 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */ 23738 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23739 Bool hi = opc == 0x15; 23740 UChar modrm = getUChar(delta); 23741 UInt rG = gregOfRexRM(pfx,modrm); 23742 UInt rV = getVexNvvvv(pfx); 23743 IRTemp eV = newTemp(Ity_V128); 23744 IRTemp vV = newTemp(Ity_V128); 23745 assign( vV, getXMMReg(rV) ); 23746 if (epartIsReg(modrm)) { 23747 UInt rE = eregOfRexRM(pfx,modrm); 23748 assign( eV, getXMMReg(rE) ); 23749 delta += 1; 23750 DIP("vunpck%sps %s,%s\n", hi ? 
"h" : "l", 23751 nameXMMReg(rE), nameXMMReg(rG)); 23752 } else { 23753 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23754 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23755 delta += alen; 23756 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23757 dis_buf, nameXMMReg(rG)); 23758 } 23759 IRTemp res = math_UNPCKxPS_128( eV, vV, hi ); 23760 putYMMRegLoAndZU( rG, mkexpr(res) ); 23761 *uses_vvvv = True; 23762 goto decode_success; 23763 } 23764 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */ 23765 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */ 23766 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23767 Bool hi = opc == 0x15; 23768 UChar modrm = getUChar(delta); 23769 UInt rG = gregOfRexRM(pfx,modrm); 23770 UInt rV = getVexNvvvv(pfx); 23771 IRTemp eV = newTemp(Ity_V256); 23772 IRTemp vV = newTemp(Ity_V256); 23773 assign( vV, getYMMReg(rV) ); 23774 if (epartIsReg(modrm)) { 23775 UInt rE = eregOfRexRM(pfx,modrm); 23776 assign( eV, getYMMReg(rE) ); 23777 delta += 1; 23778 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23779 nameYMMReg(rE), nameYMMReg(rG)); 23780 } else { 23781 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23782 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23783 delta += alen; 23784 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 23785 dis_buf, nameYMMReg(rG)); 23786 } 23787 IRTemp res = math_UNPCKxPS_256( eV, vV, hi ); 23788 putYMMReg( rG, mkexpr(res) ); 23789 *uses_vvvv = True; 23790 goto decode_success; 23791 } 23792 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */ 23793 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */ 23794 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23795 Bool hi = opc == 0x15; 23796 UChar modrm = getUChar(delta); 23797 UInt rG = gregOfRexRM(pfx,modrm); 23798 UInt rV = getVexNvvvv(pfx); 23799 IRTemp eV = newTemp(Ity_V128); 23800 IRTemp vV = newTemp(Ity_V128); 23801 assign( vV, getXMMReg(rV) ); 23802 if (epartIsReg(modrm)) { 23803 UInt rE = eregOfRexRM(pfx,modrm); 23804 assign( eV, getXMMReg(rE) ); 23805 delta += 1; 23806 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 23807 nameXMMReg(rE), nameXMMReg(rG)); 23808 } else { 23809 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23810 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 23811 delta += alen; 23812 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 23813 dis_buf, nameXMMReg(rG)); 23814 } 23815 IRTemp res = math_UNPCKxPD_128( eV, vV, hi ); 23816 putYMMRegLoAndZU( rG, mkexpr(res) ); 23817 *uses_vvvv = True; 23818 goto decode_success; 23819 } 23820 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */ 23821 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */ 23822 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23823 Bool hi = opc == 0x15; 23824 UChar modrm = getUChar(delta); 23825 UInt rG = gregOfRexRM(pfx,modrm); 23826 UInt rV = getVexNvvvv(pfx); 23827 IRTemp eV = newTemp(Ity_V256); 23828 IRTemp vV = newTemp(Ity_V256); 23829 assign( vV, getYMMReg(rV) ); 23830 if (epartIsReg(modrm)) { 23831 UInt rE = eregOfRexRM(pfx,modrm); 23832 assign( eV, getYMMReg(rE) ); 23833 delta += 1; 23834 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 23835 nameYMMReg(rE), nameYMMReg(rG)); 23836 } else { 23837 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23838 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 23839 delta += alen; 23840 DIP("vunpck%spd %s,%s\n", hi ? 
"h" : "l", 23841 dis_buf, nameYMMReg(rG)); 23842 } 23843 IRTemp res = math_UNPCKxPD_256( eV, vV, hi ); 23844 putYMMReg( rG, mkexpr(res) ); 23845 *uses_vvvv = True; 23846 goto decode_success; 23847 } 23848 break; 23849 23850 case 0x16: 23851 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */ 23852 /* Insn only exists in reg form */ 23853 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 23854 && epartIsReg(getUChar(delta))) { 23855 UChar modrm = getUChar(delta); 23856 UInt rG = gregOfRexRM(pfx, modrm); 23857 UInt rE = eregOfRexRM(pfx, modrm); 23858 UInt rV = getVexNvvvv(pfx); 23859 delta++; 23860 DIP("vmovlhps %s,%s,%s\n", 23861 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23862 IRTemp res = newTemp(Ity_V128); 23863 assign(res, binop(Iop_64HLtoV128, 23864 getXMMRegLane64(rE, 0), 23865 getXMMRegLane64(rV, 0))); 23866 putYMMRegLoAndZU(rG, mkexpr(res)); 23867 *uses_vvvv = True; 23868 goto decode_success; 23869 } 23870 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */ 23871 /* Insn exists only in mem form, it appears. */ 23872 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */ 23873 /* Insn exists only in mem form, it appears. */ 23874 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23875 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23876 UChar modrm = getUChar(delta); 23877 UInt rG = gregOfRexRM(pfx, modrm); 23878 UInt rV = getVexNvvvv(pfx); 23879 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23880 delta += alen; 23881 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's', 23882 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 23883 IRTemp res = newTemp(Ity_V128); 23884 assign(res, binop(Iop_64HLtoV128, 23885 loadLE(Ity_I64, mkexpr(addr)), 23886 getXMMRegLane64(rV, 0))); 23887 putYMMRegLoAndZU(rG, mkexpr(res)); 23888 *uses_vvvv = True; 23889 goto decode_success; 23890 } 23891 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */ 23892 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 23893 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 23894 False/*!isL*/ ); 23895 goto decode_success; 23896 } 23897 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */ 23898 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 23899 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ ); 23900 goto decode_success; 23901 } 23902 break; 23903 23904 case 0x17: 23905 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */ 23906 /* Insn exists only in mem form, it appears. */ 23907 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */ 23908 /* Insn exists only in mem form, it appears. */ 23909 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 23910 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 23911 UChar modrm = getUChar(delta); 23912 UInt rG = gregOfRexRM(pfx, modrm); 23913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23914 delta += alen; 23915 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1)); 23916 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 23917 nameXMMReg(rG), dis_buf); 23918 goto decode_success; 23919 } 23920 break; 23921 23922 case 0x28: 23923 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */ 23924 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23925 UChar modrm = getUChar(delta); 23926 UInt rG = gregOfRexRM(pfx, modrm); 23927 if (epartIsReg(modrm)) { 23928 UInt rE = eregOfRexRM(pfx,modrm); 23929 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23930 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23931 delta += 1; 23932 } else { 23933 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23934 gen_SEGV_if_not_16_aligned( addr ); 23935 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23936 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG)); 23937 delta += alen; 23938 } 23939 goto decode_success; 23940 } 23941 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */ 23942 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23943 UChar modrm = getUChar(delta); 23944 UInt rG = gregOfRexRM(pfx, modrm); 23945 if (epartIsReg(modrm)) { 23946 UInt rE = eregOfRexRM(pfx,modrm); 23947 putYMMReg( rG, getYMMReg( rE )); 23948 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23949 delta += 1; 23950 } else { 23951 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23952 gen_SEGV_if_not_32_aligned( addr ); 23953 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23954 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG)); 23955 delta += alen; 23956 } 23957 goto decode_success; 23958 } 23959 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */ 23960 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23961 UChar modrm = getUChar(delta); 23962 UInt rG = gregOfRexRM(pfx, modrm); 23963 if (epartIsReg(modrm)) { 23964 UInt rE = eregOfRexRM(pfx,modrm); 23965 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23966 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23967 delta += 1; 23968 } else { 23969 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23970 gen_SEGV_if_not_16_aligned( addr ); 23971 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23972 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG)); 23973 delta += alen; 23974 } 23975 goto decode_success; 23976 } 23977 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */ 23978 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23979 UChar modrm = getUChar(delta); 23980 UInt rG = gregOfRexRM(pfx, modrm); 23981 if (epartIsReg(modrm)) { 23982 UInt rE = eregOfRexRM(pfx,modrm); 23983 putYMMReg( rG, getYMMReg( rE )); 23984 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23985 delta += 1; 23986 } else { 23987 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23988 gen_SEGV_if_not_32_aligned( addr ); 23989 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23990 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG)); 23991 delta += alen; 23992 } 23993 goto decode_success; 23994 } 23995 break; 23996 23997 case 0x29: 23998 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */ 23999 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24000 UChar modrm = getUChar(delta); 24001 UInt rG = gregOfRexRM(pfx,modrm); 24002 if (epartIsReg(modrm)) { 24003 UInt rE = eregOfRexRM(pfx,modrm); 24004 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24005 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24006 delta += 1; 24007 } else { 24008 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24009 gen_SEGV_if_not_16_aligned( addr ); 24010 storeLE( mkexpr(addr), getXMMReg(rG) ); 24011 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf ); 24012 delta += alen; 24013 } 24014 
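/* Note: in the reg-reg case the destination is rE, and, as with any VEX.128-encoded write to an xmm register, its upper lanes (bits 255:128) are zeroed, hence putYMMRegLoAndZU rather than a plain putXMMReg. The memory form checks 16-byte alignment first via gen_SEGV_if_not_16_aligned. */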
goto decode_success; 24015 } 24016 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */ 24017 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24018 UChar modrm = getUChar(delta); 24019 UInt rG = gregOfRexRM(pfx,modrm); 24020 if (epartIsReg(modrm)) { 24021 UInt rE = eregOfRexRM(pfx,modrm); 24022 putYMMReg( rE, getYMMReg(rG) ); 24023 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24024 delta += 1; 24025 } else { 24026 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24027 gen_SEGV_if_not_32_aligned( addr ); 24028 storeLE( mkexpr(addr), getYMMReg(rG) ); 24029 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf ); 24030 delta += alen; 24031 } 24032 goto decode_success; 24033 } 24034 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */ 24035 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24036 UChar modrm = getUChar(delta); 24037 UInt rG = gregOfRexRM(pfx,modrm); 24038 if (epartIsReg(modrm)) { 24039 UInt rE = eregOfRexRM(pfx,modrm); 24040 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24041 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24042 delta += 1; 24043 goto decode_success; 24044 } else { 24045 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24046 gen_SEGV_if_not_16_aligned( addr ); 24047 storeLE( mkexpr(addr), getXMMReg(rG) ); 24048 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf ); 24049 delta += alen; 24050 goto decode_success; 24051 } 24052 } 24053 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */ 24054 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24055 UChar modrm = getUChar(delta); 24056 UInt rG = gregOfRexRM(pfx,modrm); 24057 if (epartIsReg(modrm)) { 24058 UInt rE = eregOfRexRM(pfx,modrm); 24059 putYMMReg( rE, getYMMReg(rG) ); 24060 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24061 delta += 1; 24062 goto decode_success; 24063 } else { 24064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24065 gen_SEGV_if_not_32_aligned( addr ); 24066 storeLE( mkexpr(addr), getYMMReg(rG) ); 24067 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf ); 24068 delta += alen; 24069 goto decode_success; 24070 } 24071 } 24072 break; 24073 24074 case 0x2A: { 24075 IRTemp rmode = newTemp(Ity_I32); 24076 assign( rmode, get_sse_roundingmode() ); 24077 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */ 24078 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24079 UChar modrm = getUChar(delta); 24080 UInt rV = getVexNvvvv(pfx); 24081 UInt rD = gregOfRexRM(pfx, modrm); 24082 IRTemp arg32 = newTemp(Ity_I32); 24083 if (epartIsReg(modrm)) { 24084 UInt rS = eregOfRexRM(pfx,modrm); 24085 assign( arg32, getIReg32(rS) ); 24086 delta += 1; 24087 DIP("vcvtsi2sdl %s,%s,%s\n", 24088 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24089 } else { 24090 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24091 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24092 delta += alen; 24093 DIP("vcvtsi2sdl %s,%s,%s\n", 24094 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24095 } 24096 putXMMRegLane64F( rD, 0, 24097 unop(Iop_I32StoF64, mkexpr(arg32))); 24098 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24099 putYMMRegLane128( rD, 1, mkV128(0) ); 24100 *uses_vvvv = True; 24101 goto decode_success; 24102 } 24103 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */ 24104 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24105 UChar modrm = getUChar(delta); 24106 UInt rV = getVexNvvvv(pfx); 24107 UInt rD = gregOfRexRM(pfx, modrm); 24108 IRTemp arg64 = newTemp(Ity_I64); 24109 if (epartIsReg(modrm)) { 24110 UInt rS = eregOfRexRM(pfx,modrm); 
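/* Note: unlike the 32-bit case above, where Iop_I32StoF64 is exact and needs no rounding mode, a 64-bit source may not be exactly representable as an F64, so the Iop_I64StoF64 below takes the current SSE rounding mode. */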
24111 assign( arg64, getIReg64(rS) ); 24112 delta += 1; 24113 DIP("vcvtsi2sdq %s,%s,%s\n", 24114 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24115 } else { 24116 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24117 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24118 delta += alen; 24119 DIP("vcvtsi2sdq %s,%s,%s\n", 24120 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24121 } 24122 putXMMRegLane64F( rD, 0, 24123 binop( Iop_I64StoF64, 24124 get_sse_roundingmode(), 24125 mkexpr(arg64)) ); 24126 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24127 putYMMRegLane128( rD, 1, mkV128(0) ); 24128 *uses_vvvv = True; 24129 goto decode_success; 24130 } 24131 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */ 24132 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24133 UChar modrm = getUChar(delta); 24134 UInt rV = getVexNvvvv(pfx); 24135 UInt rD = gregOfRexRM(pfx, modrm); 24136 IRTemp arg64 = newTemp(Ity_I64); 24137 if (epartIsReg(modrm)) { 24138 UInt rS = eregOfRexRM(pfx,modrm); 24139 assign( arg64, getIReg64(rS) ); 24140 delta += 1; 24141 DIP("vcvtsi2ssq %s,%s,%s\n", 24142 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24143 } else { 24144 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24145 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24146 delta += alen; 24147 DIP("vcvtsi2ssq %s,%s,%s\n", 24148 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24149 } 24150 putXMMRegLane32F( rD, 0, 24151 binop(Iop_F64toF32, 24152 mkexpr(rmode), 24153 binop(Iop_I64StoF64, mkexpr(rmode), 24154 mkexpr(arg64)) ) ); 24155 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24156 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24157 putYMMRegLane128( rD, 1, mkV128(0) ); 24158 *uses_vvvv = True; 24159 goto decode_success; 24160 } 24161 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */ 24162 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24163 UChar modrm = getUChar(delta); 24164 UInt rV = getVexNvvvv(pfx); 24165 UInt rD = gregOfRexRM(pfx, modrm); 24166 IRTemp arg32 = newTemp(Ity_I32); 24167 if (epartIsReg(modrm)) { 24168 UInt rS = eregOfRexRM(pfx,modrm); 24169 assign( arg32, getIReg32(rS) ); 24170 delta += 1; 24171 DIP("vcvtsi2ssl %s,%s,%s\n", 24172 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24173 } else { 24174 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24175 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24176 delta += alen; 24177 DIP("vcvtsi2ssl %s,%s,%s\n", 24178 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24179 } 24180 putXMMRegLane32F( rD, 0, 24181 binop(Iop_F64toF32, 24182 mkexpr(rmode), 24183 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 24184 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24185 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24186 putYMMRegLane128( rD, 1, mkV128(0) ); 24187 *uses_vvvv = True; 24188 goto decode_success; 24189 } 24190 break; 24191 } 24192 24193 case 0x2B: 24194 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */ 24195 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */ 24196 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24197 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24198 UChar modrm = getUChar(delta); 24199 UInt rS = gregOfRexRM(pfx, modrm); 24200 IRTemp tS = newTemp(Ity_V128); 24201 assign(tS, getXMMReg(rS)); 24202 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24203 delta += alen; 24204 gen_SEGV_if_not_16_aligned(addr); 24205 storeLE(mkexpr(addr), mkexpr(tS)); 24206 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 24207 nameXMMReg(rS), dis_buf); 24208 goto decode_success; 24209 } 24210 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */ 24211 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */ 24212 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24213 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) { 24214 UChar modrm = getUChar(delta); 24215 UInt rS = gregOfRexRM(pfx, modrm); 24216 IRTemp tS = newTemp(Ity_V256); 24217 assign(tS, getYMMReg(rS)); 24218 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24219 delta += alen; 24220 gen_SEGV_if_not_32_aligned(addr); 24221 storeLE(mkexpr(addr), mkexpr(tS)); 24222 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's', 24223 nameYMMReg(rS), dis_buf); 24224 goto decode_success; 24225 } 24226 break; 24227 24228 case 0x2C: 24229 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */ 24230 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24231 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24232 goto decode_success; 24233 } 24234 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */ 24235 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24236 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24237 goto decode_success; 24238 } 24239 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */ 24240 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24241 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24242 goto decode_success; 24243 } 24244 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */ 24245 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24246 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24247 goto decode_success; 24248 } 24249 break; 24250 24251 case 0x2D: 24252 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */ 24253 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24254 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24255 goto decode_success; 24256 } 24257 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */ 24258 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24259 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24260 goto decode_success; 24261 } 24262 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */ 24263 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24264 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24265 goto decode_success; 24266 } 24267 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */ 24268 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24269 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24270 goto decode_success; 24271 } 24272 break; 24273 24274 case 0x2E: 24275 case 0x2F: 24276 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */ 24277 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */ 24278 if (have66noF2noF3(pfx)) { 24279 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc ); 24280 goto decode_success; 24281 } 24282 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */ 24283 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */ 24284 if (haveNo66noF2noF3(pfx)) { 24285 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc ); 24286 goto decode_success; 24287 } 24288 break; 24289 24290 case 0x50: 24291 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */ 24292 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24293 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ ); 24294 goto decode_success; 24295 } 24296 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */ 24297 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24298 
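/* MOVMSKPD collects the sign bit of each 64-bit lane into the low bits of the destination GPR and zeroes the rest; this 256-bit form hence yields a 4-bit value. MOVMSKPS, handled just below, does the same per 32-bit lane. */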
delta = dis_MOVMSKPD_256( vbi, pfx, delta ); 24299 goto decode_success; 24300 } 24301 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */ 24302 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24303 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ ); 24304 goto decode_success; 24305 } 24306 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */ 24307 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24308 delta = dis_MOVMSKPS_256( vbi, pfx, delta ); 24309 goto decode_success; 24310 } 24311 break; 24312 24313 case 0x51: 24314 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */ 24315 if (haveF3no66noF2(pfx)) { 24316 delta = dis_AVX128_E_V_to_G_lo32_unary( 24317 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 ); 24318 goto decode_success; 24319 } 24320 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */ 24321 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24322 delta = dis_AVX128_E_to_G_unary_all( 24323 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 ); 24324 goto decode_success; 24325 } 24326 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */ 24327 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24328 delta = dis_AVX256_E_to_G_unary_all( 24329 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 ); 24330 goto decode_success; 24331 } 24332 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */ 24333 if (haveF2no66noF3(pfx)) { 24334 delta = dis_AVX128_E_V_to_G_lo64_unary( 24335 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 ); 24336 goto decode_success; 24337 } 24338 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */ 24339 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24340 delta = dis_AVX128_E_to_G_unary_all( 24341 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 ); 24342 goto decode_success; 24343 } 24344 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */ 24345 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24346 delta = dis_AVX256_E_to_G_unary_all( 24347 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 ); 24348 goto decode_success; 24349 } 24350 break; 24351 24352 case 0x52: 24353 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */ 24354 if (haveF3no66noF2(pfx)) { 24355 delta = dis_AVX128_E_V_to_G_lo32_unary( 24356 uses_vvvv, vbi, pfx, delta, "vrsqrtss", Iop_RSqrt32F0x4 ); 24357 goto decode_success; 24358 } 24359 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */ 24360 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24361 delta = dis_AVX128_E_to_G_unary_all( 24362 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx4 ); 24363 goto decode_success; 24364 } 24365 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */ 24366 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24367 delta = dis_AVX256_E_to_G_unary_all( 24368 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrt32Fx8 ); 24369 goto decode_success; 24370 } 24371 break; 24372 24373 case 0x53: 24374 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */ 24375 if (haveF3no66noF2(pfx)) { 24376 delta = dis_AVX128_E_V_to_G_lo32_unary( 24377 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_Recip32F0x4 ); 24378 goto decode_success; 24379 } 24380 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */ 24381 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24382 delta = dis_AVX128_E_to_G_unary_all( 24383 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx4 ); 24384 goto decode_success; 24385 } 24386 /* VRCPPS
ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */ 24387 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24388 delta = dis_AVX256_E_to_G_unary_all( 24389 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_Recip32Fx8 ); 24390 goto decode_success; 24391 } 24392 break; 24393 24394 case 0x54: 24395 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24396 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */ 24397 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24398 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24399 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 ); 24400 goto decode_success; 24401 } 24402 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24403 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */ 24404 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24405 delta = dis_AVX256_E_V_to_G( 24406 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 ); 24407 goto decode_success; 24408 } 24409 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */ 24410 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24411 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24412 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 ); 24413 goto decode_success; 24414 } 24415 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */ 24416 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24417 delta = dis_AVX256_E_V_to_G( 24418 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 ); 24419 goto decode_success; 24420 } 24421 break; 24422 24423 case 0x55: 24424 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */ 24425 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */ 24426 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24427 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24428 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128, 24429 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24430 goto decode_success; 24431 } 24432 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */ 24433 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24434 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24435 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256, 24436 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24437 goto decode_success; 24438 } 24439 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */ 24440 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24441 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24442 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128, 24443 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24444 goto decode_success; 24445 } 24446 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */ 24447 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24448 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24449 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256, 24450 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24451 goto decode_success; 24452 } 24453 break; 24454 24455 case 0x56: 24456 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24457 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */ 24458 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24459 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24460 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 ); 24461 goto decode_success; 24462 } 24463 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24464 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */ 24465 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24466 delta = dis_AVX256_E_V_to_G( 24467 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 ); 24468 goto decode_success; 24469 } 24470 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24471 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */ 24472 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24473 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24474 uses_vvvv, vbi, pfx, delta, "vorps", 
Iop_OrV128 ); 24475 goto decode_success; 24476 } 24477 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24478 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */ 24479 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24480 delta = dis_AVX256_E_V_to_G( 24481 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 ); 24482 goto decode_success; 24483 } 24484 break; 24485 24486 case 0x57: 24487 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 24488 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */ 24489 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24490 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24491 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 ); 24492 goto decode_success; 24493 } 24494 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 24495 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */ 24496 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24497 delta = dis_AVX256_E_V_to_G( 24498 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 ); 24499 goto decode_success; 24500 } 24501 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 24502 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */ 24503 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24504 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24505 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 ); 24506 goto decode_success; 24507 } 24508 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 24509 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */ 24510 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24511 delta = dis_AVX256_E_V_to_G( 24512 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 ); 24513 goto decode_success; 24514 } 24515 break; 24516 24517 case 0x58: 24518 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */ 24519 if (haveF2no66noF3(pfx)) { 24520 delta = dis_AVX128_E_V_to_G_lo64( 24521 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 ); 24522 goto decode_success; 24523 } 24524 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */ 24525 if (haveF3no66noF2(pfx)) { 24526 delta = dis_AVX128_E_V_to_G_lo32( 24527 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 ); 24528 goto decode_success; 24529 } 24530 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */ 24531 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24532 delta = dis_AVX128_E_V_to_G( 24533 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 ); 24534 goto decode_success; 24535 } 24536 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */ 24537 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24538 delta = dis_AVX256_E_V_to_G( 24539 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 ); 24540 goto decode_success; 24541 } 24542 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */ 24543 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24544 delta = dis_AVX128_E_V_to_G( 24545 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 ); 24546 goto decode_success; 24547 } 24548 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */ 24549 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24550 delta = dis_AVX256_E_V_to_G( 24551 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 ); 24552 goto decode_success; 24553 } 24554 break; 24555 24556 case 0x59: 24557 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */ 24558 if (haveF2no66noF3(pfx)) { 24559 delta = dis_AVX128_E_V_to_G_lo64( 24560 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 ); 24561 goto decode_success; 24562 } 24563 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */ 24564 if (haveF3no66noF2(pfx)) { 24565 delta = dis_AVX128_E_V_to_G_lo32( 24566 uses_vvvv, vbi, pfx, delta, "vmulss", 
Iop_Mul32F0x4 ); 24567 goto decode_success; 24568 } 24569 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */ 24570 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24571 delta = dis_AVX128_E_V_to_G( 24572 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 ); 24573 goto decode_success; 24574 } 24575 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */ 24576 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24577 delta = dis_AVX256_E_V_to_G( 24578 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 ); 24579 goto decode_success; 24580 } 24581 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */ 24582 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24583 delta = dis_AVX128_E_V_to_G( 24584 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 ); 24585 goto decode_success; 24586 } 24587 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */ 24588 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24589 delta = dis_AVX256_E_V_to_G( 24590 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 ); 24591 goto decode_success; 24592 } 24593 break; 24594 24595 case 0x5A: 24596 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */ 24597 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24598 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ ); 24599 goto decode_success; 24600 } 24601 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */ 24602 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24603 delta = dis_CVTPS2PD_256( vbi, pfx, delta ); 24604 goto decode_success; 24605 } 24606 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */ 24607 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24608 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ ); 24609 goto decode_success; 24610 } 24611 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */ 24612 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24613 delta = dis_CVTPD2PS_256( vbi, pfx, delta ); 24614 goto decode_success; 24615 } 24616 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */ 24617 if (haveF2no66noF3(pfx)) { 24618 UChar modrm = getUChar(delta); 24619 UInt rV = getVexNvvvv(pfx); 24620 UInt rD = gregOfRexRM(pfx, modrm); 24621 IRTemp f64lo = newTemp(Ity_F64); 24622 IRTemp rmode = newTemp(Ity_I32); 24623 assign( rmode, get_sse_roundingmode() ); 24624 if (epartIsReg(modrm)) { 24625 UInt rS = eregOfRexRM(pfx,modrm); 24626 assign(f64lo, getXMMRegLane64F(rS, 0)); 24627 delta += 1; 24628 DIP("vcvtsd2ss %s,%s,%s\n", 24629 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 24630 } else { 24631 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24632 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) ); 24633 delta += alen; 24634 DIP("vcvtsd2ss %s,%s,%s\n", 24635 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24636 } 24637 putXMMRegLane32F( rD, 0, 24638 binop( Iop_F64toF32, mkexpr(rmode), 24639 mkexpr(f64lo)) ); 24640 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24641 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24642 putYMMRegLane128( rD, 1, mkV128(0) ); 24643 *uses_vvvv = True; 24644 goto decode_success; 24645 } 24646 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */ 24647 if (haveF3no66noF2(pfx)) { 24648 UChar modrm = getUChar(delta); 24649 UInt rV = getVexNvvvv(pfx); 24650 UInt rD = gregOfRexRM(pfx, modrm); 24651 IRTemp f32lo = newTemp(Ity_F32); 24652 if (epartIsReg(modrm)) { 24653 UInt rS = eregOfRexRM(pfx,modrm); 24654 assign(f32lo, getXMMRegLane32F(rS, 0)); 24655 delta += 1; 24656 DIP("vcvtss2sd %s,%s,%s\n", 24657 
nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 24658 } else { 24659 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24660 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 24661 delta += alen; 24662 DIP("vcvtss2sd %s,%s,%s\n", 24663 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24664 } 24665 putXMMRegLane64F( rD, 0, 24666 unop( Iop_F32toF64, mkexpr(f32lo)) ); 24667 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24668 putYMMRegLane128( rD, 1, mkV128(0) ); 24669 *uses_vvvv = True; 24670 goto decode_success; 24671 } 24672 break; 24673 24674 case 0x5B: 24675 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */ 24676 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24677 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 24678 True/*isAvx*/, False/*!r2zero*/ ); 24679 goto decode_success; 24680 } 24681 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */ 24682 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24683 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 24684 False/*!r2zero*/ ); 24685 goto decode_success; 24686 } 24687 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */ 24688 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 24689 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 24690 True/*isAvx*/, True/*r2zero*/ ); 24691 goto decode_success; 24692 } 24693 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */ 24694 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 24695 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 24696 True/*r2zero*/ ); 24697 goto decode_success; 24698 } 24699 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */ 24700 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24701 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ ); 24702 goto decode_success; 24703 } 24704 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */ 24705 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24706 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta ); 24707 goto decode_success; 24708 } 24709 break; 24710 24711 case 0x5C: 24712 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */ 24713 if (haveF2no66noF3(pfx)) { 24714 delta = dis_AVX128_E_V_to_G_lo64( 24715 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 ); 24716 goto decode_success; 24717 } 24718 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */ 24719 if (haveF3no66noF2(pfx)) { 24720 delta = dis_AVX128_E_V_to_G_lo32( 24721 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 ); 24722 goto decode_success; 24723 } 24724 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */ 24725 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24726 delta = dis_AVX128_E_V_to_G( 24727 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 ); 24728 goto decode_success; 24729 } 24730 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */ 24731 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24732 delta = dis_AVX256_E_V_to_G( 24733 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 ); 24734 goto decode_success; 24735 } 24736 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */ 24737 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24738 delta = dis_AVX128_E_V_to_G( 24739 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 ); 24740 goto decode_success; 24741 } 24742 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */ 24743 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24744 delta = dis_AVX256_E_V_to_G( 24745 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 ); 24746 goto decode_success; 24747 } 24748 break; 24749 24750 case 0x5D: 24751 
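/* Same prefix-based dispatch as the rest of the 0x58 .. 0x5F arithmetic group: F2 selects scalar double, F3 scalar single, no SIMD prefix packed single, 66 packed double, with VEX.L choosing between the 128- and 256-bit packed forms. */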
/* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */ 24752 if (haveF2no66noF3(pfx)) { 24753 delta = dis_AVX128_E_V_to_G_lo64( 24754 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 ); 24755 goto decode_success; 24756 } 24757 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */ 24758 if (haveF3no66noF2(pfx)) { 24759 delta = dis_AVX128_E_V_to_G_lo32( 24760 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 ); 24761 goto decode_success; 24762 } 24763 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */ 24764 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24765 delta = dis_AVX128_E_V_to_G( 24766 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 ); 24767 goto decode_success; 24768 } 24769 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */ 24770 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24771 delta = dis_AVX256_E_V_to_G( 24772 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 ); 24773 goto decode_success; 24774 } 24775 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */ 24776 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24777 delta = dis_AVX128_E_V_to_G( 24778 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 ); 24779 goto decode_success; 24780 } 24781 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */ 24782 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24783 delta = dis_AVX256_E_V_to_G( 24784 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 ); 24785 goto decode_success; 24786 } 24787 break; 24788 24789 case 0x5E: 24790 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */ 24791 if (haveF2no66noF3(pfx)) { 24792 delta = dis_AVX128_E_V_to_G_lo64( 24793 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 ); 24794 goto decode_success; 24795 } 24796 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */ 24797 if (haveF3no66noF2(pfx)) { 24798 delta = dis_AVX128_E_V_to_G_lo32( 24799 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 ); 24800 goto decode_success; 24801 } 24802 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */ 24803 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24804 delta = dis_AVX128_E_V_to_G( 24805 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 ); 24806 goto decode_success; 24807 } 24808 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */ 24809 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24810 delta = dis_AVX256_E_V_to_G( 24811 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 ); 24812 goto decode_success; 24813 } 24814 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */ 24815 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24816 delta = dis_AVX128_E_V_to_G( 24817 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 ); 24818 goto decode_success; 24819 } 24820 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */ 24821 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24822 delta = dis_AVX256_E_V_to_G( 24823 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 ); 24824 goto decode_success; 24825 } 24826 break; 24827 24828 case 0x5F: 24829 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */ 24830 if (haveF2no66noF3(pfx)) { 24831 delta = dis_AVX128_E_V_to_G_lo64( 24832 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 ); 24833 goto decode_success; 24834 } 24835 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */ 24836 if (haveF3no66noF2(pfx)) { 24837 delta = dis_AVX128_E_V_to_G_lo32( 24838 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 ); 24839 goto 
decode_success; 24840 } 24841 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */ 24842 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24843 delta = dis_AVX128_E_V_to_G( 24844 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 ); 24845 goto decode_success; 24846 } 24847 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */ 24848 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24849 delta = dis_AVX256_E_V_to_G( 24850 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 ); 24851 goto decode_success; 24852 } 24853 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */ 24854 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24855 delta = dis_AVX128_E_V_to_G( 24856 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 ); 24857 goto decode_success; 24858 } 24859 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */ 24860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24861 delta = dis_AVX256_E_V_to_G( 24862 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 ); 24863 goto decode_success; 24864 } 24865 break; 24866 24867 case 0x60: 24868 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 24869 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */ 24870 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24871 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24872 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 24873 Iop_InterleaveLO8x16, NULL, 24874 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24875 goto decode_success; 24876 } 24877 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 24878 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */ 24879 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24880 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24881 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 24882 math_VPUNPCKLBW_YMM ); 24883 goto decode_success; 24884 } 24885 break; 24886 24887 case 0x61: 24888 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 24889 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */ 24890 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24891 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24892 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 24893 Iop_InterleaveLO16x8, NULL, 24894 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24895 goto decode_success; 24896 } 24897 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 24898 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */ 24899 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24900 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24901 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 24902 math_VPUNPCKLWD_YMM ); 24903 goto decode_success; 24904 } 24905 break; 24906 24907 case 0x62: 24908 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 24909 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */ 24910 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24911 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24912 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 24913 Iop_InterleaveLO32x4, NULL, 24914 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24915 goto decode_success; 24916 } 24917 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 24918 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */ 24919 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24920 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24921 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 24922 math_VPUNPCKLDQ_YMM ); 24923 goto decode_success; 24924 } 24925 break; 24926 24927 case 0x63: 24928 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 24929 /* VPACKSSWB 
= VEX.NDS.128.66.0F.WIG 63 /r */ 24930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24931 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24932 uses_vvvv, vbi, pfx, delta, "vpacksswb", 24933 Iop_QNarrowBin16Sto8Sx16, NULL, 24934 False/*!invertLeftArg*/, True/*swapArgs*/ ); 24935 goto decode_success; 24936 } 24937 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 24938 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */ 24939 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24940 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 24941 uses_vvvv, vbi, pfx, delta, "vpacksswb", 24942 math_VPACKSSWB_YMM ); 24943 goto decode_success; 24944 } 24945 break; 24946 24947 case 0x64: 24948 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 24949 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */ 24950 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24951 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24952 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 ); 24953 goto decode_success; 24954 } 24955 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 24956 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */ 24957 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24958 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 24959 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 ); 24960 goto decode_success; 24961 } 24962 break; 24963 24964 case 0x65: 24965 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 24966 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */ 24967 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24968 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24969 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 ); 24970 goto decode_success; 24971 } 24972 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 24973 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */ 24974 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24975 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 24976 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 ); 24977 goto decode_success; 24978 } 24979 break; 24980 24981 case 0x66: 24982 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 24983 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */ 24984 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24985 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24986 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 ); 24987 goto decode_success; 24988 } 24989 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 24990 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */ 24991 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24992 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 24993 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 ); 24994 goto decode_success; 24995 } 24996 break; 24997 24998 case 0x67: 24999 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25000 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */ 25001 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25002 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25003 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25004 Iop_QNarrowBin16Sto8Ux16, NULL, 25005 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25006 goto decode_success; 25007 } 25008 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25009 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */ 25010 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25011 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25012 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25013 math_VPACKUSWB_YMM ); 25014 goto decode_success; 25015 } 25016 break; 25017 25018 case 0x68: 25019 /* VPUNPCKHBW 
r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25020 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */ 25021 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25022 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25023 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25024 Iop_InterleaveHI8x16, NULL, 25025 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25026 goto decode_success; 25027 } 25028 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25029 /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */ 25030 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25031 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25032 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25033 math_VPUNPCKHBW_YMM ); 25034 goto decode_success; 25035 } 25036 break; 25037 25038 case 0x69: 25039 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25040 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */ 25041 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25042 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25043 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25044 Iop_InterleaveHI16x8, NULL, 25045 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25046 goto decode_success; 25047 } 25048 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25049 /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */ 25050 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25051 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25052 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25053 math_VPUNPCKHWD_YMM ); 25054 goto decode_success; 25055 } 25056 break; 25057 25058 case 0x6A: 25059 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25060 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */ 25061 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25062 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25063 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25064 Iop_InterleaveHI32x4, NULL, 25065 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25066 goto decode_success; 25067 } 25068 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25069 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */ 25070 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25071 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25072 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25073 math_VPUNPCKHDQ_YMM ); 25074 goto decode_success; 25075 } 25076 break; 25077 25078 case 0x6B: 25079 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25080 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */ 25081 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25082 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25083 uses_vvvv, vbi, pfx, delta, "vpackssdw", 25084 Iop_QNarrowBin32Sto16Sx8, NULL, 25085 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25086 goto decode_success; 25087 } 25088 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25089 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */ 25090 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25091 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25092 uses_vvvv, vbi, pfx, delta, "vpackssdw", 25093 math_VPACKSSDW_YMM ); 25094 goto decode_success; 25095 } 25096 break; 25097 25098 case 0x6C: 25099 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 25100 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */ 25101 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25102 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25103 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25104 Iop_InterleaveLO64x2, NULL, 25105 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25106 goto decode_success; 25107 } 25108 /* VPUNPCKLQDQ r/m, rV, 
r ::: r = interleave-lo-64bitses(rV, r/m) */ 25109 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */ 25110 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25111 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25112 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25113 math_VPUNPCKLQDQ_YMM ); 25114 goto decode_success; 25115 } 25116 break; 25117 25118 case 0x6D: 25119 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25120 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */ 25121 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25122 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25123 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25124 Iop_InterleaveHI64x2, NULL, 25125 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25126 goto decode_success; 25127 } 25128 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25129 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */ 25130 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25131 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25132 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25133 math_VPUNPCKHQDQ_YMM ); 25134 goto decode_success; 25135 } 25136 break; 25137 25138 case 0x6E: 25139 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */ 25140 if (have66noF2noF3(pfx) 25141 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25142 vassert(sz == 2); /* even tho we are transferring 4, not 2. */ 25143 UChar modrm = getUChar(delta); 25144 if (epartIsReg(modrm)) { 25145 delta += 1; 25146 putYMMRegLoAndZU( 25147 gregOfRexRM(pfx,modrm), 25148 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 25149 ); 25150 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 25151 nameXMMReg(gregOfRexRM(pfx,modrm))); 25152 } else { 25153 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25154 delta += alen; 25155 putYMMRegLoAndZU( 25156 gregOfRexRM(pfx,modrm), 25157 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr))) 25158 ); 25159 DIP("vmovd %s, %s\n", dis_buf, 25160 nameXMMReg(gregOfRexRM(pfx,modrm))); 25161 } 25162 goto decode_success; 25163 } 25164 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */ 25165 if (have66noF2noF3(pfx) 25166 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25167 vassert(sz == 2); /* even tho we are transferring 8, not 2. */ 25168 UChar modrm = getUChar(delta); 25169 if (epartIsReg(modrm)) { 25170 delta += 1; 25171 putYMMRegLoAndZU( 25172 gregOfRexRM(pfx,modrm), 25173 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 25174 ); 25175 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 25176 nameXMMReg(gregOfRexRM(pfx,modrm))); 25177 } else { 25178 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25179 delta += alen; 25180 putYMMRegLoAndZU( 25181 gregOfRexRM(pfx,modrm), 25182 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr))) 25183 ); 25184 DIP("vmovq %s, %s\n", dis_buf, 25185 nameXMMReg(gregOfRexRM(pfx,modrm))); 25186 } 25187 goto decode_success; 25188 } 25189 break; 25190 25191 case 0x6F: 25192 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */ 25193 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */ 25194 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25195 && 1==getVexL(pfx)/*256*/) { 25196 UChar modrm = getUChar(delta); 25197 UInt rD = gregOfRexRM(pfx, modrm); 25198 IRTemp tD = newTemp(Ity_V256); 25199 Bool isA = have66noF2noF3(pfx); 25200 HChar ch = isA ? 
'a' : 'u'; 25201 if (epartIsReg(modrm)) { 25202 UInt rS = eregOfRexRM(pfx, modrm); 25203 delta += 1; 25204 assign(tD, getYMMReg(rS)); 25205 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 25206 } else { 25207 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25208 delta += alen; 25209 if (isA) 25210 gen_SEGV_if_not_32_aligned(addr); 25211 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 25212 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD)); 25213 } 25214 putYMMReg(rD, mkexpr(tD)); 25215 goto decode_success; 25216 } 25217 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */ 25218 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */ 25219 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25220 && 0==getVexL(pfx)/*128*/) { 25221 UChar modrm = getUChar(delta); 25222 UInt rD = gregOfRexRM(pfx, modrm); 25223 IRTemp tD = newTemp(Ity_V128); 25224 Bool isA = have66noF2noF3(pfx); 25225 HChar ch = isA ? 'a' : 'u'; 25226 if (epartIsReg(modrm)) { 25227 UInt rS = eregOfRexRM(pfx, modrm); 25228 delta += 1; 25229 assign(tD, getXMMReg(rS)); 25230 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 25231 } else { 25232 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25233 delta += alen; 25234 if (isA) 25235 gen_SEGV_if_not_16_aligned(addr); 25236 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 25237 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD)); 25238 } 25239 putYMMRegLoAndZU(rD, mkexpr(tD)); 25240 goto decode_success; 25241 } 25242 break; 25243 25244 case 0x70: 25245 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */ 25246 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25247 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/); 25248 goto decode_success; 25249 } 25250 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */ 25251 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25252 delta = dis_PSHUFD_32x8( vbi, pfx, delta); 25253 goto decode_success; 25254 } 25255 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */ 25256 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25257 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25258 True/*isAvx*/, False/*!xIsH*/ ); 25259 goto decode_success; 25260 } 25261 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */ 25262 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25263 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ ); 25264 goto decode_success; 25265 } 25266 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */ 25267 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 25268 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25269 True/*isAvx*/, True/*xIsH*/ ); 25270 goto decode_success; 25271 } 25272 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */ 25273 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 25274 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ ); 25275 goto decode_success; 25276 } 25277 break; 25278 25279 case 0x71: 25280 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */ 25281 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */ 25282 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */ 25283 if (have66noF2noF3(pfx) 25284 && 0==getVexL(pfx)/*128*/ 25285 && epartIsReg(getUChar(delta))) { 25286 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25287 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25288 "vpsrlw", Iop_ShrN16x8 ); 25289 *uses_vvvv = True; 25290 goto decode_success; 25291 } 25292 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25293 delta = 
dis_AVX128_shiftE_to_V_imm( pfx, delta, 25294 "vpsraw", Iop_SarN16x8 ); 25295 *uses_vvvv = True; 25296 goto decode_success; 25297 } 25298 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25299 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25300 "vpsllw", Iop_ShlN16x8 ); 25301 *uses_vvvv = True; 25302 goto decode_success; 25303 } 25304 /* else fall through */ 25305 } 25306 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */ 25307 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */ 25308 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */ 25309 if (have66noF2noF3(pfx) 25310 && 1==getVexL(pfx)/*256*/ 25311 && epartIsReg(getUChar(delta))) { 25312 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25313 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25314 "vpsrlw", Iop_ShrN16x16 ); 25315 *uses_vvvv = True; 25316 goto decode_success; 25317 } 25318 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25319 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25320 "vpsraw", Iop_SarN16x16 ); 25321 *uses_vvvv = True; 25322 goto decode_success; 25323 } 25324 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25325 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25326 "vpsllw", Iop_ShlN16x16 ); 25327 *uses_vvvv = True; 25328 goto decode_success; 25329 } 25330 /* else fall through */ 25331 } 25332 break; 25333 25334 case 0x72: 25335 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */ 25336 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */ 25337 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */ 25338 if (have66noF2noF3(pfx) 25339 && 0==getVexL(pfx)/*128*/ 25340 && epartIsReg(getUChar(delta))) { 25341 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25342 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25343 "vpsrld", Iop_ShrN32x4 ); 25344 *uses_vvvv = True; 25345 goto decode_success; 25346 } 25347 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25348 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25349 "vpsrad", Iop_SarN32x4 ); 25350 *uses_vvvv = True; 25351 goto decode_success; 25352 } 25353 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25354 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25355 "vpslld", Iop_ShlN32x4 ); 25356 *uses_vvvv = True; 25357 goto decode_success; 25358 } 25359 /* else fall through */ 25360 } 25361 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */ 25362 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */ 25363 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */ 25364 if (have66noF2noF3(pfx) 25365 && 1==getVexL(pfx)/*256*/ 25366 && epartIsReg(getUChar(delta))) { 25367 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25368 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25369 "vpsrld", Iop_ShrN32x8 ); 25370 *uses_vvvv = True; 25371 goto decode_success; 25372 } 25373 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25374 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25375 "vpsrad", Iop_SarN32x8 ); 25376 *uses_vvvv = True; 25377 goto decode_success; 25378 } 25379 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25380 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25381 "vpslld", Iop_ShlN32x8 ); 25382 *uses_vvvv = True; 25383 goto decode_success; 25384 } 25385 /* else fall through */ 25386 } 25387 break; 25388 25389 case 0x73: 25390 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */ 25391 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */ 25392 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */ 25393 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 
/6 ib */ 25394 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 25395 && epartIsReg(getUChar(delta))) { 25396 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25397 Int rD = getVexNvvvv(pfx); 25398 IRTemp vecS = newTemp(Ity_V128); 25399 if (gregLO3ofRM(getUChar(delta)) == 3) { 25400 Int imm = (Int)getUChar(delta+1); 25401 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25402 delta += 2; 25403 assign( vecS, getXMMReg(rS) ); 25404 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm ))); 25405 *uses_vvvv = True; 25406 goto decode_success; 25407 } 25408 if (gregLO3ofRM(getUChar(delta)) == 7) { 25409 Int imm = (Int)getUChar(delta+1); 25410 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25411 delta += 2; 25412 assign( vecS, getXMMReg(rS) ); 25413 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm ))); 25414 *uses_vvvv = True; 25415 goto decode_success; 25416 } 25417 if (gregLO3ofRM(getUChar(delta)) == 2) { 25418 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25419 "vpsrlq", Iop_ShrN64x2 ); 25420 *uses_vvvv = True; 25421 goto decode_success; 25422 } 25423 if (gregLO3ofRM(getUChar(delta)) == 6) { 25424 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25425 "vpsllq", Iop_ShlN64x2 ); 25426 *uses_vvvv = True; 25427 goto decode_success; 25428 } 25429 /* else fall through */ 25430 } 25431 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */ 25432 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */ 25433 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */ 25434 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */ 25435 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 25436 && epartIsReg(getUChar(delta))) { 25437 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25438 Int rD = getVexNvvvv(pfx); 25439 if (gregLO3ofRM(getUChar(delta)) == 3) { 25440 IRTemp vecS0 = newTemp(Ity_V128); 25441 IRTemp vecS1 = newTemp(Ity_V128); 25442 Int imm = (Int)getUChar(delta+1); 25443 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25444 delta += 2; 25445 assign( vecS0, getYMMRegLane128(rS, 0)); 25446 assign( vecS1, getYMMRegLane128(rS, 1)); 25447 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm ))); 25448 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm ))); 25449 *uses_vvvv = True; 25450 goto decode_success; 25451 } 25452 if (gregLO3ofRM(getUChar(delta)) == 7) { 25453 IRTemp vecS0 = newTemp(Ity_V128); 25454 IRTemp vecS1 = newTemp(Ity_V128); 25455 Int imm = (Int)getUChar(delta+1); 25456 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25457 delta += 2; 25458 assign( vecS0, getYMMRegLane128(rS, 0)); 25459 assign( vecS1, getYMMRegLane128(rS, 1)); 25460 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm ))); 25461 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm ))); 25462 *uses_vvvv = True; 25463 goto decode_success; 25464 } 25465 if (gregLO3ofRM(getUChar(delta)) == 2) { 25466 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25467 "vpsrlq", Iop_ShrN64x4 ); 25468 *uses_vvvv = True; 25469 goto decode_success; 25470 } 25471 if (gregLO3ofRM(getUChar(delta)) == 6) { 25472 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25473 "vpsllq", Iop_ShlN64x4 ); 25474 *uses_vvvv = True; 25475 goto decode_success; 25476 } 25477 /* else fall through */ 25478 } 25479 break; 25480 25481 case 0x74: 25482 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 25483 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */ 25484 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25485 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25486 
uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 ); 25487 goto decode_success; 25488 } 25489 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 25490 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */ 25491 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25492 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25493 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 ); 25494 goto decode_success; 25495 } 25496 break; 25497 25498 case 0x75: 25499 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 25500 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */ 25501 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25502 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25503 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 ); 25504 goto decode_success; 25505 } 25506 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 25507 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */ 25508 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25509 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25510 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 ); 25511 goto decode_success; 25512 } 25513 break; 25514 25515 case 0x76: 25516 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 25517 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */ 25518 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25519 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25520 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 ); 25521 goto decode_success; 25522 } 25523 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 25524 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */ 25525 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25526 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25527 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 ); 25528 goto decode_success; 25529 } 25530 break; 25531 25532 case 0x77: 25533 /* VZEROUPPER = VEX.128.0F.WIG 77 */ 25534 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25535 Int i; 25536 IRTemp zero128 = newTemp(Ity_V128); 25537 assign(zero128, mkV128(0)); 25538 for (i = 0; i < 16; i++) { 25539 putYMMRegLane128(i, 1, mkexpr(zero128)); 25540 } 25541 DIP("vzeroupper\n"); 25542 goto decode_success; 25543 } 25544 /* VZEROALL = VEX.256.0F.WIG 77 */ 25545 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25546 Int i; 25547 IRTemp zero128 = newTemp(Ity_V128); 25548 assign(zero128, mkV128(0)); 25549 for (i = 0; i < 16; i++) { 25550 putYMMRegLoAndZU(i, mkexpr(zero128)); 25551 } 25552 DIP("vzeroall\n"); 25553 goto decode_success; 25554 } 25555 break; 25556 25557 case 0x7C: 25558 case 0x7D: 25559 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */ 25560 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */ 25561 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25562 IRTemp sV = newTemp(Ity_V128); 25563 IRTemp dV = newTemp(Ity_V128); 25564 Bool isAdd = opc == 0x7C; 25565 const HChar* str = isAdd ? 
"add" : "sub"; 25566 UChar modrm = getUChar(delta); 25567 UInt rG = gregOfRexRM(pfx,modrm); 25568 UInt rV = getVexNvvvv(pfx); 25569 if (epartIsReg(modrm)) { 25570 UInt rE = eregOfRexRM(pfx,modrm); 25571 assign( sV, getXMMReg(rE) ); 25572 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 25573 nameXMMReg(rV), nameXMMReg(rG)); 25574 delta += 1; 25575 } else { 25576 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25577 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 25578 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25579 nameXMMReg(rV), nameXMMReg(rG)); 25580 delta += alen; 25581 } 25582 assign( dV, getXMMReg(rV) ); 25583 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) ); 25584 *uses_vvvv = True; 25585 goto decode_success; 25586 } 25587 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */ 25588 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */ 25589 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25590 IRTemp sV = newTemp(Ity_V256); 25591 IRTemp dV = newTemp(Ity_V256); 25592 IRTemp s1, s0, d1, d0; 25593 Bool isAdd = opc == 0x7C; 25594 const HChar* str = isAdd ? "add" : "sub"; 25595 UChar modrm = getUChar(delta); 25596 UInt rG = gregOfRexRM(pfx,modrm); 25597 UInt rV = getVexNvvvv(pfx); 25598 s1 = s0 = d1 = d0 = IRTemp_INVALID; 25599 if (epartIsReg(modrm)) { 25600 UInt rE = eregOfRexRM(pfx,modrm); 25601 assign( sV, getYMMReg(rE) ); 25602 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 25603 nameYMMReg(rV), nameYMMReg(rG)); 25604 delta += 1; 25605 } else { 25606 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25607 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 25608 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25609 nameYMMReg(rV), nameYMMReg(rG)); 25610 delta += alen; 25611 } 25612 assign( dV, getYMMReg(rV) ); 25613 breakupV256toV128s( dV, &d1, &d0 ); 25614 breakupV256toV128s( sV, &s1, &s0 ); 25615 putYMMReg( rG, binop(Iop_V128HLtoV256, 25616 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ), 25617 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) ); 25618 *uses_vvvv = True; 25619 goto decode_success; 25620 } 25621 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */ 25622 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */ 25623 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25624 IRTemp sV = newTemp(Ity_V128); 25625 IRTemp dV = newTemp(Ity_V128); 25626 Bool isAdd = opc == 0x7C; 25627 const HChar* str = isAdd ? "add" : "sub"; 25628 UChar modrm = getUChar(delta); 25629 UInt rG = gregOfRexRM(pfx,modrm); 25630 UInt rV = getVexNvvvv(pfx); 25631 if (epartIsReg(modrm)) { 25632 UInt rE = eregOfRexRM(pfx,modrm); 25633 assign( sV, getXMMReg(rE) ); 25634 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 25635 nameXMMReg(rV), nameXMMReg(rG)); 25636 delta += 1; 25637 } else { 25638 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25639 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 25640 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25641 nameXMMReg(rV), nameXMMReg(rG)); 25642 delta += alen; 25643 } 25644 assign( dV, getXMMReg(rV) ); 25645 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) ); 25646 *uses_vvvv = True; 25647 goto decode_success; 25648 } 25649 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */ 25650 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */ 25651 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25652 IRTemp sV = newTemp(Ity_V256); 25653 IRTemp dV = newTemp(Ity_V256); 25654 IRTemp s1, s0, d1, d0; 25655 Bool isAdd = opc == 0x7C; 25656 const HChar* str = isAdd ? 
"add" : "sub"; 25657 UChar modrm = getUChar(delta); 25658 UInt rG = gregOfRexRM(pfx,modrm); 25659 UInt rV = getVexNvvvv(pfx); 25660 s1 = s0 = d1 = d0 = IRTemp_INVALID; 25661 if (epartIsReg(modrm)) { 25662 UInt rE = eregOfRexRM(pfx,modrm); 25663 assign( sV, getYMMReg(rE) ); 25664 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 25665 nameYMMReg(rV), nameYMMReg(rG)); 25666 delta += 1; 25667 } else { 25668 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25669 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 25670 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 25671 nameYMMReg(rV), nameYMMReg(rG)); 25672 delta += alen; 25673 } 25674 assign( dV, getYMMReg(rV) ); 25675 breakupV256toV128s( dV, &d1, &d0 ); 25676 breakupV256toV128s( sV, &s1, &s0 ); 25677 putYMMReg( rG, binop(Iop_V128HLtoV256, 25678 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ), 25679 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) ); 25680 *uses_vvvv = True; 25681 goto decode_success; 25682 } 25683 break; 25684 25685 case 0x7E: 25686 /* Note the Intel docs don't make sense for this. I think they 25687 are wrong. They seem to imply it is a store when in fact I 25688 think it is a load. Also it's unclear whether this is W0, W1 25689 or WIG. */ 25690 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */ 25691 if (haveF3no66noF2(pfx) 25692 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25693 vassert(sz == 4); /* even tho we are transferring 8, not 4. */ 25694 UChar modrm = getUChar(delta); 25695 UInt rG = gregOfRexRM(pfx,modrm); 25696 if (epartIsReg(modrm)) { 25697 UInt rE = eregOfRexRM(pfx,modrm); 25698 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 )); 25699 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 25700 delta += 1; 25701 } else { 25702 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25703 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 25704 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 25705 delta += alen; 25706 } 25707 /* zero bits 255:64 */ 25708 putXMMRegLane64( rG, 1, mkU64(0) ); 25709 putYMMRegLane128( rG, 1, mkV128(0) ); 25710 goto decode_success; 25711 } 25712 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */ 25713 /* Moves from G to E, so is a store-form insn */ 25714 /* Intel docs list this in the VMOVD entry for some reason. 
*/ 25715 if (have66noF2noF3(pfx) 25716 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25717 UChar modrm = getUChar(delta); 25718 UInt rG = gregOfRexRM(pfx,modrm); 25719 if (epartIsReg(modrm)) { 25720 UInt rE = eregOfRexRM(pfx,modrm); 25721 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE)); 25722 putIReg64(rE, getXMMRegLane64(rG, 0)); 25723 delta += 1; 25724 } else { 25725 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25726 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) ); 25727 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf); 25728 delta += alen; 25729 } 25730 goto decode_success; 25731 } 25732 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */ 25733 /* Moves from G to E, so is a store-form insn */ 25734 if (have66noF2noF3(pfx) 25735 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25736 UChar modrm = getUChar(delta); 25737 UInt rG = gregOfRexRM(pfx,modrm); 25738 if (epartIsReg(modrm)) { 25739 UInt rE = eregOfRexRM(pfx,modrm); 25740 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE)); 25741 putIReg32(rE, getXMMRegLane32(rG, 0)); 25742 delta += 1; 25743 } else { 25744 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25745 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) ); 25746 DIP("vmovd %s,%s\n", nameXMMReg(rG), dis_buf); 25747 delta += alen; 25748 } 25749 goto decode_success; 25750 } 25751 break; 25752 25753 case 0x7F: 25754 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */ 25755 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */ 25756 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25757 && 1==getVexL(pfx)/*256*/) { 25758 UChar modrm = getUChar(delta); 25759 UInt rS = gregOfRexRM(pfx, modrm); 25760 IRTemp tS = newTemp(Ity_V256); 25761 Bool isA = have66noF2noF3(pfx); 25762 HChar ch = isA ? 'a' : 'u'; 25763 assign(tS, getYMMReg(rS)); 25764 if (epartIsReg(modrm)) { 25765 UInt rD = eregOfRexRM(pfx, modrm); 25766 delta += 1; 25767 putYMMReg(rD, mkexpr(tS)); 25768 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 25769 } else { 25770 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25771 delta += alen; 25772 if (isA) 25773 gen_SEGV_if_not_32_aligned(addr); 25774 storeLE(mkexpr(addr), mkexpr(tS)); 25775 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf); 25776 } 25777 goto decode_success; 25778 } 25779 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */ 25780 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */ 25781 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25782 && 0==getVexL(pfx)/*128*/) { 25783 UChar modrm = getUChar(delta); 25784 UInt rS = gregOfRexRM(pfx, modrm); 25785 IRTemp tS = newTemp(Ity_V128); 25786 Bool isA = have66noF2noF3(pfx); 25787 HChar ch = isA ?
'a' : 'u'; 25788 assign(tS, getXMMReg(rS)); 25789 if (epartIsReg(modrm)) { 25790 UInt rD = eregOfRexRM(pfx, modrm); 25791 delta += 1; 25792 putYMMRegLoAndZU(rD, mkexpr(tS)); 25793 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 25794 } else { 25795 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25796 delta += alen; 25797 if (isA) 25798 gen_SEGV_if_not_16_aligned(addr); 25799 storeLE(mkexpr(addr), mkexpr(tS)); 25800 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf); 25801 } 25802 goto decode_success; 25803 } 25804 break; 25805 25806 case 0xAE: 25807 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */ 25808 if (haveNo66noF2noF3(pfx) 25809 && 0==getVexL(pfx)/*LZ*/ 25810 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 25811 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 25812 && sz == 4) { 25813 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/); 25814 goto decode_success; 25815 } 25816 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */ 25817 if (haveNo66noF2noF3(pfx) 25818 && 0==getVexL(pfx)/*LZ*/ 25819 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 25820 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 25821 && sz == 4) { 25822 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/); 25823 goto decode_success; 25824 } 25825 break; 25826 25827 case 0xC2: 25828 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */ 25829 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */ 25830 if (haveF2no66noF3(pfx)) { 25831 Long delta0 = delta; 25832 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25833 "vcmpsd", False/*!all_lanes*/, 25834 8/*sz*/); 25835 if (delta > delta0) goto decode_success; 25836 /* else fall through -- decoding has failed */ 25837 } 25838 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */ 25839 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */ 25840 if (haveF3no66noF2(pfx)) { 25841 Long delta0 = delta; 25842 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25843 "vcmpss", False/*!all_lanes*/, 25844 4/*sz*/); 25845 if (delta > delta0) goto decode_success; 25846 /* else fall through -- decoding has failed */ 25847 } 25848 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 25849 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */ 25850 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25851 Long delta0 = delta; 25852 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25853 "vcmppd", True/*all_lanes*/, 25854 8/*sz*/); 25855 if (delta > delta0) goto decode_success; 25856 /* else fall through -- decoding has failed */ 25857 } 25858 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 25859 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */ 25860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25861 Long delta0 = delta; 25862 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25863 "vcmppd", 8/*sz*/); 25864 if (delta > delta0) goto decode_success; 25865 /* else fall through -- decoding has failed */ 25866 } 25867 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 25868 /* = VEX.NDS.128.0F.WIG C2 /r ib */ 25869 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25870 Long delta0 = delta; 25871 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25872 "vcmpps", True/*all_lanes*/, 25873 4/*sz*/); 25874 if (delta > delta0) goto decode_success; 25875 /* else fall through -- decoding has failed */ 25876 } 25877 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 25878 /* = VEX.NDS.256.0F.WIG C2 /r ib */ 25879 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25880 Long delta0 = 
delta; 25881 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 25882 "vcmpps", 4/*sz*/); 25883 if (delta > delta0) goto decode_success; 25884 /* else fall through -- decoding has failed */ 25885 } 25886 break; 25887 25888 case 0xC4: 25889 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */ 25890 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25891 UChar modrm = getUChar(delta); 25892 UInt rG = gregOfRexRM(pfx, modrm); 25893 UInt rV = getVexNvvvv(pfx); 25894 Int imm8; 25895 IRTemp new16 = newTemp(Ity_I16); 25896 25897 if ( epartIsReg( modrm ) ) { 25898 imm8 = (Int)(getUChar(delta+1) & 7); 25899 assign( new16, unop(Iop_32to16, 25900 getIReg32(eregOfRexRM(pfx,modrm))) ); 25901 delta += 1+1; 25902 DIP( "vpinsrw $%d,%s,%s\n", imm8, 25903 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) ); 25904 } else { 25905 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 25906 imm8 = (Int)(getUChar(delta+alen) & 7); 25907 assign( new16, loadLE( Ity_I16, mkexpr(addr) )); 25908 delta += alen+1; 25909 DIP( "vpinsrw $%d,%s,%s\n", 25910 imm8, dis_buf, nameXMMReg(rG) ); 25911 } 25912 25913 IRTemp src_vec = newTemp(Ity_V128); 25914 assign(src_vec, getXMMReg( rV )); 25915 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 ); 25916 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 25917 *uses_vvvv = True; 25918 goto decode_success; 25919 } 25920 break; 25921 25922 case 0xC5: 25923 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */ 25924 if (have66noF2noF3(pfx) 25925 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25926 Long delta0 = delta; 25927 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 25928 True/*isAvx*/ ); 25929 if (delta > delta0) goto decode_success; 25930 /* else fall through -- decoding has failed */ 25931 } 25932 break; 25933 25934 case 0xC6: 25935 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 25936 /* = VEX.NDS.128.0F.WIG C6 /r ib */ 25937 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25938 Int imm8 = 0; 25939 IRTemp eV = newTemp(Ity_V128); 25940 IRTemp vV = newTemp(Ity_V128); 25941 UInt modrm = getUChar(delta); 25942 UInt rG = gregOfRexRM(pfx,modrm); 25943 UInt rV = getVexNvvvv(pfx); 25944 assign( vV, getXMMReg(rV) ); 25945 if (epartIsReg(modrm)) { 25946 UInt rE = eregOfRexRM(pfx,modrm); 25947 assign( eV, getXMMReg(rE) ); 25948 imm8 = (Int)getUChar(delta+1); 25949 delta += 1+1; 25950 DIP("vshufps $%d,%s,%s,%s\n", 25951 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 25952 } else { 25953 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 25954 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 25955 imm8 = (Int)getUChar(delta+alen); 25956 delta += 1+alen; 25957 DIP("vshufps $%d,%s,%s,%s\n", 25958 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 25959 } 25960 IRTemp res = math_SHUFPS_128( eV, vV, imm8 ); 25961 putYMMRegLoAndZU( rG, mkexpr(res) ); 25962 *uses_vvvv = True; 25963 goto decode_success; 25964 } 25965 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 25966 /* = VEX.NDS.256.0F.WIG C6 /r ib */ 25967 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25968 Int imm8 = 0; 25969 IRTemp eV = newTemp(Ity_V256); 25970 IRTemp vV = newTemp(Ity_V256); 25971 UInt modrm = getUChar(delta); 25972 UInt rG = gregOfRexRM(pfx,modrm); 25973 UInt rV = getVexNvvvv(pfx); 25974 assign( vV, getYMMReg(rV) ); 25975 if (epartIsReg(modrm)) { 25976 UInt rE = eregOfRexRM(pfx,modrm); 25977 assign( eV, getYMMReg(rE) ); 25978 imm8 = (Int)getUChar(delta+1); 25979 delta += 1+1; 25980 DIP("vshufps $%d,%s,%s,%s\n", 25981 imm8, nameYMMReg(rE), 
nameYMMReg(rV), nameYMMReg(rG)); 25982 } else { 25983 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 25984 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 25985 imm8 = (Int)getUChar(delta+alen); 25986 delta += 1+alen; 25987 DIP("vshufps $%d,%s,%s,%s\n", 25988 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 25989 } 25990 IRTemp res = math_SHUFPS_256( eV, vV, imm8 ); 25991 putYMMReg( rG, mkexpr(res) ); 25992 *uses_vvvv = True; 25993 goto decode_success; 25994 } 25995 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 25996 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */ 25997 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25998 Int imm8 = 0; 25999 IRTemp eV = newTemp(Ity_V128); 26000 IRTemp vV = newTemp(Ity_V128); 26001 UInt modrm = getUChar(delta); 26002 UInt rG = gregOfRexRM(pfx,modrm); 26003 UInt rV = getVexNvvvv(pfx); 26004 assign( vV, getXMMReg(rV) ); 26005 if (epartIsReg(modrm)) { 26006 UInt rE = eregOfRexRM(pfx,modrm); 26007 assign( eV, getXMMReg(rE) ); 26008 imm8 = (Int)getUChar(delta+1); 26009 delta += 1+1; 26010 DIP("vshufpd $%d,%s,%s,%s\n", 26011 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 26012 } else { 26013 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26014 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 26015 imm8 = (Int)getUChar(delta+alen); 26016 delta += 1+alen; 26017 DIP("vshufpd $%d,%s,%s,%s\n", 26018 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 26019 } 26020 IRTemp res = math_SHUFPD_128( eV, vV, imm8 ); 26021 putYMMRegLoAndZU( rG, mkexpr(res) ); 26022 *uses_vvvv = True; 26023 goto decode_success; 26024 } 26025 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 26026 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */ 26027 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26028 Int imm8 = 0; 26029 IRTemp eV = newTemp(Ity_V256); 26030 IRTemp vV = newTemp(Ity_V256); 26031 UInt modrm = getUChar(delta); 26032 UInt rG = gregOfRexRM(pfx,modrm); 26033 UInt rV = getVexNvvvv(pfx); 26034 assign( vV, getYMMReg(rV) ); 26035 if (epartIsReg(modrm)) { 26036 UInt rE = eregOfRexRM(pfx,modrm); 26037 assign( eV, getYMMReg(rE) ); 26038 imm8 = (Int)getUChar(delta+1); 26039 delta += 1+1; 26040 DIP("vshufpd $%d,%s,%s,%s\n", 26041 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 26042 } else { 26043 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26044 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 26045 imm8 = (Int)getUChar(delta+alen); 26046 delta += 1+alen; 26047 DIP("vshufpd $%d,%s,%s,%s\n", 26048 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 26049 } 26050 IRTemp res = math_SHUFPD_256( eV, vV, imm8 ); 26051 putYMMReg( rG, mkexpr(res) ); 26052 *uses_vvvv = True; 26053 goto decode_success; 26054 } 26055 break; 26056 26057 case 0xD0: 26058 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */ 26059 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26060 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26061 uses_vvvv, vbi, pfx, delta, 26062 "vaddsubpd", math_ADDSUBPD_128 ); 26063 goto decode_success; 26064 } 26065 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */ 26066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26067 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26068 uses_vvvv, vbi, pfx, delta, 26069 "vaddsubpd", math_ADDSUBPD_256 ); 26070 goto decode_success; 26071 } 26072 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */ 26073 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26074 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26075 uses_vvvv, vbi, pfx, delta, 26076 "vaddsubps", 
math_ADDSUBPS_128 ); 26077 goto decode_success; 26078 } 26079 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */ 26080 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26081 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26082 uses_vvvv, vbi, pfx, delta, 26083 "vaddsubps", math_ADDSUBPS_256 ); 26084 goto decode_success; 26085 } 26086 break; 26087 26088 case 0xD1: 26089 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */ 26090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26091 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26092 "vpsrlw", Iop_ShrN16x8 ); 26093 *uses_vvvv = True; 26094 goto decode_success; 26095 26096 } 26097 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */ 26098 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26099 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26100 "vpsrlw", Iop_ShrN16x16 ); 26101 *uses_vvvv = True; 26102 goto decode_success; 26103 26104 } 26105 break; 26106 26107 case 0xD2: 26108 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */ 26109 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26110 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26111 "vpsrld", Iop_ShrN32x4 ); 26112 *uses_vvvv = True; 26113 goto decode_success; 26114 } 26115 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */ 26116 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26117 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26118 "vpsrld", Iop_ShrN32x8 ); 26119 *uses_vvvv = True; 26120 goto decode_success; 26121 } 26122 break; 26123 26124 case 0xD3: 26125 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */ 26126 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26127 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26128 "vpsrlq", Iop_ShrN64x2 ); 26129 *uses_vvvv = True; 26130 goto decode_success; 26131 } 26132 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */ 26133 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26134 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26135 "vpsrlq", Iop_ShrN64x4 ); 26136 *uses_vvvv = True; 26137 goto decode_success; 26138 } 26139 break; 26140 26141 case 0xD4: 26142 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26143 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */ 26144 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26145 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26146 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 ); 26147 goto decode_success; 26148 } 26149 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26150 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */ 26151 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26152 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26153 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 ); 26154 goto decode_success; 26155 } 26156 break; 26157 26158 case 0xD5: 26159 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */ 26160 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26161 delta = dis_AVX128_E_V_to_G( 26162 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 ); 26163 goto decode_success; 26164 } 26165 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */ 26166 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26167 delta = dis_AVX256_E_V_to_G( 26168 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 ); 26169 goto decode_success; 26170 } 26171 break; 26172 26173 case 0xD6: 26174 /* I can't even find any Intel docs for this one. */ 26175 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half 26176 xmm) to E (mem or lo half xmm). 
Looks like L==0(128), W==0 26177 (WIG, maybe?) */ 26178 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 26179 && 0==getRexW(pfx)/*this might be redundant, dunno*/) { 26180 UChar modrm = getUChar(delta); 26181 UInt rG = gregOfRexRM(pfx,modrm); 26182 if (epartIsReg(modrm)) { 26183 /* fall through, awaiting test case */ 26184 /* dst: lo half copied, hi half zeroed */ 26185 } else { 26186 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26187 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 )); 26188 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf ); 26189 delta += alen; 26190 goto decode_success; 26191 } 26192 } 26193 break; 26194 26195 case 0xD7: 26196 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */ 26197 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26198 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ ); 26199 goto decode_success; 26200 } 26201 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */ 26202 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26203 delta = dis_PMOVMSKB_256( vbi, pfx, delta ); 26204 goto decode_success; 26205 } 26206 break; 26207 26208 case 0xD8: 26209 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */ 26210 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26211 delta = dis_AVX128_E_V_to_G( 26212 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 ); 26213 goto decode_success; 26214 } 26215 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */ 26216 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26217 delta = dis_AVX256_E_V_to_G( 26218 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 ); 26219 goto decode_success; 26220 } 26221 break; 26222 26223 case 0xD9: 26224 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */ 26225 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26226 delta = dis_AVX128_E_V_to_G( 26227 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 ); 26228 goto decode_success; 26229 } 26230 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */ 26231 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26232 delta = dis_AVX256_E_V_to_G( 26233 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 ); 26234 goto decode_success; 26235 } 26236 break; 26237 26238 case 0xDA: 26239 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */ 26240 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26241 delta = dis_AVX128_E_V_to_G( 26242 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 ); 26243 goto decode_success; 26244 } 26245 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */ 26246 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26247 delta = dis_AVX256_E_V_to_G( 26248 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 ); 26249 goto decode_success; 26250 } 26251 break; 26252 26253 case 0xDB: 26254 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26255 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */ 26256 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26257 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26258 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 ); 26259 goto decode_success; 26260 } 26261 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26262 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */ 26263 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26264 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26265 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 ); 26266 goto decode_success; 26267 } 26268 break; 26269 26270 case 0xDC: 26271 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 
DC /r */ 26272 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26273 delta = dis_AVX128_E_V_to_G( 26274 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 ); 26275 goto decode_success; 26276 } 26277 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */ 26278 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26279 delta = dis_AVX256_E_V_to_G( 26280 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 ); 26281 goto decode_success; 26282 } 26283 break; 26284 26285 case 0xDD: 26286 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */ 26287 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26288 delta = dis_AVX128_E_V_to_G( 26289 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 ); 26290 goto decode_success; 26291 } 26292 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */ 26293 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26294 delta = dis_AVX256_E_V_to_G( 26295 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 ); 26296 goto decode_success; 26297 } 26298 break; 26299 26300 case 0xDE: 26301 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */ 26302 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26303 delta = dis_AVX128_E_V_to_G( 26304 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 ); 26305 goto decode_success; 26306 } 26307 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */ 26308 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26309 delta = dis_AVX256_E_V_to_G( 26310 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 ); 26311 goto decode_success; 26312 } 26313 break; 26314 26315 case 0xDF: 26316 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */ 26317 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */ 26318 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26319 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 26320 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128, 26321 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26322 goto decode_success; 26323 } 26324 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) 
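Per the AMD/Intel docs the NOT applies to the first source operand, i.e. the vvvv register (rV here), so architecturally r = ~rV & r/m.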
*/ 26325 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */ 26326 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26327 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 26328 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256, 26329 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26330 goto decode_success; 26331 } 26332 break; 26333 26334 case 0xE0: 26335 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */ 26336 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26337 delta = dis_AVX128_E_V_to_G( 26338 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 ); 26339 goto decode_success; 26340 } 26341 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */ 26342 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26343 delta = dis_AVX256_E_V_to_G( 26344 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 ); 26345 goto decode_success; 26346 } 26347 break; 26348 26349 case 0xE1: 26350 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */ 26351 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26352 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26353 "vpsraw", Iop_SarN16x8 ); 26354 *uses_vvvv = True; 26355 goto decode_success; 26356 } 26357 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */ 26358 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26359 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26360 "vpsraw", Iop_SarN16x16 ); 26361 *uses_vvvv = True; 26362 goto decode_success; 26363 } 26364 break; 26365 26366 case 0xE2: 26367 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */ 26368 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26369 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26370 "vpsrad", Iop_SarN32x4 ); 26371 *uses_vvvv = True; 26372 goto decode_success; 26373 } 26374 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */ 26375 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26376 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26377 "vpsrad", Iop_SarN32x8 ); 26378 *uses_vvvv = True; 26379 goto decode_success; 26380 } 26381 break; 26382 26383 case 0xE3: 26384 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */ 26385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26386 delta = dis_AVX128_E_V_to_G( 26387 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 ); 26388 goto decode_success; 26389 } 26390 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */ 26391 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26392 delta = dis_AVX256_E_V_to_G( 26393 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 ); 26394 goto decode_success; 26395 } 26396 break; 26397 26398 case 0xE4: 26399 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */ 26400 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26401 delta = dis_AVX128_E_V_to_G( 26402 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 ); 26403 goto decode_success; 26404 } 26405 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */ 26406 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26407 delta = dis_AVX256_E_V_to_G( 26408 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 ); 26409 goto decode_success; 26410 } 26411 break; 26412 26413 case 0xE5: 26414 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */ 26415 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26416 delta = dis_AVX128_E_V_to_G( 26417 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 ); 26418 goto decode_success; 26419 } 26420 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 
/r */ 26421 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26422 delta = dis_AVX256_E_V_to_G( 26423 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 ); 26424 goto decode_success; 26425 } 26426 break; 26427 26428 case 0xE6: 26429 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */ 26430 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 26431 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/); 26432 goto decode_success; 26433 } 26434 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */ 26435 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 26436 delta = dis_CVTDQ2PD_256(vbi, pfx, delta); 26437 goto decode_success; 26438 } 26439 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */ 26440 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26441 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26442 True/*r2zero*/); 26443 goto decode_success; 26444 } 26445 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */ 26446 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26447 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/); 26448 goto decode_success; 26449 } 26450 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */ 26451 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26452 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26453 False/*!r2zero*/); 26454 goto decode_success; 26455 } 26456 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */ 26457 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26458 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/); 26459 goto decode_success; 26460 } 26461 break; 26462 26463 case 0xE7: 26464 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */ 26465 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26466 UChar modrm = getUChar(delta); 26467 UInt rG = gregOfRexRM(pfx,modrm); 26468 if (!epartIsReg(modrm)) { 26469 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26470 gen_SEGV_if_not_16_aligned( addr ); 26471 storeLE( mkexpr(addr), getXMMReg(rG) ); 26472 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG)); 26473 delta += alen; 26474 goto decode_success; 26475 } 26476 /* else fall through */ 26477 } 26478 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */ 26479 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26480 UChar modrm = getUChar(delta); 26481 UInt rG = gregOfRexRM(pfx,modrm); 26482 if (!epartIsReg(modrm)) { 26483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26484 gen_SEGV_if_not_32_aligned( addr ); 26485 storeLE( mkexpr(addr), getYMMReg(rG) ); 26486 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG)); 26487 delta += alen; 26488 goto decode_success; 26489 } 26490 /* else fall through */ 26491 } 26492 break; 26493 26494 case 0xE8: 26495 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */ 26496 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26497 delta = dis_AVX128_E_V_to_G( 26498 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 ); 26499 goto decode_success; 26500 } 26501 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */ 26502 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26503 delta = dis_AVX256_E_V_to_G( 26504 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 ); 26505 goto decode_success; 26506 } 26507 break; 26508 26509 case 0xE9: 26510 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */ 26511 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26512 delta = dis_AVX128_E_V_to_G( 26513 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 ); 26514 goto decode_success; 26515 
} 26516 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */ 26517 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26518 delta = dis_AVX256_E_V_to_G( 26519 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 ); 26520 goto decode_success; 26521 } 26522 break; 26523 26524 case 0xEA: 26525 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 26526 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */ 26527 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26528 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26529 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 ); 26530 goto decode_success; 26531 } 26532 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 26533 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */ 26534 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26535 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26536 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 ); 26537 goto decode_success; 26538 } 26539 break; 26540 26541 case 0xEB: 26542 /* VPOR r/m, rV, r ::: r = rV | r/m */ 26543 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */ 26544 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26545 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26546 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 ); 26547 goto decode_success; 26548 } 26549 /* VPOR r/m, rV, r ::: r = rV | r/m */ 26550 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */ 26551 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26552 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26553 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 ); 26554 goto decode_success; 26555 } 26556 break; 26557 26558 case 0xEC: 26559 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */ 26560 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26561 delta = dis_AVX128_E_V_to_G( 26562 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 ); 26563 goto decode_success; 26564 } 26565 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */ 26566 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26567 delta = dis_AVX256_E_V_to_G( 26568 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 ); 26569 goto decode_success; 26570 } 26571 break; 26572 26573 case 0xED: 26574 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */ 26575 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26576 delta = dis_AVX128_E_V_to_G( 26577 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 ); 26578 goto decode_success; 26579 } 26580 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */ 26581 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26582 delta = dis_AVX256_E_V_to_G( 26583 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 ); 26584 goto decode_success; 26585 } 26586 break; 26587 26588 case 0xEE: 26589 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 26590 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */ 26591 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26592 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26593 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 ); 26594 goto decode_success; 26595 } 26596 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 26597 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */ 26598 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26599 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26600 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 ); 26601 goto decode_success; 26602 } 26603 break; 26604 26605 case 0xEF: 26606 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 26607 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */ 26608 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 26609 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26610 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 ); 26611 goto decode_success; 26612 } 26613 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 26614 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */ 26615 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26616 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26617 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 ); 26618 goto decode_success; 26619 } 26620 break; 26621 26622 case 0xF0: 26623 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */ 26624 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26625 UChar modrm = getUChar(delta); 26626 UInt rD = gregOfRexRM(pfx, modrm); 26627 IRTemp tD = newTemp(Ity_V256); 26628 if (epartIsReg(modrm)) break; 26629 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26630 delta += alen; 26631 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 26632 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD)); 26633 putYMMReg(rD, mkexpr(tD)); 26634 goto decode_success; 26635 } 26636 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */ 26637 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26638 UChar modrm = getUChar(delta); 26639 UInt rD = gregOfRexRM(pfx, modrm); 26640 IRTemp tD = newTemp(Ity_V128); 26641 if (epartIsReg(modrm)) break; 26642 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26643 delta += alen; 26644 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 26645 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD)); 26646 putYMMRegLoAndZU(rD, mkexpr(tD)); 26647 goto decode_success; 26648 } 26649 break; 26650 26651 case 0xF1: 26652 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */ 26653 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26654 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26655 "vpsllw", Iop_ShlN16x8 ); 26656 *uses_vvvv = True; 26657 goto decode_success; 26658 26659 } 26660 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */ 26661 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26662 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26663 "vpsllw", Iop_ShlN16x16 ); 26664 *uses_vvvv = True; 26665 goto decode_success; 26666 26667 } 26668 break; 26669 26670 case 0xF2: 26671 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */ 26672 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26673 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26674 "vpslld", Iop_ShlN32x4 ); 26675 *uses_vvvv = True; 26676 goto decode_success; 26677 } 26678 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */ 26679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26680 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26681 "vpslld", Iop_ShlN32x8 ); 26682 *uses_vvvv = True; 26683 goto decode_success; 26684 } 26685 break; 26686 26687 case 0xF3: 26688 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */ 26689 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26690 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26691 "vpsllq", Iop_ShlN64x2 ); 26692 *uses_vvvv = True; 26693 goto decode_success; 26694 } 26695 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */ 26696 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26697 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26698 "vpsllq", Iop_ShlN64x4 ); 26699 *uses_vvvv = True; 26700 goto decode_success; 26701 } 26702 break; 26703 26704 case 0xF4: 26705 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */ 26706 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26707 delta = 
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26708 uses_vvvv, vbi, pfx, delta, 26709 "vpmuludq", math_PMULUDQ_128 ); 26710 goto decode_success; 26711 } 26712 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */ 26713 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26714 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26715 uses_vvvv, vbi, pfx, delta, 26716 "vpmuludq", math_PMULUDQ_256 ); 26717 goto decode_success; 26718 } 26719 break; 26720 26721 case 0xF5: 26722 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */ 26723 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26724 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26725 uses_vvvv, vbi, pfx, delta, 26726 "vpmaddwd", math_PMADDWD_128 ); 26727 goto decode_success; 26728 } 26729 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */ 26730 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26731 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26732 uses_vvvv, vbi, pfx, delta, 26733 "vpmaddwd", math_PMADDWD_256 ); 26734 goto decode_success; 26735 } 26736 break; 26737 26738 case 0xF6: 26739 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */ 26740 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26741 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26742 uses_vvvv, vbi, pfx, delta, 26743 "vpsadbw", math_PSADBW_128 ); 26744 goto decode_success; 26745 } 26746 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */ 26747 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26748 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26749 uses_vvvv, vbi, pfx, delta, 26750 "vpsadbw", math_PSADBW_256 ); 26751 goto decode_success; 26752 } 26753 break; 26754 26755 case 0xF7: 26756 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */ 26757 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 26758 && epartIsReg(getUChar(delta))) { 26759 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ ); 26760 goto decode_success; 26761 } 26762 break; 26763 26764 case 0xF8: 26765 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 26766 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */ 26767 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26768 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26769 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 ); 26770 goto decode_success; 26771 } 26772 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 26773 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */ 26774 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26775 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26776 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 ); 26777 goto decode_success; 26778 } 26779 break; 26780 26781 case 0xF9: 26782 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 26783 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */ 26784 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26785 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26786 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 ); 26787 goto decode_success; 26788 } 26789 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 26790 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */ 26791 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26792 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26793 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 ); 26794 goto decode_success; 26795 } 26796 break; 26797 26798 case 0xFA: 26799 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 26800 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */ 26801 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26802 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26803 uses_vvvv, vbi, pfx, 
delta, "vpsubd", Iop_Sub32x4 ); 26804 goto decode_success; 26805 } 26806 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 26807 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */ 26808 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26809 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26810 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 ); 26811 goto decode_success; 26812 } 26813 break; 26814 26815 case 0xFB: 26816 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 26817 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */ 26818 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26819 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26820 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 ); 26821 goto decode_success; 26822 } 26823 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 26824 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */ 26825 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26826 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26827 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 ); 26828 goto decode_success; 26829 } 26830 break; 26831 26832 case 0xFC: 26833 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 26834 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */ 26835 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26836 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26837 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 ); 26838 goto decode_success; 26839 } 26840 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 26841 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */ 26842 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26843 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26844 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 ); 26845 goto decode_success; 26846 } 26847 break; 26848 26849 case 0xFD: 26850 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 26851 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */ 26852 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26853 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26854 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 ); 26855 goto decode_success; 26856 } 26857 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 26858 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */ 26859 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26860 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26861 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 ); 26862 goto decode_success; 26863 } 26864 break; 26865 26866 case 0xFE: 26867 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 26868 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */ 26869 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26870 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26871 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 ); 26872 goto decode_success; 26873 } 26874 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 26875 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */ 26876 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26877 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26878 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 ); 26879 goto decode_success; 26880 } 26881 break; 26882 26883 default: 26884 break; 26885 26886 } 26887 26888 //decode_failure: 26889 return deltaIN; 26890 26891 decode_success: 26892 return delta; 26893 } 26894 26895 26896 /*------------------------------------------------------------*/ 26897 /*--- ---*/ 26898 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/ 26899 /*--- ---*/ 26900 /*------------------------------------------------------------*/ 26901 26902 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 26903 { 26904 /* In the control vector, zero out all but the bottom two bits of 26905 each 
      32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x4,
                       binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
                       mkU8(30));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
   return res;
}

static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}

static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* No cleverness here .. */
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV128to64s( dataV, &dHi, &dLo );
   breakupV128to64s( ctrlV, &cHi, &cLo );
   IRExpr* rHi
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRExpr* rLo
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, rHi, rLo));
   return res;
}

static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}

static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
{
   /* In the control vector, zero out all but the bottom three bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x8,
                       binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
                       mkU8(29));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
   return res;
}

static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
                         VexAbiInfo* vbi, Prefix pfx, Long delta,
                         const HChar* opname, IROp op8 )
{
   HChar dis_buf[50];
   Int alen;
   Int size = getRexW(pfx) ?
8 : 4; 26983 IRType ty = szToITy(size); 26984 IRTemp src = newTemp(ty); 26985 IRTemp amt = newTemp(ty); 26986 UChar rm = getUChar(delta); 26987 26988 assign( amt, getIRegV(size,pfx) ); 26989 if (epartIsReg(rm)) { 26990 assign( src, getIRegE(size,pfx,rm) ); 26991 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), 26992 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 26993 delta++; 26994 } else { 26995 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26996 assign( src, loadLE(ty, mkexpr(addr)) ); 26997 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf, 26998 nameIRegG(size,pfx,rm)); 26999 delta += alen; 27000 } 27001 27002 putIRegG( size, pfx, rm, 27003 binop(mkSizedOp(ty,op8), mkexpr(src), 27004 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt), 27005 mkU(ty,8*size-1)))) ); 27006 /* Flags aren't modified. */ 27007 *uses_vvvv = True; 27008 return delta; 27009 } 27010 27011 27012 static Long dis_FMA ( VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc ) 27013 { 27014 UChar modrm = getUChar(delta); 27015 UInt rG = gregOfRexRM(pfx, modrm); 27016 UInt rV = getVexNvvvv(pfx); 27017 Bool scalar = (opc & 0xF) > 7 && (opc & 1); 27018 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32; 27019 IRType vty = scalar ? ty : getVexL(pfx) ? Ity_V256 : Ity_V128; 27020 IRTemp vX = newTemp(vty); 27021 IRTemp vY = newTemp(vty); 27022 IRTemp vZ = newTemp(vty); 27023 IRExpr *x[8], *y[8], *z[8]; 27024 IRTemp addr = IRTemp_INVALID; 27025 HChar dis_buf[50]; 27026 Int alen = 0; 27027 const HChar *name; 27028 const HChar *suffix; 27029 const HChar *order; 27030 Bool negateRes = False; 27031 Bool negateZeven = False; 27032 Bool negateZodd = False; 27033 Int i, j; 27034 Int count; 27035 static IROp ops[] = { Iop_V256to64_0, Iop_V256to64_1, 27036 Iop_V256to64_2, Iop_V256to64_3, 27037 Iop_V128to64, Iop_V128HIto64 }; 27038 27039 switch (opc & 0xF) { 27040 case 0x6: 27041 name = "addsub"; 27042 negateZeven = True; 27043 break; 27044 case 0x7: 27045 name = "subadd"; 27046 negateZodd = True; 27047 break; 27048 case 0x8: 27049 case 0x9: 27050 name = "add"; 27051 break; 27052 case 0xA: 27053 case 0xB: 27054 name = "sub"; 27055 negateZeven = True; 27056 negateZodd = True; 27057 break; 27058 case 0xC: 27059 case 0xD: 27060 name = "add"; 27061 negateRes = True; 27062 negateZeven = True; 27063 negateZodd = True; 27064 break; 27065 case 0xE: 27066 case 0xF: 27067 name = "sub"; 27068 negateRes = True; 27069 break; 27070 default: 27071 vpanic("dis_FMA(amd64)"); 27072 break; 27073 } 27074 switch (opc & 0xF0) { 27075 case 0x90: order = "132"; break; 27076 case 0xA0: order = "213"; break; 27077 case 0xB0: order = "231"; break; 27078 default: vpanic("dis_FMA(amd64)"); break; 27079 } 27080 if (scalar) 27081 suffix = ty == Ity_F64 ? "sd" : "ss"; 27082 else 27083 suffix = ty == Ity_F64 ? "pd" : "ps"; 27084 27085 if (scalar) { 27086 assign( vX, ty == Ity_F64 27087 ? getXMMRegLane64F(rG, 0) : getXMMRegLane32F(rG, 0) ); 27088 assign( vZ, ty == Ity_F64 27089 ? getXMMRegLane64F(rV, 0) : getXMMRegLane32F(rV, 0) ); 27090 } else { 27091 assign( vX, vty == Ity_V256 ? getYMMReg(rG) : getXMMReg(rG) ); 27092 assign( vZ, vty == Ity_V256 ? getYMMReg(rV) : getXMMReg(rV) ); 27093 } 27094 27095 if (epartIsReg(modrm)) { 27096 UInt rE = eregOfRexRM(pfx, modrm); 27097 delta += 1; 27098 if (scalar) 27099 assign( vY, ty == Ity_F64 27100 ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 27101 else 27102 assign( vY, vty == Ity_V256 ? 
getYMMReg(rE) : getXMMReg(rE) ); 27103 if (vty == Ity_V256) { 27104 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27105 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV), 27106 nameYMMReg(rG)); 27107 } else { 27108 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27109 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV), 27110 nameXMMReg(rG)); 27111 } 27112 } else { 27113 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27114 delta += alen; 27115 assign(vY, loadLE(vty, mkexpr(addr))); 27116 if (vty == Ity_V256) { 27117 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27118 name, order, suffix, dis_buf, nameYMMReg(rV), 27119 nameYMMReg(rG)); 27120 } else { 27121 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "", 27122 name, order, suffix, dis_buf, nameXMMReg(rV), 27123 nameXMMReg(rG)); 27124 } 27125 } 27126 27127 /* vX/vY/vZ now in 132 order. If it is different order, swap the 27128 arguments. */ 27129 if ((opc & 0xF0) != 0x90) { 27130 IRTemp tem = vX; 27131 if ((opc & 0xF0) == 0xA0) { 27132 vX = vZ; 27133 vZ = vY; 27134 vY = tem; 27135 } else { 27136 vX = vZ; 27137 vZ = tem; 27138 } 27139 } 27140 27141 if (scalar) { 27142 count = 1; 27143 x[0] = mkexpr(vX); 27144 y[0] = mkexpr(vY); 27145 z[0] = mkexpr(vZ); 27146 } else if (ty == Ity_F32) { 27147 count = vty == Ity_V256 ? 8 : 4; 27148 j = vty == Ity_V256 ? 0 : 4; 27149 for (i = 0; i < count; i += 2) { 27150 IRTemp tem = newTemp(Ity_I64); 27151 assign(tem, unop(ops[i / 2 + j], mkexpr(vX))); 27152 x[i] = unop(Iop_64to32, mkexpr(tem)); 27153 x[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27154 tem = newTemp(Ity_I64); 27155 assign(tem, unop(ops[i / 2 + j], mkexpr(vY))); 27156 y[i] = unop(Iop_64to32, mkexpr(tem)); 27157 y[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27158 tem = newTemp(Ity_I64); 27159 assign(tem, unop(ops[i / 2 + j], mkexpr(vZ))); 27160 z[i] = unop(Iop_64to32, mkexpr(tem)); 27161 z[i + 1] = unop(Iop_64HIto32, mkexpr(tem)); 27162 } 27163 } else { 27164 count = vty == Ity_V256 ? 4 : 2; 27165 j = vty == Ity_V256 ? 0 : 4; 27166 for (i = 0; i < count; i++) { 27167 x[i] = unop(ops[i + j], mkexpr(vX)); 27168 y[i] = unop(ops[i + j], mkexpr(vY)); 27169 z[i] = unop(ops[i + j], mkexpr(vZ)); 27170 } 27171 } 27172 if (!scalar) 27173 for (i = 0; i < count; i++) { 27174 IROp op = ty == Ity_F64 27175 ? Iop_ReinterpI64asF64 : Iop_ReinterpI32asF32; 27176 x[i] = unop(op, x[i]); 27177 y[i] = unop(op, y[i]); 27178 z[i] = unop(op, z[i]); 27179 } 27180 for (i = 0; i < count; i++) { 27181 if ((i & 1) ? negateZodd : negateZeven) 27182 z[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, z[i]); 27183 x[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32, 27184 get_FAKE_roundingmode(), x[i], y[i], z[i]); 27185 if (negateRes) 27186 x[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, x[i]); 27187 if (ty == Ity_F64) 27188 putYMMRegLane64F( rG, i, x[i] ); 27189 else 27190 putYMMRegLane32F( rG, i, x[i] ); 27191 } 27192 if (vty != Ity_V256) 27193 putYMMRegLane128( rG, 1, mkV128(0) ); 27194 27195 return delta; 27196 } 27197 27198 27199 /* Masked load. 
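   Loosely: each destination lane is loaded from memory only if the top
   (sign) bit of the corresponding lane of the vvvv register is set;
   lanes whose mask bit is clear are set to zero.  For example,
   vmaskmovps (%rax),%xmm2,%xmm1 loads lane i of %xmm1 from 4*i(%rax)
   if bit 31 of lane i of %xmm2 is set, and zeroes that lane otherwise.
   To avoid generating a conditional load in the IR, the address of a
   deselected lane is steered to the stack pointer -- presumably on the
   grounds that the stack is always mapped -- and the loaded value is
   then discarded by the enclosing ITE.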
*/
static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, VexAbiInfo* vbi,
                                 Prefix pfx, Long delta,
                                 const HChar* opname, Bool isYMM, IRType ty )
{
   HChar dis_buf[50];
   Int alen, i;
   IRTemp addr;
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   UInt rV = getVexNvvvv(pfx);
   IRTemp res[8], cond;
   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   if (isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
   } else {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
   }
   delta += alen;

   for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
      res[i] = newTemp(ty);
      cond = newTemp(Ity_I1);
      assign( cond,
              binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
                    ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                  : getYMMRegLane64( rV, i ),
                    mkU(ty, 0) ));
      assign( res[i],
              IRExpr_ITE(
                 mkexpr(cond),
                 loadLE(ty, IRExpr_ITE(
                               mkexpr(cond),
                               binop(Iop_Add64, mkexpr(addr),
                                     mkU64(i*(ty == Ity_I32 ? 4 : 8))),
                               getIReg64(R_RSP)
                            )
                 ),
                 mkU(ty, 0)
              )
      );
   }
   switch (ty) {
      case Ity_I32:
         for (i = 0; i < 8; i++)
            putYMMRegLane32( rG, i, (i < 4 || isYMM)
                                    ? mkexpr(res[i]) : mkU32(0) );
         break;
      case Ity_I64:
         for (i = 0; i < 4; i++)
            putYMMRegLane64( rG, i, (i < 2 || isYMM)
                                    ? mkexpr(res[i]) : mkU64(0) );
         break;
      default: vassert(0);
   }

   *uses_vvvv = True;
   return delta;
}


/* Gather. */
static ULong dis_VGATHER ( Bool *uses_vvvv, VexAbiInfo* vbi,
                           Prefix pfx, Long delta,
                           const HChar* opname, Bool isYMM,
                           Bool isVM64x, IRType ty )
{
   HChar dis_buf[50];
   Int alen, i, vscale, count1, count2;
   IRTemp addr;
   UChar modrm = getUChar(delta);
   UInt rG = gregOfRexRM(pfx,modrm);
   UInt rV = getVexNvvvv(pfx);
   UInt rI;
   IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
   IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
   IRTemp cond;
   addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
                         idxTy, &vscale );
   if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
      return delta;
   if (dstTy == Ity_V256) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
   } else {
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
   }
   delta += alen;

   if (ty == Ity_I32) {
      count1 = isYMM ? 8 : 4;
      count2 = isVM64x ? count1 / 2 : count1;
   } else {
      count1 = count2 = isYMM ? 4 : 2;
   }

   /* First update the mask register to copies of the sign bit. */
   if (ty == Ity_I32) {
      if (isYMM)
         putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
      else
         putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
   } else {
      for (i = 0; i < count1; i++) {
         putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
                                       mkU8(63)) );
      }
   }

   /* Next gather the individual elements. If any fault occurs, the
      corresponding mask element will be set and the loop stops.
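      Each loop iteration handles one lane: if its mask bit (the sign
      bit) is set, the element is loaded and written to the destination
      lane; the mask lane is then cleared in either case.  Hence a
      fault part-way through leaves already-gathered lanes with their
      mask bits cleared and the faulting lane's mask bit still set,
      which is, roughly, what restartable-gather semantics require.
      Lanes whose mask bit is clear keep their previous destination
      value; their load address is steered to the stack pointer so that
      the (discarded) access is harmless.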
*/ 27309 for (i = 0; i < count2; i++) { 27310 IRExpr *expr, *addr_expr; 27311 cond = newTemp(Ity_I1); 27312 assign( cond, 27313 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S, 27314 ty == Ity_I32 ? getYMMRegLane32( rV, i ) 27315 : getYMMRegLane64( rV, i ), 27316 mkU(ty, 0)) ); 27317 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i ) 27318 : getYMMRegLane64( rG, i ); 27319 addr_expr = isVM64x ? getYMMRegLane64( rI, i ) 27320 : unop(Iop_32Sto64, getYMMRegLane32( rI, i )); 27321 switch (vscale) { 27322 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break; 27323 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break; 27324 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break; 27325 default: break; 27326 } 27327 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr); 27328 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr); 27329 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP)); 27330 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr); 27331 if (ty == Ity_I32) { 27332 putYMMRegLane32( rG, i, expr ); 27333 putYMMRegLane32( rV, i, mkU32(0) ); 27334 } else { 27335 putYMMRegLane64( rG, i, expr); 27336 putYMMRegLane64( rV, i, mkU64(0) ); 27337 } 27338 } 27339 27340 if (!isYMM || (ty == Ity_I32 && isVM64x)) { 27341 if (ty == Ity_I64 || isYMM) 27342 putYMMRegLane128( rV, 1, mkV128(0) ); 27343 else if (ty == Ity_I32 && count2 == 2) { 27344 putYMMRegLane64( rV, 1, mkU64(0) ); 27345 putYMMRegLane64( rG, 1, mkU64(0) ); 27346 } 27347 putYMMRegLane128( rG, 1, mkV128(0) ); 27348 } 27349 27350 *uses_vvvv = True; 27351 return delta; 27352 } 27353 27354 27355 __attribute__((noinline)) 27356 static 27357 Long dis_ESC_0F38__VEX ( 27358 /*MB_OUT*/DisResult* dres, 27359 /*OUT*/ Bool* uses_vvvv, 27360 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 27361 Bool resteerCisOk, 27362 void* callback_opaque, 27363 VexArchInfo* archinfo, 27364 VexAbiInfo* vbi, 27365 Prefix pfx, Int sz, Long deltaIN 27366 ) 27367 { 27368 IRTemp addr = IRTemp_INVALID; 27369 Int alen = 0; 27370 HChar dis_buf[50]; 27371 Long delta = deltaIN; 27372 UChar opc = getUChar(delta); 27373 delta++; 27374 *uses_vvvv = False; 27375 27376 switch (opc) { 27377 27378 case 0x00: 27379 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27380 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */ 27381 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27382 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27383 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM ); 27384 goto decode_success; 27385 } 27386 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27387 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */ 27388 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27389 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27390 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM ); 27391 goto decode_success; 27392 } 27393 break; 27394 27395 case 0x01: 27396 case 0x02: 27397 case 0x03: 27398 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */ 27399 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */ 27400 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */ 27401 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27402 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27403 *uses_vvvv = True; 27404 goto decode_success; 27405 } 27406 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */ 27407 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */ 27408 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */ 
27409 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27410 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27411 *uses_vvvv = True; 27412 goto decode_success; 27413 } 27414 break; 27415 27416 case 0x04: 27417 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */ 27418 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27419 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27420 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27421 math_PMADDUBSW_128 ); 27422 goto decode_success; 27423 } 27424 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */ 27425 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27426 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27427 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27428 math_PMADDUBSW_256 ); 27429 goto decode_success; 27430 } 27431 break; 27432 27433 case 0x05: 27434 case 0x06: 27435 case 0x07: 27436 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */ 27437 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */ 27438 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */ 27439 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27440 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27441 *uses_vvvv = True; 27442 goto decode_success; 27443 } 27444 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */ 27445 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */ 27446 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */ 27447 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27448 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27449 *uses_vvvv = True; 27450 goto decode_success; 27451 } 27452 break; 27453 27454 case 0x08: 27455 case 0x09: 27456 case 0x0A: 27457 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */ 27458 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */ 27459 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */ 27460 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27461 IRTemp sV = newTemp(Ity_V128); 27462 IRTemp dV = newTemp(Ity_V128); 27463 IRTemp sHi, sLo, dHi, dLo; 27464 sHi = sLo = dHi = dLo = IRTemp_INVALID; 27465 HChar ch = '?'; 27466 Int laneszB = 0; 27467 UChar modrm = getUChar(delta); 27468 UInt rG = gregOfRexRM(pfx,modrm); 27469 UInt rV = getVexNvvvv(pfx); 27470 27471 switch (opc) { 27472 case 0x08: laneszB = 1; ch = 'b'; break; 27473 case 0x09: laneszB = 2; ch = 'w'; break; 27474 case 0x0A: laneszB = 4; ch = 'd'; break; 27475 default: vassert(0); 27476 } 27477 27478 assign( dV, getXMMReg(rV) ); 27479 27480 if (epartIsReg(modrm)) { 27481 UInt rE = eregOfRexRM(pfx,modrm); 27482 assign( sV, getXMMReg(rE) ); 27483 delta += 1; 27484 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE), 27485 nameXMMReg(rV), nameXMMReg(rG)); 27486 } else { 27487 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27488 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 27489 delta += alen; 27490 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 27491 nameXMMReg(rV), nameXMMReg(rG)); 27492 } 27493 27494 breakupV128to64s( dV, &dHi, &dLo ); 27495 breakupV128to64s( sV, &sHi, &sLo ); 27496 27497 putYMMRegLoAndZU( 27498 rG, 27499 binop(Iop_64HLtoV128, 27500 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 27501 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 27502 ) 27503 ); 27504 *uses_vvvv = True; 27505 goto decode_success; 27506 } 27507 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */ 27508 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 
09 /r */ 27509 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */ 27510 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27511 IRTemp sV = newTemp(Ity_V256); 27512 IRTemp dV = newTemp(Ity_V256); 27513 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 27514 s3 = s2 = s1 = s0 = IRTemp_INVALID; 27515 d3 = d2 = d1 = d0 = IRTemp_INVALID; 27516 UChar ch = '?'; 27517 Int laneszB = 0; 27518 UChar modrm = getUChar(delta); 27519 UInt rG = gregOfRexRM(pfx,modrm); 27520 UInt rV = getVexNvvvv(pfx); 27521 27522 switch (opc) { 27523 case 0x08: laneszB = 1; ch = 'b'; break; 27524 case 0x09: laneszB = 2; ch = 'w'; break; 27525 case 0x0A: laneszB = 4; ch = 'd'; break; 27526 default: vassert(0); 27527 } 27528 27529 assign( dV, getYMMReg(rV) ); 27530 27531 if (epartIsReg(modrm)) { 27532 UInt rE = eregOfRexRM(pfx,modrm); 27533 assign( sV, getYMMReg(rE) ); 27534 delta += 1; 27535 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE), 27536 nameYMMReg(rV), nameYMMReg(rG)); 27537 } else { 27538 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27539 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 27540 delta += alen; 27541 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 27542 nameYMMReg(rV), nameYMMReg(rG)); 27543 } 27544 27545 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 27546 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 27547 27548 putYMMReg( 27549 rG, 27550 binop( Iop_V128HLtoV256, 27551 binop(Iop_64HLtoV128, 27552 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ), 27553 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB ) 27554 ), 27555 binop(Iop_64HLtoV128, 27556 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ), 27557 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB ) 27558 ) 27559 ) 27560 ); 27561 *uses_vvvv = True; 27562 goto decode_success; 27563 } 27564 break; 27565 27566 case 0x0B: 27567 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */ 27568 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27569 IRTemp sV = newTemp(Ity_V128); 27570 IRTemp dV = newTemp(Ity_V128); 27571 IRTemp sHi, sLo, dHi, dLo; 27572 sHi = sLo = dHi = dLo = IRTemp_INVALID; 27573 UChar modrm = getUChar(delta); 27574 UInt rG = gregOfRexRM(pfx,modrm); 27575 UInt rV = getVexNvvvv(pfx); 27576 27577 assign( dV, getXMMReg(rV) ); 27578 27579 if (epartIsReg(modrm)) { 27580 UInt rE = eregOfRexRM(pfx,modrm); 27581 assign( sV, getXMMReg(rE) ); 27582 delta += 1; 27583 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE), 27584 nameXMMReg(rV), nameXMMReg(rG)); 27585 } else { 27586 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27587 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 27588 delta += alen; 27589 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 27590 nameXMMReg(rV), nameXMMReg(rG)); 27591 } 27592 27593 breakupV128to64s( dV, &dHi, &dLo ); 27594 breakupV128to64s( sV, &sHi, &sLo ); 27595 27596 putYMMRegLoAndZU( 27597 rG, 27598 binop(Iop_64HLtoV128, 27599 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 27600 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 27601 ) 27602 ); 27603 *uses_vvvv = True; 27604 goto decode_success; 27605 } 27606 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */ 27607 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27608 IRTemp sV = newTemp(Ity_V256); 27609 IRTemp dV = newTemp(Ity_V256); 27610 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 27611 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 27612 UChar modrm = getUChar(delta); 27613 UInt rG = gregOfRexRM(pfx,modrm); 27614 UInt rV = getVexNvvvv(pfx); 27615 27616 assign( dV, getYMMReg(rV) ); 27617 27618 if (epartIsReg(modrm)) { 27619 
UInt rE = eregOfRexRM(pfx,modrm); 27620 assign( sV, getYMMReg(rE) ); 27621 delta += 1; 27622 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE), 27623 nameYMMReg(rV), nameYMMReg(rG)); 27624 } else { 27625 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 27626 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 27627 delta += alen; 27628 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 27629 nameYMMReg(rV), nameYMMReg(rG)); 27630 } 27631 27632 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 27633 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 27634 27635 putYMMReg( 27636 rG, 27637 binop(Iop_V128HLtoV256, 27638 binop(Iop_64HLtoV128, 27639 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ), 27640 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ), 27641 binop(Iop_64HLtoV128, 27642 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ), 27643 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) ) 27644 ) 27645 ); 27646 *uses_vvvv = True; 27647 goto decode_success; 27648 } 27649 break; 27650 27651 case 0x0C: 27652 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */ 27653 if (have66noF2noF3(pfx) 27654 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 27655 UChar modrm = getUChar(delta); 27656 UInt rG = gregOfRexRM(pfx, modrm); 27657 UInt rV = getVexNvvvv(pfx); 27658 IRTemp ctrlV = newTemp(Ity_V128); 27659 if (epartIsReg(modrm)) { 27660 UInt rE = eregOfRexRM(pfx, modrm); 27661 delta += 1; 27662 DIP("vpermilps %s,%s,%s\n", 27663 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 27664 assign(ctrlV, getXMMReg(rE)); 27665 } else { 27666 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27667 delta += alen; 27668 DIP("vpermilps %s,%s,%s\n", 27669 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 27670 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 27671 } 27672 IRTemp dataV = newTemp(Ity_V128); 27673 assign(dataV, getXMMReg(rV)); 27674 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV); 27675 putYMMRegLoAndZU(rG, mkexpr(resV)); 27676 *uses_vvvv = True; 27677 goto decode_success; 27678 } 27679 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */ 27680 if (have66noF2noF3(pfx) 27681 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27682 UChar modrm = getUChar(delta); 27683 UInt rG = gregOfRexRM(pfx, modrm); 27684 UInt rV = getVexNvvvv(pfx); 27685 IRTemp ctrlV = newTemp(Ity_V256); 27686 if (epartIsReg(modrm)) { 27687 UInt rE = eregOfRexRM(pfx, modrm); 27688 delta += 1; 27689 DIP("vpermilps %s,%s,%s\n", 27690 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 27691 assign(ctrlV, getYMMReg(rE)); 27692 } else { 27693 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27694 delta += alen; 27695 DIP("vpermilps %s,%s,%s\n", 27696 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 27697 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 27698 } 27699 IRTemp dataV = newTemp(Ity_V256); 27700 assign(dataV, getYMMReg(rV)); 27701 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV); 27702 putYMMReg(rG, mkexpr(resV)); 27703 *uses_vvvv = True; 27704 goto decode_success; 27705 } 27706 break; 27707 27708 case 0x0D: 27709 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */ 27710 if (have66noF2noF3(pfx) 27711 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 27712 UChar modrm = getUChar(delta); 27713 UInt rG = gregOfRexRM(pfx, modrm); 27714 UInt rV = getVexNvvvv(pfx); 27715 IRTemp ctrlV = newTemp(Ity_V128); 27716 if (epartIsReg(modrm)) { 27717 UInt rE = eregOfRexRM(pfx, modrm); 27718 delta += 1; 27719 DIP("vpermilpd %s,%s,%s\n", 27720 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 27721 assign(ctrlV, getXMMReg(rE)); 27722 } 
else { 27723 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27724 delta += alen; 27725 DIP("vpermilpd %s,%s,%s\n", 27726 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 27727 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 27728 } 27729 IRTemp dataV = newTemp(Ity_V128); 27730 assign(dataV, getXMMReg(rV)); 27731 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV); 27732 putYMMRegLoAndZU(rG, mkexpr(resV)); 27733 *uses_vvvv = True; 27734 goto decode_success; 27735 } 27736 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */ 27737 if (have66noF2noF3(pfx) 27738 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27739 UChar modrm = getUChar(delta); 27740 UInt rG = gregOfRexRM(pfx, modrm); 27741 UInt rV = getVexNvvvv(pfx); 27742 IRTemp ctrlV = newTemp(Ity_V256); 27743 if (epartIsReg(modrm)) { 27744 UInt rE = eregOfRexRM(pfx, modrm); 27745 delta += 1; 27746 DIP("vpermilpd %s,%s,%s\n", 27747 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 27748 assign(ctrlV, getYMMReg(rE)); 27749 } else { 27750 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27751 delta += alen; 27752 DIP("vpermilpd %s,%s,%s\n", 27753 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 27754 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 27755 } 27756 IRTemp dataV = newTemp(Ity_V256); 27757 assign(dataV, getYMMReg(rV)); 27758 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV); 27759 putYMMReg(rG, mkexpr(resV)); 27760 *uses_vvvv = True; 27761 goto decode_success; 27762 } 27763 break; 27764 27765 case 0x0E: 27766 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */ 27767 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27768 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 ); 27769 goto decode_success; 27770 } 27771 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */ 27772 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27773 delta = dis_xTESTy_256( vbi, pfx, delta, 32 ); 27774 goto decode_success; 27775 } 27776 break; 27777 27778 case 0x0F: 27779 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */ 27780 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27781 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 ); 27782 goto decode_success; 27783 } 27784 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */ 27785 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27786 delta = dis_xTESTy_256( vbi, pfx, delta, 64 ); 27787 goto decode_success; 27788 } 27789 break; 27790 27791 case 0x16: 27792 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */ 27793 if (have66noF2noF3(pfx) 27794 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 27795 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27796 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD ); 27797 goto decode_success; 27798 } 27799 break; 27800 27801 case 0x17: 27802 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */ 27803 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27804 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 ); 27805 goto decode_success; 27806 } 27807 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */ 27808 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27809 delta = dis_xTESTy_256( vbi, pfx, delta, 0 ); 27810 goto decode_success; 27811 } 27812 break; 27813 27814 case 0x18: 27815 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 27816 if (have66noF2noF3(pfx) 27817 && 0==getVexL(pfx)/*128*/ 27818 && !epartIsReg(getUChar(delta))) { 27819 UChar modrm = getUChar(delta); 27820 UInt rG = gregOfRexRM(pfx, modrm); 27821 addr = disAMode( &alen, vbi, pfx, 
delta, dis_buf, 0 ); 27822 delta += alen; 27823 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG)); 27824 IRTemp t32 = newTemp(Ity_I32); 27825 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 27826 IRTemp t64 = newTemp(Ity_I64); 27827 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27828 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 27829 putYMMRegLoAndZU(rG, res); 27830 goto decode_success; 27831 } 27832 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 27833 if (have66noF2noF3(pfx) 27834 && 1==getVexL(pfx)/*256*/ 27835 && !epartIsReg(getUChar(delta))) { 27836 UChar modrm = getUChar(delta); 27837 UInt rG = gregOfRexRM(pfx, modrm); 27838 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27839 delta += alen; 27840 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG)); 27841 IRTemp t32 = newTemp(Ity_I32); 27842 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 27843 IRTemp t64 = newTemp(Ity_I64); 27844 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27845 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27846 mkexpr(t64), mkexpr(t64)); 27847 putYMMReg(rG, res); 27848 goto decode_success; 27849 } 27850 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 27851 if (have66noF2noF3(pfx) 27852 && 0==getVexL(pfx)/*128*/ 27853 && epartIsReg(getUChar(delta))) { 27854 UChar modrm = getUChar(delta); 27855 UInt rG = gregOfRexRM(pfx, modrm); 27856 UInt rE = eregOfRexRM(pfx, modrm); 27857 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 27858 IRTemp t32 = newTemp(Ity_I32); 27859 assign(t32, getXMMRegLane32(rE, 0)); 27860 IRTemp t64 = newTemp(Ity_I64); 27861 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27862 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 27863 putYMMRegLoAndZU(rG, res); 27864 delta++; 27865 goto decode_success; 27866 } 27867 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 27868 if (have66noF2noF3(pfx) 27869 && 1==getVexL(pfx)/*256*/ 27870 && epartIsReg(getUChar(delta))) { 27871 UChar modrm = getUChar(delta); 27872 UInt rG = gregOfRexRM(pfx, modrm); 27873 UInt rE = eregOfRexRM(pfx, modrm); 27874 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 27875 IRTemp t32 = newTemp(Ity_I32); 27876 assign(t32, getXMMRegLane32(rE, 0)); 27877 IRTemp t64 = newTemp(Ity_I64); 27878 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 27879 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27880 mkexpr(t64), mkexpr(t64)); 27881 putYMMReg(rG, res); 27882 delta++; 27883 goto decode_success; 27884 } 27885 break; 27886 27887 case 0x19: 27888 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 27889 if (have66noF2noF3(pfx) 27890 && 1==getVexL(pfx)/*256*/ 27891 && !epartIsReg(getUChar(delta))) { 27892 UChar modrm = getUChar(delta); 27893 UInt rG = gregOfRexRM(pfx, modrm); 27894 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27895 delta += alen; 27896 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG)); 27897 IRTemp t64 = newTemp(Ity_I64); 27898 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 27899 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27900 mkexpr(t64), mkexpr(t64)); 27901 putYMMReg(rG, res); 27902 goto decode_success; 27903 } 27904 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 27905 if (have66noF2noF3(pfx) 27906 && 1==getVexL(pfx)/*256*/ 27907 && epartIsReg(getUChar(delta))) { 27908 UChar modrm = getUChar(delta); 27909 UInt rG = gregOfRexRM(pfx, modrm); 27910 UInt rE = eregOfRexRM(pfx, modrm); 27911 
DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 27912 IRTemp t64 = newTemp(Ity_I64); 27913 assign(t64, getXMMRegLane64(rE, 0)); 27914 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 27915 mkexpr(t64), mkexpr(t64)); 27916 putYMMReg(rG, res); 27917 delta++; 27918 goto decode_success; 27919 } 27920 break; 27921 27922 case 0x1A: 27923 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */ 27924 if (have66noF2noF3(pfx) 27925 && 1==getVexL(pfx)/*256*/ 27926 && !epartIsReg(getUChar(delta))) { 27927 UChar modrm = getUChar(delta); 27928 UInt rG = gregOfRexRM(pfx, modrm); 27929 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27930 delta += alen; 27931 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG)); 27932 IRTemp t128 = newTemp(Ity_V128); 27933 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 27934 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 27935 goto decode_success; 27936 } 27937 break; 27938 27939 case 0x1C: 27940 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */ 27941 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27942 delta = dis_AVX128_E_to_G_unary( 27943 uses_vvvv, vbi, pfx, delta, 27944 "vpabsb", math_PABS_XMM_pap1 ); 27945 goto decode_success; 27946 } 27947 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */ 27948 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27949 delta = dis_AVX256_E_to_G_unary( 27950 uses_vvvv, vbi, pfx, delta, 27951 "vpabsb", math_PABS_YMM_pap1 ); 27952 goto decode_success; 27953 } 27954 break; 27955 27956 case 0x1D: 27957 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */ 27958 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27959 delta = dis_AVX128_E_to_G_unary( 27960 uses_vvvv, vbi, pfx, delta, 27961 "vpabsw", math_PABS_XMM_pap2 ); 27962 goto decode_success; 27963 } 27964 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */ 27965 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27966 delta = dis_AVX256_E_to_G_unary( 27967 uses_vvvv, vbi, pfx, delta, 27968 "vpabsw", math_PABS_YMM_pap2 ); 27969 goto decode_success; 27970 } 27971 break; 27972 27973 case 0x1E: 27974 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */ 27975 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27976 delta = dis_AVX128_E_to_G_unary( 27977 uses_vvvv, vbi, pfx, delta, 27978 "vpabsd", math_PABS_XMM_pap4 ); 27979 goto decode_success; 27980 } 27981 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */ 27982 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27983 delta = dis_AVX256_E_to_G_unary( 27984 uses_vvvv, vbi, pfx, delta, 27985 "vpabsd", math_PABS_YMM_pap4 ); 27986 goto decode_success; 27987 } 27988 break; 27989 27990 case 0x20: 27991 /* VPMOVSXBW xmm2/m64, xmm1 */ 27992 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */ 27993 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27994 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 27995 True/*isAvx*/, False/*!xIsZ*/ ); 27996 goto decode_success; 27997 } 27998 /* VPMOVSXBW xmm2/m128, ymm1 */ 27999 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */ 28000 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28001 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28002 goto decode_success; 28003 } 28004 break; 28005 28006 case 0x21: 28007 /* VPMOVSXBD xmm2/m32, xmm1 */ 28008 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */ 28009 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28010 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28011 True/*isAvx*/, False/*!xIsZ*/ ); 28012 goto decode_success; 28013 } 28014 /* VPMOVSXBD 
xmm2/m64, ymm1 */ 28015 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */ 28016 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28017 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28018 goto decode_success; 28019 } 28020 break; 28021 28022 case 0x22: 28023 /* VPMOVSXBQ xmm2/m16, xmm1 */ 28024 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */ 28025 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28026 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28027 goto decode_success; 28028 } 28029 /* VPMOVSXBQ xmm2/m32, ymm1 */ 28030 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */ 28031 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28032 delta = dis_PMOVSXBQ_256( vbi, pfx, delta ); 28033 goto decode_success; 28034 } 28035 break; 28036 28037 case 0x23: 28038 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */ 28039 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28040 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 28041 True/*isAvx*/, False/*!xIsZ*/ ); 28042 goto decode_success; 28043 } 28044 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */ 28045 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28046 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28047 goto decode_success; 28048 } 28049 break; 28050 28051 case 0x24: 28052 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */ 28053 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28054 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28055 goto decode_success; 28056 } 28057 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */ 28058 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28059 delta = dis_PMOVSXWQ_256( vbi, pfx, delta ); 28060 goto decode_success; 28061 } 28062 break; 28063 28064 case 0x25: 28065 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */ 28066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28067 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28068 True/*isAvx*/, False/*!xIsZ*/ ); 28069 goto decode_success; 28070 } 28071 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */ 28072 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28073 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28074 goto decode_success; 28075 } 28076 break; 28077 28078 case 0x28: 28079 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */ 28080 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28081 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 28082 uses_vvvv, vbi, pfx, delta, 28083 "vpmuldq", math_PMULDQ_128 ); 28084 goto decode_success; 28085 } 28086 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */ 28087 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28088 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28089 uses_vvvv, vbi, pfx, delta, 28090 "vpmuldq", math_PMULDQ_256 ); 28091 goto decode_success; 28092 } 28093 break; 28094 28095 case 0x29: 28096 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */ 28097 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */ 28098 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28099 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28100 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 ); 28101 goto decode_success; 28102 } 28103 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */ 28104 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */ 28105 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28106 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28107 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 ); 28108 goto decode_success; 28109 } 28110 
break; 28111 28112 case 0x2A: 28113 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */ 28114 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28115 && !epartIsReg(getUChar(delta))) { 28116 UChar modrm = getUChar(delta); 28117 UInt rD = gregOfRexRM(pfx, modrm); 28118 IRTemp tD = newTemp(Ity_V128); 28119 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28120 delta += alen; 28121 gen_SEGV_if_not_16_aligned(addr); 28122 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 28123 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD)); 28124 putYMMRegLoAndZU(rD, mkexpr(tD)); 28125 goto decode_success; 28126 } 28127 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */ 28128 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28129 && !epartIsReg(getUChar(delta))) { 28130 UChar modrm = getUChar(delta); 28131 UInt rD = gregOfRexRM(pfx, modrm); 28132 IRTemp tD = newTemp(Ity_V256); 28133 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28134 delta += alen; 28135 gen_SEGV_if_not_32_aligned(addr); 28136 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 28137 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD)); 28138 putYMMReg(rD, mkexpr(tD)); 28139 goto decode_success; 28140 } 28141 break; 28142 28143 case 0x2B: 28144 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28145 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */ 28146 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28147 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 28148 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28149 Iop_QNarrowBin32Sto16Ux8, NULL, 28150 False/*!invertLeftArg*/, True/*swapArgs*/ ); 28151 goto decode_success; 28152 } 28153 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28154 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */ 28155 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28156 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28157 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28158 math_VPACKUSDW_YMM ); 28159 goto decode_success; 28160 } 28161 break; 28162 28163 case 0x2C: 28164 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2C /r */ 28165 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28166 && !epartIsReg(getUChar(delta))) { 28167 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28168 /*!isYMM*/False, Ity_I32 ); 28169 goto decode_success; 28170 } 28171 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2C /r */ 28172 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28173 && !epartIsReg(getUChar(delta))) { 28174 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28175 /*isYMM*/True, Ity_I32 ); 28176 goto decode_success; 28177 } 28178 break; 28179 28180 case 0x2D: 28181 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2D /r */ 28182 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28183 && !epartIsReg(getUChar(delta))) { 28184 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28185 /*!isYMM*/False, Ity_I64 ); 28186 goto decode_success; 28187 } 28188 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2D /r */ 28189 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28190 && !epartIsReg(getUChar(delta))) { 28191 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28192 /*isYMM*/True, Ity_I64 ); 28193 goto decode_success; 28194 } 28195 break; 28196 28197 case 0x30: 28198 /* VPMOVZXBW xmm2/m64, xmm1 */ 28199 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */ 28200 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28201 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 28202 True/*isAvx*/, True/*xIsZ*/ 
); 28203 goto decode_success; 28204 } 28205 /* VPMOVZXBW xmm2/m128, ymm1 */ 28206 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */ 28207 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28208 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ ); 28209 goto decode_success; 28210 } 28211 break; 28212 28213 case 0x31: 28214 /* VPMOVZXBD xmm2/m32, xmm1 */ 28215 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */ 28216 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28217 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28218 True/*isAvx*/, True/*xIsZ*/ ); 28219 goto decode_success; 28220 } 28221 /* VPMOVZXBD xmm2/m64, ymm1 */ 28222 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */ 28223 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28224 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28225 goto decode_success; 28226 } 28227 break; 28228 28229 case 0x32: 28230 /* VPMOVZXBQ xmm2/m16, xmm1 */ 28231 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */ 28232 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28233 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28234 goto decode_success; 28235 } 28236 /* VPMOVZXBQ xmm2/m32, ymm1 */ 28237 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */ 28238 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28239 delta = dis_PMOVZXBQ_256( vbi, pfx, delta ); 28240 goto decode_success; 28241 } 28242 break; 28243 28244 case 0x33: 28245 /* VPMOVZXWD xmm2/m64, xmm1 */ 28246 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */ 28247 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28248 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 28249 True/*isAvx*/, True/*xIsZ*/ ); 28250 goto decode_success; 28251 } 28252 /* VPMOVZXWD xmm2/m128, ymm1 */ 28253 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */ 28254 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28255 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28256 goto decode_success; 28257 } 28258 break; 28259 28260 case 0x34: 28261 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */ 28262 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28263 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28264 goto decode_success; 28265 } 28266 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */ 28267 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28268 delta = dis_PMOVZXWQ_256( vbi, pfx, delta ); 28269 goto decode_success; 28270 } 28271 break; 28272 28273 case 0x35: 28274 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */ 28275 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28276 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28277 True/*isAvx*/, True/*xIsZ*/ ); 28278 goto decode_success; 28279 } 28280 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */ 28281 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28282 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ ); 28283 goto decode_success; 28284 } 28285 break; 28286 28287 case 0x36: 28288 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */ 28289 if (have66noF2noF3(pfx) 28290 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28291 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28292 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD ); 28293 goto decode_success; 28294 } 28295 break; 28296 28297 case 0x37: 28298 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28299 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */ 28300 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28301 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28302 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 ); 
28303 goto decode_success; 28304 } 28305 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28306 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */ 28307 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28308 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28309 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 ); 28310 goto decode_success; 28311 } 28312 break; 28313 28314 case 0x38: 28315 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28316 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */ 28317 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28318 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28319 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 ); 28320 goto decode_success; 28321 } 28322 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28323 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */ 28324 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28325 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28326 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 ); 28327 goto decode_success; 28328 } 28329 break; 28330 28331 case 0x39: 28332 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28333 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */ 28334 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28335 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28336 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 ); 28337 goto decode_success; 28338 } 28339 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28340 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */ 28341 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28342 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28343 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 ); 28344 goto decode_success; 28345 } 28346 break; 28347 28348 case 0x3A: 28349 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28350 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */ 28351 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28352 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28353 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 ); 28354 goto decode_success; 28355 } 28356 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28357 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */ 28358 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28359 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28360 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 ); 28361 goto decode_success; 28362 } 28363 break; 28364 28365 case 0x3B: 28366 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28367 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */ 28368 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28369 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28370 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 ); 28371 goto decode_success; 28372 } 28373 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28374 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */ 28375 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28376 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28377 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 ); 28378 goto decode_success; 28379 } 28380 break; 28381 28382 case 0x3C: 28383 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28384 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */ 28385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28386 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28387 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 ); 28388 goto decode_success; 28389 } 28390 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28391 /* 
VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */ 28392 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28393 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28394 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 ); 28395 goto decode_success; 28396 } 28397 break; 28398 28399 case 0x3D: 28400 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28401 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */ 28402 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28403 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28404 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 ); 28405 goto decode_success; 28406 } 28407 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28408 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */ 28409 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28410 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28411 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 ); 28412 goto decode_success; 28413 } 28414 break; 28415 28416 case 0x3E: 28417 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */ 28418 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */ 28419 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28420 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28421 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 ); 28422 goto decode_success; 28423 } 28424 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */ 28425 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */ 28426 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28427 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28428 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 ); 28429 goto decode_success; 28430 } 28431 break; 28432 28433 case 0x3F: 28434 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 28435 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */ 28436 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28437 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28438 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 ); 28439 goto decode_success; 28440 } 28441 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 28442 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */ 28443 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28444 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28445 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 ); 28446 goto decode_success; 28447 } 28448 break; 28449 28450 case 0x40: 28451 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 28452 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */ 28453 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28454 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28455 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 ); 28456 goto decode_success; 28457 } 28458 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 28459 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */ 28460 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28461 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28462 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 ); 28463 goto decode_success; 28464 } 28465 break; 28466 28467 case 0x41: 28468 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */ 28469 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28470 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ ); 28471 goto decode_success; 28472 } 28473 break; 28474 28475 case 0x45: 28476 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */ 28477 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */ 28478 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28479 delta = dis_AVX_var_shiftV_byE( vbi, pfx, 
delta, "vpsrlvd", 28480 Iop_Shr32, 1==getVexL(pfx) ); 28481 *uses_vvvv = True; 28482 goto decode_success; 28483 } 28484 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */ 28485 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */ 28486 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 28487 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq", 28488 Iop_Shr64, 1==getVexL(pfx) ); 28489 *uses_vvvv = True; 28490 goto decode_success; 28491 } 28492 break; 28493 28494 case 0x46: 28495 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */ 28496 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */ 28497 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28498 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd", 28499 Iop_Sar32, 1==getVexL(pfx) ); 28500 *uses_vvvv = True; 28501 goto decode_success; 28502 } 28503 break; 28504 28505 case 0x47: 28506 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */ 28507 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */ 28508 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 28509 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd", 28510 Iop_Shl32, 1==getVexL(pfx) ); 28511 *uses_vvvv = True; 28512 goto decode_success; 28513 } 28514 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */ 28515 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */ 28516 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 28517 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq", 28518 Iop_Shl64, 1==getVexL(pfx) ); 28519 *uses_vvvv = True; 28520 goto decode_success; 28521 } 28522 break; 28523 28524 case 0x58: 28525 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */ 28526 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28527 && 0==getRexW(pfx)/*W0*/) { 28528 UChar modrm = getUChar(delta); 28529 UInt rG = gregOfRexRM(pfx, modrm); 28530 IRTemp t32 = newTemp(Ity_I32); 28531 if (epartIsReg(modrm)) { 28532 UInt rE = eregOfRexRM(pfx, modrm); 28533 delta++; 28534 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28535 assign(t32, getXMMRegLane32(rE, 0)); 28536 } else { 28537 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28538 delta += alen; 28539 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG)); 28540 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28541 } 28542 IRTemp t64 = newTemp(Ity_I64); 28543 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28544 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28545 putYMMRegLoAndZU(rG, res); 28546 goto decode_success; 28547 } 28548 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */ 28549 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28550 && 0==getRexW(pfx)/*W0*/) { 28551 UChar modrm = getUChar(delta); 28552 UInt rG = gregOfRexRM(pfx, modrm); 28553 IRTemp t32 = newTemp(Ity_I32); 28554 if (epartIsReg(modrm)) { 28555 UInt rE = eregOfRexRM(pfx, modrm); 28556 delta++; 28557 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28558 assign(t32, getXMMRegLane32(rE, 0)); 28559 } else { 28560 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28561 delta += alen; 28562 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG)); 28563 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28564 } 28565 IRTemp t64 = newTemp(Ity_I64); 28566 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28567 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28568 mkexpr(t64), mkexpr(t64)); 28569 putYMMReg(rG, res); 28570 goto 
decode_success;
         }
         break;

      case 0x59:
         /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
         if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
               assign(t64, getXMMRegLane64(rE, 0));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
               assign(t64, loadLE(Ity_I64, mkexpr(addr)));
            }
            IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
            putYMMRegLoAndZU(rG, res);
            goto decode_success;
         }
         /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
         if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
               assign(t64, getXMMRegLane64(rE, 0));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
               assign(t64, loadLE(Ity_I64, mkexpr(addr)));
            }
            IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                     mkexpr(t64), mkexpr(t64));
            putYMMReg(rG, res);
            goto decode_success;
         }
         break;

      case 0x5A:
         /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
         if (have66noF2noF3(pfx)
             && 1==getVexL(pfx)/*256*/
             && !epartIsReg(getUChar(delta))) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
            IRTemp t128 = newTemp(Ity_V128);
            assign(t128, loadLE(Ity_V128, mkexpr(addr)));
            putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
            goto decode_success;
         }
         break;

      case 0x78:
         /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
         if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t8 = newTemp(Ity_I8);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
               assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
               assign(t8, loadLE(Ity_I8, mkexpr(addr)));
            }
            IRTemp t16 = newTemp(Ity_I16);
            assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
            IRTemp t32 = newTemp(Ity_I32);
            assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
            IRTemp t64 = newTemp(Ity_I64);
            assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
            IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
            putYMMRegLoAndZU(rG, res);
            goto decode_success;
         }
         /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
         if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t8 = newTemp(Ity_I8);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
               assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
               assign(t8, loadLE(Ity_I8, mkexpr(addr)));
            }
            IRTemp t16 = newTemp(Ity_I16);
            assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
            IRTemp t32 = newTemp(Ity_I32);
            assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
            IRTemp t64 = newTemp(Ity_I64);
            assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
            IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                     mkexpr(t64), mkexpr(t64));
            putYMMReg(rG, res);
            goto decode_success;
         }
         break;

      case 0x79:
         /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
         if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t16 = newTemp(Ity_I16);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
               assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
               assign(t16, loadLE(Ity_I16, mkexpr(addr)));
            }
            IRTemp t32 = newTemp(Ity_I32);
            assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
            IRTemp t64 = newTemp(Ity_I64);
            assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
            IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
            putYMMRegLoAndZU(rG, res);
            goto decode_success;
         }
         /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
         if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
             && 0==getRexW(pfx)/*W0*/) {
            UChar modrm = getUChar(delta);
            UInt rG = gregOfRexRM(pfx, modrm);
            IRTemp t16 = newTemp(Ity_I16);
            if (epartIsReg(modrm)) {
               UInt rE = eregOfRexRM(pfx, modrm);
               delta++;
               DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
               assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
            } else {
               addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
               delta += alen;
               DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
               assign(t16, loadLE(Ity_I16, mkexpr(addr)));
            }
            IRTemp t32 = newTemp(Ity_I32);
            assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
            IRTemp t64 = newTemp(Ity_I64);
            assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
            IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                     mkexpr(t64), mkexpr(t64));
            putYMMReg(rG, res);
            goto decode_success;
         }
         break;

      case 0x8C:
         /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
         if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
             && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
            delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
28750 /*!isYMM*/False, Ity_I32 ); 28751 goto decode_success; 28752 } 28753 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */ 28754 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28755 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28756 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 28757 /*isYMM*/True, Ity_I32 ); 28758 goto decode_success; 28759 } 28760 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */ 28761 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28762 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28763 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28764 /*!isYMM*/False, Ity_I64 ); 28765 goto decode_success; 28766 } 28767 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */ 28768 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28769 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28770 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 28771 /*isYMM*/True, Ity_I64 ); 28772 goto decode_success; 28773 } 28774 break; 28775 28776 case 0x90: 28777 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */ 28778 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28779 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28780 Long delta0 = delta; 28781 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 28782 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 28783 if (delta != delta0) 28784 goto decode_success; 28785 } 28786 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */ 28787 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28788 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28789 Long delta0 = delta; 28790 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 28791 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 28792 if (delta != delta0) 28793 goto decode_success; 28794 } 28795 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */ 28796 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28797 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28798 Long delta0 = delta; 28799 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 28800 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 28801 if (delta != delta0) 28802 goto decode_success; 28803 } 28804 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */ 28805 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28806 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28807 Long delta0 = delta; 28808 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 28809 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 28810 if (delta != delta0) 28811 goto decode_success; 28812 } 28813 break; 28814 28815 case 0x91: 28816 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */ 28817 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28818 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28819 Long delta0 = delta; 28820 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 28821 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 28822 if (delta != delta0) 28823 goto decode_success; 28824 } 28825 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */ 28826 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28827 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28828 Long delta0 = delta; 28829 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 28830 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 28831 if (delta != delta0) 28832 
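               /* dis_VGATHER signals a failed decode by returning its delta
                  argument unchanged, so only claim success when delta has
                  actually advanced past the instruction. */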
goto decode_success; 28833 } 28834 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */ 28835 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28836 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28837 Long delta0 = delta; 28838 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 28839 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 ); 28840 if (delta != delta0) 28841 goto decode_success; 28842 } 28843 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */ 28844 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28845 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28846 Long delta0 = delta; 28847 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 28848 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 28849 if (delta != delta0) 28850 goto decode_success; 28851 } 28852 break; 28853 28854 case 0x92: 28855 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */ 28856 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28857 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28858 Long delta0 = delta; 28859 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 28860 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 28861 if (delta != delta0) 28862 goto decode_success; 28863 } 28864 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */ 28865 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28866 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28867 Long delta0 = delta; 28868 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 28869 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 28870 if (delta != delta0) 28871 goto decode_success; 28872 } 28873 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */ 28874 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28875 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28876 Long delta0 = delta; 28877 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 28878 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 28879 if (delta != delta0) 28880 goto decode_success; 28881 } 28882 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */ 28883 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28884 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28885 Long delta0 = delta; 28886 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 28887 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 28888 if (delta != delta0) 28889 goto decode_success; 28890 } 28891 break; 28892 28893 case 0x93: 28894 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */ 28895 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28896 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28897 Long delta0 = delta; 28898 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 28899 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 28900 if (delta != delta0) 28901 goto decode_success; 28902 } 28903 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */ 28904 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28905 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 28906 Long delta0 = delta; 28907 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 28908 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 28909 if (delta != delta0) 28910 goto decode_success; 28911 } 28912 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */ 28913 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28914 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28915 Long delta0 
= delta; 28916 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 28917 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 ); 28918 if (delta != delta0) 28919 goto decode_success; 28920 } 28921 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */ 28922 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28923 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 28924 Long delta0 = delta; 28925 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 28926 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 28927 if (delta != delta0) 28928 goto decode_success; 28929 } 28930 break; 28931 28932 case 0x96 ... 0x9F: 28933 case 0xA6 ... 0xAF: 28934 case 0xB6 ... 0xBF: 28935 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */ 28936 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */ 28937 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */ 28938 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */ 28939 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */ 28940 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */ 28941 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */ 28942 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */ 28943 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */ 28944 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */ 28945 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */ 28946 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */ 28947 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */ 28948 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */ 28949 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */ 28950 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */ 28951 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */ 28952 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */ 28953 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */ 28954 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */ 28955 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */ 28956 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */ 28957 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */ 28958 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */ 28959 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */ 28960 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */ 28961 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */ 28962 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */ 28963 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */ 28964 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */ 28965 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */ 28966 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */ 28967 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */ 28968 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */ 28969 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */ 28970 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */ 28971 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = 
VEX.DDS.128.66.0F38.W0 A7 /r */ 28972 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */ 28973 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */ 28974 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */ 28975 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */ 28976 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */ 28977 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */ 28978 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */ 28979 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */ 28980 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */ 28981 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */ 28982 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */ 28983 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */ 28984 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */ 28985 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */ 28986 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */ 28987 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */ 28988 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */ 28989 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */ 28990 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */ 28991 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */ 28992 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */ 28993 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */ 28994 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */ 28995 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */ 28996 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */ 28997 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */ 28998 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */ 28999 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */ 29000 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */ 29001 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */ 29002 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */ 29003 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */ 29004 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */ 29005 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */ 29006 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */ 29007 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */ 29008 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */ 29009 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */ 29010 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */ 29011 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */ 29012 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */ 29013 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */ 29014 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */ 29015 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */ 29016 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */ 29017 /* VFMSUB231SS 
xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */ 29018 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */ 29019 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */ 29020 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */ 29021 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */ 29022 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */ 29023 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */ 29024 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */ 29025 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */ 29026 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */ 29027 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */ 29028 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */ 29029 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */ 29030 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */ 29031 if (have66noF2noF3(pfx)) { 29032 delta = dis_FMA( vbi, pfx, delta, opc ); 29033 *uses_vvvv = True; 29034 goto decode_success; 29035 } 29036 break; 29037 29038 case 0xDB: 29039 case 0xDC: 29040 case 0xDD: 29041 case 0xDE: 29042 case 0xDF: 29043 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */ 29044 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */ 29045 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */ 29046 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */ 29047 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */ 29048 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29049 delta = dis_AESx( vbi, pfx, delta, True/*!isAvx*/, opc ); 29050 if (opc != 0xDB) *uses_vvvv = True; 29051 goto decode_success; 29052 } 29053 break; 29054 29055 case 0xF2: 29056 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */ 29057 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */ 29058 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29059 Int size = getRexW(pfx) ? 8 : 4; 29060 IRType ty = szToITy(size); 29061 IRTemp dst = newTemp(ty); 29062 IRTemp src1 = newTemp(ty); 29063 IRTemp src2 = newTemp(ty); 29064 UChar rm = getUChar(delta); 29065 29066 assign( src1, getIRegV(size,pfx) ); 29067 if (epartIsReg(rm)) { 29068 assign( src2, getIRegE(size,pfx,rm) ); 29069 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm), 29070 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29071 delta++; 29072 } else { 29073 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29074 assign( src2, loadLE(ty, mkexpr(addr)) ); 29075 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29076 nameIRegG(size,pfx,rm)); 29077 delta += alen; 29078 } 29079 29080 assign( dst, binop( mkSizedOp(ty,Iop_And8), 29081 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ), 29082 mkexpr(src2) ) ); 29083 putIRegG( size, pfx, rm, mkexpr(dst) ); 29084 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29085 ? AMD64G_CC_OP_ANDN64 29086 : AMD64G_CC_OP_ANDN32)) ); 29087 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29088 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 29089 *uses_vvvv = True; 29090 goto decode_success; 29091 } 29092 break; 29093 29094 case 0xF3: 29095 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */ 29096 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */ 29097 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29098 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) { 29099 Int size = getRexW(pfx) ? 
8 : 4; 29100 IRType ty = szToITy(size); 29101 IRTemp src = newTemp(ty); 29102 IRTemp dst = newTemp(ty); 29103 UChar rm = getUChar(delta); 29104 29105 if (epartIsReg(rm)) { 29106 assign( src, getIRegE(size,pfx,rm) ); 29107 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm), 29108 nameIRegV(size,pfx)); 29109 delta++; 29110 } else { 29111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29112 assign( src, loadLE(ty, mkexpr(addr)) ); 29113 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29114 delta += alen; 29115 } 29116 29117 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29118 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0), 29119 mkexpr(src)), mkexpr(src)) ); 29120 putIRegV( size, pfx, mkexpr(dst) ); 29121 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29122 ? AMD64G_CC_OP_BLSI64 29123 : AMD64G_CC_OP_BLSI32)) ); 29124 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29125 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29126 *uses_vvvv = True; 29127 goto decode_success; 29128 } 29129 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */ 29130 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */ 29131 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29132 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) { 29133 Int size = getRexW(pfx) ? 8 : 4; 29134 IRType ty = szToITy(size); 29135 IRTemp src = newTemp(ty); 29136 IRTemp dst = newTemp(ty); 29137 UChar rm = getUChar(delta); 29138 29139 if (epartIsReg(rm)) { 29140 assign( src, getIRegE(size,pfx,rm) ); 29141 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm), 29142 nameIRegV(size,pfx)); 29143 delta++; 29144 } else { 29145 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29146 assign( src, loadLE(ty, mkexpr(addr)) ); 29147 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29148 delta += alen; 29149 } 29150 29151 assign( dst, binop(mkSizedOp(ty,Iop_Xor8), 29152 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29153 mkU(ty, 1)), mkexpr(src)) ); 29154 putIRegV( size, pfx, mkexpr(dst) ); 29155 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29156 ? AMD64G_CC_OP_BLSMSK64 29157 : AMD64G_CC_OP_BLSMSK32)) ); 29158 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29159 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29160 *uses_vvvv = True; 29161 goto decode_success; 29162 } 29163 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */ 29164 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */ 29165 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29166 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) { 29167 Int size = getRexW(pfx) ? 8 : 4; 29168 IRType ty = szToITy(size); 29169 IRTemp src = newTemp(ty); 29170 IRTemp dst = newTemp(ty); 29171 UChar rm = getUChar(delta); 29172 29173 if (epartIsReg(rm)) { 29174 assign( src, getIRegE(size,pfx,rm) ); 29175 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm), 29176 nameIRegV(size,pfx)); 29177 delta++; 29178 } else { 29179 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29180 assign( src, loadLE(ty, mkexpr(addr)) ); 29181 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29182 delta += alen; 29183 } 29184 29185 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29186 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29187 mkU(ty, 1)), mkexpr(src)) ); 29188 putIRegV( size, pfx, mkexpr(dst) ); 29189 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29190 ? 
AMD64G_CC_OP_BLSR64 29191 : AMD64G_CC_OP_BLSR32)) ); 29192 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29193 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29194 *uses_vvvv = True; 29195 goto decode_success; 29196 } 29197 break; 29198 29199 case 0xF5: 29200 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */ 29201 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */ 29202 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29203 Int size = getRexW(pfx) ? 8 : 4; 29204 IRType ty = szToITy(size); 29205 IRTemp dst = newTemp(ty); 29206 IRTemp src1 = newTemp(ty); 29207 IRTemp src2 = newTemp(ty); 29208 IRTemp start = newTemp(Ity_I8); 29209 IRTemp cond = newTemp(Ity_I1); 29210 UChar rm = getUChar(delta); 29211 29212 assign( src2, getIRegV(size,pfx) ); 29213 if (epartIsReg(rm)) { 29214 assign( src1, getIRegE(size,pfx,rm) ); 29215 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), 29216 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 29217 delta++; 29218 } else { 29219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29220 assign( src1, loadLE(ty, mkexpr(addr)) ); 29221 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 29222 nameIRegG(size,pfx,rm)); 29223 delta += alen; 29224 } 29225 29226 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) ); 29227 assign( cond, binop(Iop_CmpLT32U, 29228 unop(Iop_8Uto32, mkexpr(start)), 29229 mkU32(8*size)) ); 29230 /* if (start < opsize) { 29231 if (start == 0) 29232 dst = 0; 29233 else 29234 dst = (src1 << (opsize-start)) u>> (opsize-start); 29235 } else { 29236 dst = src1; 29237 } */ 29238 assign( dst, 29239 IRExpr_ITE( 29240 mkexpr(cond), 29241 IRExpr_ITE( 29242 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)), 29243 mkU(ty, 0), 29244 binop( 29245 mkSizedOp(ty,Iop_Shr8), 29246 binop( 29247 mkSizedOp(ty,Iop_Shl8), 29248 mkexpr(src1), 29249 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29250 ), 29251 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29252 ) 29253 ), 29254 mkexpr(src1) 29255 ) 29256 ); 29257 putIRegG( size, pfx, rm, mkexpr(dst) ); 29258 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29259 ? AMD64G_CC_OP_BLSR64 29260 : AMD64G_CC_OP_BLSR32)) ); 29261 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29262 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) ); 29263 *uses_vvvv = True; 29264 goto decode_success; 29265 } 29266 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */ 29267 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */ 29268 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29269 Int size = getRexW(pfx) ? 8 : 4; 29270 IRType ty = szToITy(size); 29271 IRTemp src = newTemp(ty); 29272 IRTemp mask = newTemp(ty); 29273 UChar rm = getUChar(delta); 29274 29275 assign( src, getIRegV(size,pfx) ); 29276 if (epartIsReg(rm)) { 29277 assign( mask, getIRegE(size,pfx,rm) ); 29278 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm), 29279 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29280 delta++; 29281 } else { 29282 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29283 assign( mask, loadLE(ty, mkexpr(addr)) ); 29284 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29285 nameIRegG(size,pfx,rm)); 29286 delta += alen; 29287 } 29288 29289 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)), 29290 widenUto64(mkexpr(mask)) ); 29291 putIRegG( size, pfx, rm, 29292 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29293 "amd64g_calculate_pdep", 29294 &amd64g_calculate_pdep, args)) ); 29295 *uses_vvvv = True; 29296 /* Flags aren't modified. 
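               PDEP (BMI2) is specified as leaving all of rflags untouched,
               so no CC-thunk update is needed here, unlike the BLSI, BLSMSK
               and BLSR cases above.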
*/ 29297 goto decode_success; 29298 } 29299 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */ 29300 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */ 29301 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29302 Int size = getRexW(pfx) ? 8 : 4; 29303 IRType ty = szToITy(size); 29304 IRTemp src = newTemp(ty); 29305 IRTemp mask = newTemp(ty); 29306 UChar rm = getUChar(delta); 29307 29308 assign( src, getIRegV(size,pfx) ); 29309 if (epartIsReg(rm)) { 29310 assign( mask, getIRegE(size,pfx,rm) ); 29311 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm), 29312 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29313 delta++; 29314 } else { 29315 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29316 assign( mask, loadLE(ty, mkexpr(addr)) ); 29317 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29318 nameIRegG(size,pfx,rm)); 29319 delta += alen; 29320 } 29321 29322 /* First mask off bits not set in mask, they are ignored 29323 and it should be fine if they contain undefined values. */ 29324 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8), 29325 mkexpr(src), mkexpr(mask)); 29326 IRExpr** args = mkIRExprVec_2( widenUto64(masked), 29327 widenUto64(mkexpr(mask)) ); 29328 putIRegG( size, pfx, rm, 29329 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29330 "amd64g_calculate_pext", 29331 &amd64g_calculate_pext, args)) ); 29332 *uses_vvvv = True; 29333 /* Flags aren't modified. */ 29334 goto decode_success; 29335 } 29336 break; 29337 29338 case 0xF6: 29339 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */ 29340 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */ 29341 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29342 Int size = getRexW(pfx) ? 8 : 4; 29343 IRType ty = szToITy(size); 29344 IRTemp src1 = newTemp(ty); 29345 IRTemp src2 = newTemp(ty); 29346 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64); 29347 UChar rm = getUChar(delta); 29348 29349 assign( src1, getIRegRDX(size) ); 29350 if (epartIsReg(rm)) { 29351 assign( src2, getIRegE(size,pfx,rm) ); 29352 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm), 29353 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29354 delta++; 29355 } else { 29356 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29357 assign( src2, loadLE(ty, mkexpr(addr)) ); 29358 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29359 nameIRegG(size,pfx,rm)); 29360 delta += alen; 29361 } 29362 29363 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32, 29364 mkexpr(src1), mkexpr(src2)) ); 29365 putIRegV( size, pfx, 29366 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) ); 29367 putIRegG( size, pfx, rm, 29368 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32, 29369 mkexpr(res)) ); 29370 *uses_vvvv = True; 29371 /* Flags aren't modified. 
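               MULX (BMI2) produces the full double-width unsigned product
               but writes no rflags bits, unlike the classic one-operand MUL,
               which sets CF and OF.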
*/ 29372 goto decode_success; 29373 } 29374 break; 29375 29376 case 0xF7: 29377 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */ 29378 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */ 29379 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29380 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 ); 29381 goto decode_success; 29382 } 29383 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */ 29384 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */ 29385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29386 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 ); 29387 goto decode_success; 29388 } 29389 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */ 29390 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */ 29391 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29392 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 ); 29393 goto decode_success; 29394 } 29395 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */ 29396 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */ 29397 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29398 Int size = getRexW(pfx) ? 8 : 4; 29399 IRType ty = szToITy(size); 29400 IRTemp dst = newTemp(ty); 29401 IRTemp src1 = newTemp(ty); 29402 IRTemp src2 = newTemp(ty); 29403 IRTemp stle = newTemp(Ity_I16); 29404 IRTemp start = newTemp(Ity_I8); 29405 IRTemp len = newTemp(Ity_I8); 29406 UChar rm = getUChar(delta); 29407 29408 assign( src2, getIRegV(size,pfx) ); 29409 if (epartIsReg(rm)) { 29410 assign( src1, getIRegE(size,pfx,rm) ); 29411 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), 29412 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 29413 delta++; 29414 } else { 29415 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29416 assign( src1, loadLE(ty, mkexpr(addr)) ); 29417 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 29418 nameIRegG(size,pfx,rm)); 29419 delta += alen; 29420 } 29421 29422 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) ); 29423 assign( start, unop( Iop_16to8, mkexpr(stle) ) ); 29424 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) ); 29425 /* if (start+len < opsize) { 29426 if (len != 0) 29427 dst = (src1 << (opsize-start-len)) u>> (opsize-len); 29428 else 29429 dst = 0; 29430 } else { 29431 if (start < opsize) 29432 dst = src1 u>> start; 29433 else 29434 dst = 0; 29435 } */ 29436 assign( dst, 29437 IRExpr_ITE( 29438 binop(Iop_CmpLT32U, 29439 binop(Iop_Add32, 29440 unop(Iop_8Uto32, mkexpr(start)), 29441 unop(Iop_8Uto32, mkexpr(len))), 29442 mkU32(8*size)), 29443 IRExpr_ITE( 29444 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)), 29445 mkU(ty, 0), 29446 binop(mkSizedOp(ty,Iop_Shr8), 29447 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1), 29448 binop(Iop_Sub8, 29449 binop(Iop_Sub8, mkU8(8*size), 29450 mkexpr(start)), 29451 mkexpr(len))), 29452 binop(Iop_Sub8, mkU8(8*size), 29453 mkexpr(len))) 29454 ), 29455 IRExpr_ITE( 29456 binop(Iop_CmpLT32U, 29457 unop(Iop_8Uto32, mkexpr(start)), 29458 mkU32(8*size)), 29459 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1), 29460 mkexpr(start)), 29461 mkU(ty, 0) 29462 ) 29463 ) 29464 ); 29465 putIRegG( size, pfx, rm, mkexpr(dst) ); 29466 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29467 ? 
AMD64G_CC_OP_ANDN64 29468 : AMD64G_CC_OP_ANDN32)) ); 29469 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29470 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 29471 *uses_vvvv = True; 29472 goto decode_success; 29473 } 29474 break; 29475 29476 default: 29477 break; 29478 29479 } 29480 29481 //decode_failure: 29482 return deltaIN; 29483 29484 decode_success: 29485 return delta; 29486 } 29487 29488 29489 /*------------------------------------------------------------*/ 29490 /*--- ---*/ 29491 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/ 29492 /*--- ---*/ 29493 /*------------------------------------------------------------*/ 29494 29495 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 ) 29496 { 29497 vassert(imm8 < 256); 29498 IRTemp s3, s2, s1, s0; 29499 s3 = s2 = s1 = s0 = IRTemp_INVALID; 29500 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 29501 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \ 29502 : ((_nn)==2) ? s2 : s3) 29503 IRTemp res = newTemp(Ity_V128); 29504 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3), 29505 SEL((imm8 >> 4) & 3), 29506 SEL((imm8 >> 2) & 3), 29507 SEL((imm8 >> 0) & 3) )); 29508 # undef SEL 29509 return res; 29510 } 29511 29512 __attribute__((noinline)) 29513 static 29514 Long dis_ESC_0F3A__VEX ( 29515 /*MB_OUT*/DisResult* dres, 29516 /*OUT*/ Bool* uses_vvvv, 29517 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 29518 Bool resteerCisOk, 29519 void* callback_opaque, 29520 VexArchInfo* archinfo, 29521 VexAbiInfo* vbi, 29522 Prefix pfx, Int sz, Long deltaIN 29523 ) 29524 { 29525 IRTemp addr = IRTemp_INVALID; 29526 Int alen = 0; 29527 HChar dis_buf[50]; 29528 Long delta = deltaIN; 29529 UChar opc = getUChar(delta); 29530 delta++; 29531 *uses_vvvv = False; 29532 29533 switch (opc) { 29534 29535 case 0x00: 29536 case 0x01: 29537 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */ 29538 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */ 29539 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29540 && 1==getRexW(pfx)/*W1*/) { 29541 UChar modrm = getUChar(delta); 29542 UInt imm8 = 0; 29543 UInt rG = gregOfRexRM(pfx, modrm); 29544 IRTemp sV = newTemp(Ity_V256); 29545 const HChar *name = opc == 0 ? 
"vpermq" : "vpermpd"; 29546 if (epartIsReg(modrm)) { 29547 UInt rE = eregOfRexRM(pfx, modrm); 29548 delta += 1; 29549 imm8 = getUChar(delta); 29550 DIP("%s $%u,%s,%s\n", 29551 name, imm8, nameYMMReg(rE), nameYMMReg(rG)); 29552 assign(sV, getYMMReg(rE)); 29553 } else { 29554 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29555 delta += alen; 29556 imm8 = getUChar(delta); 29557 DIP("%s $%u,%s,%s\n", 29558 name, imm8, dis_buf, nameYMMReg(rG)); 29559 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29560 } 29561 delta++; 29562 IRTemp s[4]; 29563 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 29564 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]); 29565 IRTemp dV = newTemp(Ity_V256); 29566 assign(dV, IRExpr_Qop(Iop_64x4toV256, 29567 mkexpr(s[(imm8 >> 6) & 3]), 29568 mkexpr(s[(imm8 >> 4) & 3]), 29569 mkexpr(s[(imm8 >> 2) & 3]), 29570 mkexpr(s[(imm8 >> 0) & 3]))); 29571 putYMMReg(rG, mkexpr(dV)); 29572 goto decode_success; 29573 } 29574 break; 29575 29576 case 0x02: 29577 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */ 29578 if (have66noF2noF3(pfx) 29579 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 29580 UChar modrm = getUChar(delta); 29581 UInt imm8 = 0; 29582 UInt rG = gregOfRexRM(pfx, modrm); 29583 UInt rV = getVexNvvvv(pfx); 29584 IRTemp sV = newTemp(Ity_V128); 29585 IRTemp dV = newTemp(Ity_V128); 29586 UInt i; 29587 IRTemp s[4], d[4]; 29588 assign(sV, getXMMReg(rV)); 29589 if (epartIsReg(modrm)) { 29590 UInt rE = eregOfRexRM(pfx, modrm); 29591 delta += 1; 29592 imm8 = getUChar(delta); 29593 DIP("vpblendd $%u,%s,%s,%s\n", 29594 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 29595 assign(dV, getXMMReg(rE)); 29596 } else { 29597 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29598 delta += alen; 29599 imm8 = getUChar(delta); 29600 DIP("vpblendd $%u,%s,%s,%s\n", 29601 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 29602 assign(dV, loadLE(Ity_V128, mkexpr(addr))); 29603 } 29604 delta++; 29605 for (i = 0; i < 4; i++) { 29606 s[i] = IRTemp_INVALID; 29607 d[i] = IRTemp_INVALID; 29608 } 29609 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] ); 29610 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] ); 29611 for (i = 0; i < 4; i++) 29612 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? 
d[i] : s[i])); 29613 putYMMRegLane128(rG, 1, mkV128(0)); 29614 *uses_vvvv = True; 29615 goto decode_success; 29616 } 29617 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */ 29618 if (have66noF2noF3(pfx) 29619 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 29620 UChar modrm = getUChar(delta); 29621 UInt imm8 = 0; 29622 UInt rG = gregOfRexRM(pfx, modrm); 29623 UInt rV = getVexNvvvv(pfx); 29624 IRTemp sV = newTemp(Ity_V256); 29625 IRTemp dV = newTemp(Ity_V256); 29626 UInt i; 29627 IRTemp s[8], d[8]; 29628 assign(sV, getYMMReg(rV)); 29629 if (epartIsReg(modrm)) { 29630 UInt rE = eregOfRexRM(pfx, modrm); 29631 delta += 1; 29632 imm8 = getUChar(delta); 29633 DIP("vpblendd $%u,%s,%s,%s\n", 29634 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 29635 assign(dV, getYMMReg(rE)); 29636 } else { 29637 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29638 delta += alen; 29639 imm8 = getUChar(delta); 29640 DIP("vpblendd $%u,%s,%s,%s\n", 29641 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 29642 assign(dV, loadLE(Ity_V256, mkexpr(addr))); 29643 } 29644 delta++; 29645 for (i = 0; i < 8; i++) { 29646 s[i] = IRTemp_INVALID; 29647 d[i] = IRTemp_INVALID; 29648 } 29649 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 29650 &s[3], &s[2], &s[1], &s[0] ); 29651 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4], 29652 &d[3], &d[2], &d[1], &d[0] ); 29653 for (i = 0; i < 8; i++) 29654 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i])); 29655 *uses_vvvv = True; 29656 goto decode_success; 29657 } 29658 break; 29659 29660 case 0x04: 29661 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */ 29662 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29663 UChar modrm = getUChar(delta); 29664 UInt imm8 = 0; 29665 UInt rG = gregOfRexRM(pfx, modrm); 29666 IRTemp sV = newTemp(Ity_V256); 29667 if (epartIsReg(modrm)) { 29668 UInt rE = eregOfRexRM(pfx, modrm); 29669 delta += 1; 29670 imm8 = getUChar(delta); 29671 DIP("vpermilps $%u,%s,%s\n", 29672 imm8, nameYMMReg(rE), nameYMMReg(rG)); 29673 assign(sV, getYMMReg(rE)); 29674 } else { 29675 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29676 delta += alen; 29677 imm8 = getUChar(delta); 29678 DIP("vpermilps $%u,%s,%s\n", 29679 imm8, dis_buf, nameYMMReg(rG)); 29680 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29681 } 29682 delta++; 29683 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 29684 breakupV256toV128s( sV, &sVhi, &sVlo ); 29685 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 ); 29686 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 ); 29687 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo)); 29688 putYMMReg(rG, res); 29689 goto decode_success; 29690 } 29691 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */ 29692 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29693 UChar modrm = getUChar(delta); 29694 UInt imm8 = 0; 29695 UInt rG = gregOfRexRM(pfx, modrm); 29696 IRTemp sV = newTemp(Ity_V128); 29697 if (epartIsReg(modrm)) { 29698 UInt rE = eregOfRexRM(pfx, modrm); 29699 delta += 1; 29700 imm8 = getUChar(delta); 29701 DIP("vpermilps $%u,%s,%s\n", 29702 imm8, nameXMMReg(rE), nameXMMReg(rG)); 29703 assign(sV, getXMMReg(rE)); 29704 } else { 29705 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29706 delta += alen; 29707 imm8 = getUChar(delta); 29708 DIP("vpermilps $%u,%s,%s\n", 29709 imm8, dis_buf, nameXMMReg(rG)); 29710 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 29711 } 29712 delta++; 29713 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) 
) ); 29714 goto decode_success; 29715 } 29716 break; 29717 29718 case 0x05: 29719 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */ 29720 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29721 UChar modrm = getUChar(delta); 29722 UInt imm8 = 0; 29723 UInt rG = gregOfRexRM(pfx, modrm); 29724 IRTemp sV = newTemp(Ity_V128); 29725 if (epartIsReg(modrm)) { 29726 UInt rE = eregOfRexRM(pfx, modrm); 29727 delta += 1; 29728 imm8 = getUChar(delta); 29729 DIP("vpermilpd $%u,%s,%s\n", 29730 imm8, nameXMMReg(rE), nameXMMReg(rG)); 29731 assign(sV, getXMMReg(rE)); 29732 } else { 29733 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29734 delta += alen; 29735 imm8 = getUChar(delta); 29736 DIP("vpermilpd $%u,%s,%s\n", 29737 imm8, dis_buf, nameXMMReg(rG)); 29738 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 29739 } 29740 delta++; 29741 IRTemp s1 = newTemp(Ity_I64); 29742 IRTemp s0 = newTemp(Ity_I64); 29743 assign(s1, unop(Iop_V128HIto64, mkexpr(sV))); 29744 assign(s0, unop(Iop_V128to64, mkexpr(sV))); 29745 IRTemp dV = newTemp(Ity_V128); 29746 assign(dV, binop(Iop_64HLtoV128, 29747 mkexpr((imm8 & (1<<1)) ? s1 : s0), 29748 mkexpr((imm8 & (1<<0)) ? s1 : s0))); 29749 putYMMRegLoAndZU(rG, mkexpr(dV)); 29750 goto decode_success; 29751 } 29752 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */ 29753 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29754 UChar modrm = getUChar(delta); 29755 UInt imm8 = 0; 29756 UInt rG = gregOfRexRM(pfx, modrm); 29757 IRTemp sV = newTemp(Ity_V256); 29758 if (epartIsReg(modrm)) { 29759 UInt rE = eregOfRexRM(pfx, modrm); 29760 delta += 1; 29761 imm8 = getUChar(delta); 29762 DIP("vpermilpd $%u,%s,%s\n", 29763 imm8, nameYMMReg(rE), nameYMMReg(rG)); 29764 assign(sV, getYMMReg(rE)); 29765 } else { 29766 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29767 delta += alen; 29768 imm8 = getUChar(delta); 29769 DIP("vpermilpd $%u,%s,%s\n", 29770 imm8, dis_buf, nameYMMReg(rG)); 29771 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 29772 } 29773 delta++; 29774 IRTemp s3, s2, s1, s0; 29775 s3 = s2 = s1 = s0 = IRTemp_INVALID; 29776 breakupV256to64s(sV, &s3, &s2, &s1, &s0); 29777 IRTemp dV = newTemp(Ity_V256); 29778 assign(dV, IRExpr_Qop(Iop_64x4toV256, 29779 mkexpr((imm8 & (1<<3)) ? s3 : s2), 29780 mkexpr((imm8 & (1<<2)) ? s3 : s2), 29781 mkexpr((imm8 & (1<<1)) ? s1 : s0), 29782 mkexpr((imm8 & (1<<0)) ? 
s1 : s0))); 29783 putYMMReg(rG, mkexpr(dV)); 29784 goto decode_success; 29785 } 29786 break; 29787 29788 case 0x06: 29789 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */ 29790 if (have66noF2noF3(pfx) 29791 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 29792 UChar modrm = getUChar(delta); 29793 UInt imm8 = 0; 29794 UInt rG = gregOfRexRM(pfx, modrm); 29795 UInt rV = getVexNvvvv(pfx); 29796 IRTemp s00 = newTemp(Ity_V128); 29797 IRTemp s01 = newTemp(Ity_V128); 29798 IRTemp s10 = newTemp(Ity_V128); 29799 IRTemp s11 = newTemp(Ity_V128); 29800 assign(s00, getYMMRegLane128(rV, 0)); 29801 assign(s01, getYMMRegLane128(rV, 1)); 29802 if (epartIsReg(modrm)) { 29803 UInt rE = eregOfRexRM(pfx, modrm); 29804 delta += 1; 29805 imm8 = getUChar(delta); 29806 DIP("vperm2f128 $%u,%s,%s,%s\n", 29807 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 29808 assign(s10, getYMMRegLane128(rE, 0)); 29809 assign(s11, getYMMRegLane128(rE, 1)); 29810 } else { 29811 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29812 delta += alen; 29813 imm8 = getUChar(delta); 29814 DIP("vperm2f128 $%u,%s,%s,%s\n", 29815 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 29816 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 29817 mkexpr(addr), mkU64(0)))); 29818 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 29819 mkexpr(addr), mkU64(16)))); 29820 } 29821 delta++; 29822 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 29823 : ((_nn)==2) ? s10 : s11) 29824 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 29825 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 29826 # undef SEL 29827 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 29828 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 29829 *uses_vvvv = True; 29830 goto decode_success; 29831 } 29832 break; 29833 29834 case 0x08: 29835 /* VROUNDPS imm8, xmm2/m128, xmm1 */ 29836 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */ 29837 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29838 UChar modrm = getUChar(delta); 29839 UInt rG = gregOfRexRM(pfx, modrm); 29840 IRTemp src = newTemp(Ity_V128); 29841 IRTemp s0 = IRTemp_INVALID; 29842 IRTemp s1 = IRTemp_INVALID; 29843 IRTemp s2 = IRTemp_INVALID; 29844 IRTemp s3 = IRTemp_INVALID; 29845 IRTemp rm = newTemp(Ity_I32); 29846 Int imm = 0; 29847 29848 modrm = getUChar(delta); 29849 29850 if (epartIsReg(modrm)) { 29851 UInt rE = eregOfRexRM(pfx, modrm); 29852 assign( src, getXMMReg( rE ) ); 29853 imm = getUChar(delta+1); 29854 if (imm & ~15) break; 29855 delta += 1+1; 29856 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 29857 } else { 29858 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29859 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 29860 imm = getUChar(delta+alen); 29861 if (imm & ~15) break; 29862 delta += alen+1; 29863 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 29864 } 29865 29866 /* (imm & 3) contains an Intel-encoded rounding mode. Because 29867 that encoding is the same as the encoding for IRRoundingMode, 29868 we can use that value directly in the IR as a rounding 29869 mode. */ 29870 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 29871 29872 breakupV128to32s( src, &s3, &s2, &s1, &s0 ); 29873 putYMMRegLane128( rG, 1, mkV128(0) ); 29874 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 29875 unop(Iop_ReinterpI32asF32, mkexpr(s))) 29876 putYMMRegLane32F( rG, 3, CVT(s3) ); 29877 putYMMRegLane32F( rG, 2, CVT(s2) ); 29878 putYMMRegLane32F( rG, 1, CVT(s1) ); 29879 putYMMRegLane32F( rG, 0, CVT(s0) ); 29880 # undef CVT 29881 goto decode_success; 29882 } 29883 /* VROUNDPS imm8, ymm2/m256, ymm1 */ 29884 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */ 29885 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29886 UChar modrm = getUChar(delta); 29887 UInt rG = gregOfRexRM(pfx, modrm); 29888 IRTemp src = newTemp(Ity_V256); 29889 IRTemp s0 = IRTemp_INVALID; 29890 IRTemp s1 = IRTemp_INVALID; 29891 IRTemp s2 = IRTemp_INVALID; 29892 IRTemp s3 = IRTemp_INVALID; 29893 IRTemp s4 = IRTemp_INVALID; 29894 IRTemp s5 = IRTemp_INVALID; 29895 IRTemp s6 = IRTemp_INVALID; 29896 IRTemp s7 = IRTemp_INVALID; 29897 IRTemp rm = newTemp(Ity_I32); 29898 Int imm = 0; 29899 29900 modrm = getUChar(delta); 29901 29902 if (epartIsReg(modrm)) { 29903 UInt rE = eregOfRexRM(pfx, modrm); 29904 assign( src, getYMMReg( rE ) ); 29905 imm = getUChar(delta+1); 29906 if (imm & ~15) break; 29907 delta += 1+1; 29908 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 29909 } else { 29910 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 29911 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 29912 imm = getUChar(delta+alen); 29913 if (imm & ~15) break; 29914 delta += alen+1; 29915 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 29916 } 29917 29918 /* (imm & 3) contains an Intel-encoded rounding mode. Because 29919 that encoding is the same as the encoding for IRRoundingMode, 29920 we can use that value directly in the IR as a rounding 29921 mode. */ 29922 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3));
29923
29924 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
29925 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
29926 unop(Iop_ReinterpI32asF32, mkexpr(s)))
29927 putYMMRegLane32F( rG, 7, CVT(s7) );
29928 putYMMRegLane32F( rG, 6, CVT(s6) );
29929 putYMMRegLane32F( rG, 5, CVT(s5) );
29930 putYMMRegLane32F( rG, 4, CVT(s4) );
29931 putYMMRegLane32F( rG, 3, CVT(s3) );
29932 putYMMRegLane32F( rG, 2, CVT(s2) );
29933 putYMMRegLane32F( rG, 1, CVT(s1) );
29934 putYMMRegLane32F( rG, 0, CVT(s0) );
29935 # undef CVT
29936 goto decode_success;
29937 }
29938
29939 case 0x09:
29940 /* VROUNDPD imm8, xmm2/m128, xmm1 */
29941 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
29942 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29943 UChar modrm = getUChar(delta);
29944 UInt rG = gregOfRexRM(pfx, modrm);
29945 IRTemp src = newTemp(Ity_V128);
29946 IRTemp s0 = IRTemp_INVALID;
29947 IRTemp s1 = IRTemp_INVALID;
29948 IRTemp rm = newTemp(Ity_I32);
29949 Int imm = 0;
29950
29951 modrm = getUChar(delta);
29952
29953 if (epartIsReg(modrm)) {
29954 UInt rE = eregOfRexRM(pfx, modrm);
29955 assign( src, getXMMReg( rE ) );
29956 imm = getUChar(delta+1);
29957 if (imm & ~15) break;
29958 delta += 1+1;
29959 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
29960 } else {
29961 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29962 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
29963 imm = getUChar(delta+alen);
29964 if (imm & ~15) break;
29965 delta += alen+1;
29966 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
29967 }
29968
29969 /* (imm & 3) contains an Intel-encoded rounding mode. Because
29970 that encoding is the same as the encoding for IRRoundingMode,
29971 we can use that value directly in the IR as a rounding
29972 mode. */
29973 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
29974
29975 breakupV128to64s( src, &s1, &s0 );
29976 putYMMRegLane128( rG, 1, mkV128(0) );
29977 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
29978 unop(Iop_ReinterpI64asF64, mkexpr(s)))
29979 putYMMRegLane64F( rG, 1, CVT(s1) );
29980 putYMMRegLane64F( rG, 0, CVT(s0) );
29981 # undef CVT
29982 goto decode_success;
29983 }
29984 /* VROUNDPD imm8, ymm2/m256, ymm1 */
29985 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
29986 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29987 UChar modrm = getUChar(delta);
29988 UInt rG = gregOfRexRM(pfx, modrm);
29989 IRTemp src = newTemp(Ity_V256);
29990 IRTemp s0 = IRTemp_INVALID;
29991 IRTemp s1 = IRTemp_INVALID;
29992 IRTemp s2 = IRTemp_INVALID;
29993 IRTemp s3 = IRTemp_INVALID;
29994 IRTemp rm = newTemp(Ity_I32);
29995 Int imm = 0;
29996
29997 modrm = getUChar(delta);
29998
29999 if (epartIsReg(modrm)) {
30000 UInt rE = eregOfRexRM(pfx, modrm);
30001 assign( src, getYMMReg( rE ) );
30002 imm = getUChar(delta+1);
30003 if (imm & ~15) break;
30004 delta += 1+1;
30005 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30006 } else {
30007 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30008 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30009 imm = getUChar(delta+alen);
30010 if (imm & ~15) break;
30011 delta += alen+1;
30012 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30013 }
30014
30015 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30016 that encoding is the same as the encoding for IRRoundingMode,
30017 we can use that value directly in the IR as a rounding
30018 mode. */
30019 assign(rm, (imm & 4) ?
get_sse_roundingmode() : mkU32(imm & 3)); 30020 30021 breakupV256to64s( src, &s3, &s2, &s1, &s0 ); 30022 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 30023 unop(Iop_ReinterpI64asF64, mkexpr(s))) 30024 putYMMRegLane64F( rG, 3, CVT(s3) ); 30025 putYMMRegLane64F( rG, 2, CVT(s2) ); 30026 putYMMRegLane64F( rG, 1, CVT(s1) ); 30027 putYMMRegLane64F( rG, 0, CVT(s0) ); 30028 # undef CVT 30029 goto decode_success; 30030 } 30031 30032 case 0x0A: 30033 case 0x0B: 30034 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */ 30035 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */ 30036 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */ 30037 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */ 30038 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30039 UChar modrm = getUChar(delta); 30040 UInt rG = gregOfRexRM(pfx, modrm); 30041 UInt rV = getVexNvvvv(pfx); 30042 Bool isD = opc == 0x0B; 30043 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 30044 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 30045 Int imm = 0; 30046 30047 if (epartIsReg(modrm)) { 30048 UInt rE = eregOfRexRM(pfx, modrm); 30049 assign( src, 30050 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 30051 imm = getUChar(delta+1); 30052 if (imm & ~15) break; 30053 delta += 1+1; 30054 DIP( "vrounds%c $%d,%s,%s,%s\n", 30055 isD ? 'd' : 's', 30056 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) ); 30057 } else { 30058 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30059 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 30060 imm = getUChar(delta+alen); 30061 if (imm & ~15) break; 30062 delta += alen+1; 30063 DIP( "vrounds%c $%d,%s,%s,%s\n", 30064 isD ? 'd' : 's', 30065 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) ); 30066 } 30067 30068 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30069 that encoding is the same as the encoding for IRRoundingMode, 30070 we can use that value directly in the IR as a rounding 30071 mode. */ 30072 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 30073 (imm & 4) ? 
get_sse_roundingmode() 30074 : mkU32(imm & 3), 30075 mkexpr(src)) ); 30076 30077 if (isD) 30078 putXMMRegLane64F( rG, 0, mkexpr(res) ); 30079 else { 30080 putXMMRegLane32F( rG, 0, mkexpr(res) ); 30081 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) ); 30082 } 30083 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) ); 30084 putYMMRegLane128( rG, 1, mkV128(0) ); 30085 *uses_vvvv = True; 30086 goto decode_success; 30087 } 30088 break; 30089 30090 case 0x0C: 30091 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */ 30092 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */ 30093 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30094 UChar modrm = getUChar(delta); 30095 UInt imm8; 30096 UInt rG = gregOfRexRM(pfx, modrm); 30097 UInt rV = getVexNvvvv(pfx); 30098 IRTemp sV = newTemp(Ity_V256); 30099 IRTemp sE = newTemp(Ity_V256); 30100 assign ( sV, getYMMReg(rV) ); 30101 if (epartIsReg(modrm)) { 30102 UInt rE = eregOfRexRM(pfx, modrm); 30103 delta += 1; 30104 imm8 = getUChar(delta); 30105 DIP("vblendps $%u,%s,%s,%s\n", 30106 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30107 assign(sE, getYMMReg(rE)); 30108 } else { 30109 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30110 delta += alen; 30111 imm8 = getUChar(delta); 30112 DIP("vblendps $%u,%s,%s,%s\n", 30113 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30114 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30115 } 30116 delta++; 30117 putYMMReg( rG, 30118 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) ); 30119 *uses_vvvv = True; 30120 goto decode_success; 30121 } 30122 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */ 30123 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */ 30124 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30125 UChar modrm = getUChar(delta); 30126 UInt imm8; 30127 UInt rG = gregOfRexRM(pfx, modrm); 30128 UInt rV = getVexNvvvv(pfx); 30129 IRTemp sV = newTemp(Ity_V128); 30130 IRTemp sE = newTemp(Ity_V128); 30131 assign ( sV, getXMMReg(rV) ); 30132 if (epartIsReg(modrm)) { 30133 UInt rE = eregOfRexRM(pfx, modrm); 30134 delta += 1; 30135 imm8 = getUChar(delta); 30136 DIP("vblendps $%u,%s,%s,%s\n", 30137 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30138 assign(sE, getXMMReg(rE)); 30139 } else { 30140 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30141 delta += alen; 30142 imm8 = getUChar(delta); 30143 DIP("vblendps $%u,%s,%s,%s\n", 30144 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30145 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30146 } 30147 delta++; 30148 putYMMRegLoAndZU( rG, 30149 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) ); 30150 *uses_vvvv = True; 30151 goto decode_success; 30152 } 30153 break; 30154 30155 case 0x0D: 30156 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */ 30157 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */ 30158 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30159 UChar modrm = getUChar(delta); 30160 UInt imm8; 30161 UInt rG = gregOfRexRM(pfx, modrm); 30162 UInt rV = getVexNvvvv(pfx); 30163 IRTemp sV = newTemp(Ity_V256); 30164 IRTemp sE = newTemp(Ity_V256); 30165 assign ( sV, getYMMReg(rV) ); 30166 if (epartIsReg(modrm)) { 30167 UInt rE = eregOfRexRM(pfx, modrm); 30168 delta += 1; 30169 imm8 = getUChar(delta); 30170 DIP("vblendpd $%u,%s,%s,%s\n", 30171 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30172 assign(sE, getYMMReg(rE)); 30173 } else { 30174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30175 delta += alen; 30176 imm8 = getUChar(delta); 30177 DIP("vblendpd $%u,%s,%s,%s\n", 30178 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30179 assign(sE, 
loadLE(Ity_V256, mkexpr(addr))); 30180 } 30181 delta++; 30182 putYMMReg( rG, 30183 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) ); 30184 *uses_vvvv = True; 30185 goto decode_success; 30186 } 30187 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */ 30188 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */ 30189 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30190 UChar modrm = getUChar(delta); 30191 UInt imm8; 30192 UInt rG = gregOfRexRM(pfx, modrm); 30193 UInt rV = getVexNvvvv(pfx); 30194 IRTemp sV = newTemp(Ity_V128); 30195 IRTemp sE = newTemp(Ity_V128); 30196 assign ( sV, getXMMReg(rV) ); 30197 if (epartIsReg(modrm)) { 30198 UInt rE = eregOfRexRM(pfx, modrm); 30199 delta += 1; 30200 imm8 = getUChar(delta); 30201 DIP("vblendpd $%u,%s,%s,%s\n", 30202 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30203 assign(sE, getXMMReg(rE)); 30204 } else { 30205 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30206 delta += alen; 30207 imm8 = getUChar(delta); 30208 DIP("vblendpd $%u,%s,%s,%s\n", 30209 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30210 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30211 } 30212 delta++; 30213 putYMMRegLoAndZU( rG, 30214 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) ); 30215 *uses_vvvv = True; 30216 goto decode_success; 30217 } 30218 break; 30219 30220 case 0x0E: 30221 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */ 30222 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */ 30223 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30224 UChar modrm = getUChar(delta); 30225 UInt imm8; 30226 UInt rG = gregOfRexRM(pfx, modrm); 30227 UInt rV = getVexNvvvv(pfx); 30228 IRTemp sV = newTemp(Ity_V128); 30229 IRTemp sE = newTemp(Ity_V128); 30230 assign ( sV, getXMMReg(rV) ); 30231 if (epartIsReg(modrm)) { 30232 UInt rE = eregOfRexRM(pfx, modrm); 30233 delta += 1; 30234 imm8 = getUChar(delta); 30235 DIP("vpblendw $%u,%s,%s,%s\n", 30236 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30237 assign(sE, getXMMReg(rE)); 30238 } else { 30239 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30240 delta += alen; 30241 imm8 = getUChar(delta); 30242 DIP("vpblendw $%u,%s,%s,%s\n", 30243 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30244 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30245 } 30246 delta++; 30247 putYMMRegLoAndZU( rG, 30248 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) ); 30249 *uses_vvvv = True; 30250 goto decode_success; 30251 } 30252 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */ 30253 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */ 30254 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30255 UChar modrm = getUChar(delta); 30256 UInt imm8; 30257 UInt rG = gregOfRexRM(pfx, modrm); 30258 UInt rV = getVexNvvvv(pfx); 30259 IRTemp sV = newTemp(Ity_V256); 30260 IRTemp sE = newTemp(Ity_V256); 30261 IRTemp sVhi, sVlo, sEhi, sElo; 30262 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID; 30263 assign ( sV, getYMMReg(rV) ); 30264 if (epartIsReg(modrm)) { 30265 UInt rE = eregOfRexRM(pfx, modrm); 30266 delta += 1; 30267 imm8 = getUChar(delta); 30268 DIP("vpblendw $%u,%s,%s,%s\n", 30269 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30270 assign(sE, getYMMReg(rE)); 30271 } else { 30272 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30273 delta += alen; 30274 imm8 = getUChar(delta); 30275 DIP("vpblendw $%u,%s,%s,%s\n", 30276 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30277 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30278 } 30279 delta++; 30280 breakupV256toV128s( sV, &sVhi, &sVlo ); 30281 breakupV256toV128s( sE, &sEhi, &sElo ); 30282 putYMMReg( rG, binop( 
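/* AVX2 VPBLENDW uses the one 8-bit mask for both 128-bit halves --
   bit i picks the source of word i within each half -- so the two
   halves can be blended independently and then reassembled: */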
Iop_V128HLtoV256, 30283 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ), 30284 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) ); 30285 *uses_vvvv = True; 30286 goto decode_success; 30287 } 30288 break; 30289 30290 case 0x0F: 30291 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */ 30292 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */ 30293 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30294 UChar modrm = getUChar(delta); 30295 UInt rG = gregOfRexRM(pfx, modrm); 30296 UInt rV = getVexNvvvv(pfx); 30297 IRTemp sV = newTemp(Ity_V128); 30298 IRTemp dV = newTemp(Ity_V128); 30299 UInt imm8; 30300 30301 assign( dV, getXMMReg(rV) ); 30302 30303 if ( epartIsReg( modrm ) ) { 30304 UInt rE = eregOfRexRM(pfx, modrm); 30305 assign( sV, getXMMReg(rE) ); 30306 imm8 = getUChar(delta+1); 30307 delta += 1+1; 30308 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameXMMReg(rE), 30309 nameXMMReg(rV), nameXMMReg(rG)); 30310 } else { 30311 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30312 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 30313 imm8 = getUChar(delta+alen); 30314 delta += alen+1; 30315 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf, 30316 nameXMMReg(rV), nameXMMReg(rG)); 30317 } 30318 30319 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 ); 30320 putYMMRegLoAndZU( rG, mkexpr(res) ); 30321 *uses_vvvv = True; 30322 goto decode_success; 30323 } 30324 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */ 30325 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */ 30326 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30327 UChar modrm = getUChar(delta); 30328 UInt rG = gregOfRexRM(pfx, modrm); 30329 UInt rV = getVexNvvvv(pfx); 30330 IRTemp sV = newTemp(Ity_V256); 30331 IRTemp dV = newTemp(Ity_V256); 30332 IRTemp sHi, sLo, dHi, dLo; 30333 sHi = sLo = dHi = dLo = IRTemp_INVALID; 30334 UInt imm8; 30335 30336 assign( dV, getYMMReg(rV) ); 30337 30338 if ( epartIsReg( modrm ) ) { 30339 UInt rE = eregOfRexRM(pfx, modrm); 30340 assign( sV, getYMMReg(rE) ); 30341 imm8 = getUChar(delta+1); 30342 delta += 1+1; 30343 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameYMMReg(rE), 30344 nameYMMReg(rV), nameYMMReg(rG)); 30345 } else { 30346 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30347 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 30348 imm8 = getUChar(delta+alen); 30349 delta += alen+1; 30350 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf, 30351 nameYMMReg(rV), nameYMMReg(rG)); 30352 } 30353 30354 breakupV256toV128s( dV, &dHi, &dLo ); 30355 breakupV256toV128s( sV, &sHi, &sLo ); 30356 putYMMReg( rG, binop( Iop_V128HLtoV256, 30357 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ), 30358 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) ) 30359 ); 30360 *uses_vvvv = True; 30361 goto decode_success; 30362 } 30363 break; 30364 30365 case 0x14: 30366 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */ 30367 if (have66noF2noF3(pfx) 30368 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30369 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 30370 goto decode_success; 30371 } 30372 break; 30373 30374 case 0x15: 30375 /* VPEXTRW imm8, reg/m16, xmm2 */ 30376 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */ 30377 if (have66noF2noF3(pfx) 30378 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30379 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ ); 30380 goto decode_success; 30381 } 30382 break; 30383 30384 case 0x16: 30385 /* VPEXTRD imm8, r32/m32, xmm2 */ 30386 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */ 30387 if (have66noF2noF3(pfx) 30388 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30389 delta = dis_PEXTRD( 
vbi, pfx, delta, True/*isAvx*/ ); 30390 goto decode_success; 30391 } 30392 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */ 30393 if (have66noF2noF3(pfx) 30394 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 30395 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ ); 30396 goto decode_success; 30397 } 30398 break; 30399 30400 case 0x17: 30401 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */ 30402 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30403 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ ); 30404 goto decode_success; 30405 } 30406 break; 30407 30408 case 0x18: 30409 /* VINSERTF128 r/m, rV, rD 30410 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 30411 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */ 30412 if (have66noF2noF3(pfx) 30413 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30414 UChar modrm = getUChar(delta); 30415 UInt ib = 0; 30416 UInt rG = gregOfRexRM(pfx, modrm); 30417 UInt rV = getVexNvvvv(pfx); 30418 IRTemp t128 = newTemp(Ity_V128); 30419 if (epartIsReg(modrm)) { 30420 UInt rE = eregOfRexRM(pfx, modrm); 30421 delta += 1; 30422 assign(t128, getXMMReg(rE)); 30423 ib = getUChar(delta); 30424 DIP("vinsertf128 $%u,%s,%s,%s\n", 30425 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30426 } else { 30427 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30428 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 30429 delta += alen; 30430 ib = getUChar(delta); 30431 DIP("vinsertf128 $%u,%s,%s,%s\n", 30432 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30433 } 30434 delta++; 30435 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 30436 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 30437 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 30438 *uses_vvvv = True; 30439 goto decode_success; 30440 } 30441 break; 30442 30443 case 0x19: 30444 /* VEXTRACTF128 $lane_no, rS, r/m 30445 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 30446 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */ 30447 if (have66noF2noF3(pfx) 30448 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30449 UChar modrm = getUChar(delta); 30450 UInt ib = 0; 30451 UInt rS = gregOfRexRM(pfx, modrm); 30452 IRTemp t128 = newTemp(Ity_V128); 30453 if (epartIsReg(modrm)) { 30454 UInt rD = eregOfRexRM(pfx, modrm); 30455 delta += 1; 30456 ib = getUChar(delta); 30457 assign(t128, getYMMRegLane128(rS, ib & 1)); 30458 putYMMRegLoAndZU(rD, mkexpr(t128)); 30459 DIP("vextractf128 $%u,%s,%s\n", 30460 ib, nameXMMReg(rS), nameYMMReg(rD)); 30461 } else { 30462 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30463 delta += alen; 30464 ib = getUChar(delta); 30465 assign(t128, getYMMRegLane128(rS, ib & 1)); 30466 storeLE(mkexpr(addr), mkexpr(t128)); 30467 DIP("vextractf128 $%u,%s,%s\n", 30468 ib, nameYMMReg(rS), dis_buf); 30469 } 30470 delta++; 30471 /* doesn't use vvvv */ 30472 goto decode_success; 30473 } 30474 break; 30475 30476 case 0x20: 30477 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */ 30478 if (have66noF2noF3(pfx) 30479 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30480 UChar modrm = getUChar(delta); 30481 UInt rG = gregOfRexRM(pfx, modrm); 30482 UInt rV = getVexNvvvv(pfx); 30483 Int imm8; 30484 IRTemp src_u8 = newTemp(Ity_I8); 30485 30486 if ( epartIsReg( modrm ) ) { 30487 UInt rE = eregOfRexRM(pfx,modrm); 30488 imm8 = (Int)(getUChar(delta+1) & 15); 30489 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) ); 30490 delta += 1+1; 30491 DIP( "vpinsrb $%d,%s,%s,%s\n", 30492 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30493 } else { 
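/* Memory-operand form: the byte to insert is loaded from m8, and
   the immediate byte follows the amode. */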
30494 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30495 imm8 = (Int)(getUChar(delta+alen) & 15);
30496 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
30497 delta += alen+1;
30498 DIP( "vpinsrb $%d,%s,%s,%s\n",
30499 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30500 }
30501
30502 IRTemp src_vec = newTemp(Ity_V128);
30503 assign(src_vec, getXMMReg( rV ));
30504 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
30505 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30506 *uses_vvvv = True;
30507 goto decode_success;
30508 }
30509 break;
30510
30511 case 0x21:
30512 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
30513 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
30514 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30515 UChar modrm = getUChar(delta);
30516 UInt rG = gregOfRexRM(pfx, modrm);
30517 UInt rV = getVexNvvvv(pfx);
30518 UInt imm8;
30519 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
30520 const IRTemp inval = IRTemp_INVALID;
30521
30522 if ( epartIsReg( modrm ) ) {
30523 UInt rE = eregOfRexRM(pfx, modrm);
30524 IRTemp vE = newTemp(Ity_V128);
30525 assign( vE, getXMMReg(rE) );
30526 IRTemp dsE[4] = { inval, inval, inval, inval };
30527 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
30528 imm8 = getUChar(delta+1);
30529 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
30530 delta += 1+1;
30531 DIP( "vinsertps $%u,%s,%s,%s\n",
30532 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30533 } else {
30534 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30535 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
30536 imm8 = getUChar(delta+alen);
30537 delta += alen+1;
30538 DIP( "vinsertps $%u,%s,%s,%s\n",
30539 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30540 }
30541
30542 IRTemp vV = newTemp(Ity_V128);
30543 assign( vV, getXMMReg(rV) );
30544
30545 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
30546 *uses_vvvv = True;
30547 goto decode_success;
30548 }
30549 break;
30550
30551 case 0x22:
30552 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
30553 if (have66noF2noF3(pfx)
30554 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30555 UChar modrm = getUChar(delta);
30556 UInt rG = gregOfRexRM(pfx, modrm);
30557 UInt rV = getVexNvvvv(pfx);
30558 Int imm8_10;
30559 IRTemp src_u32 = newTemp(Ity_I32);
30560
30561 if ( epartIsReg( modrm ) ) {
30562 UInt rE = eregOfRexRM(pfx,modrm);
30563 imm8_10 = (Int)(getUChar(delta+1) & 3);
30564 assign( src_u32, getIReg32( rE ) );
30565 delta += 1+1;
30566 DIP( "vpinsrd $%d,%s,%s,%s\n",
30567 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
30568 } else {
30569 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30570 imm8_10 = (Int)(getUChar(delta+alen) & 3);
30571 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
30572 delta += alen+1;
30573 DIP( "vpinsrd $%d,%s,%s,%s\n",
30574 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30575 }
30576
30577 IRTemp src_vec = newTemp(Ity_V128);
30578 assign(src_vec, getXMMReg( rV ));
30579 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
30580 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30581 *uses_vvvv = True;
30582 goto decode_success;
30583 }
30584 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
30585 if (have66noF2noF3(pfx)
30586 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
30587 UChar modrm = getUChar(delta);
30588 UInt rG = gregOfRexRM(pfx, modrm);
30589 UInt rV = getVexNvvvv(pfx);
30590 Int imm8_0;
30591 IRTemp src_u64 = newTemp(Ity_I64);
30592
30593 if ( epartIsReg( modrm ) ) {
30594 UInt rE =
eregOfRexRM(pfx,modrm);
30595 imm8_0 = (Int)(getUChar(delta+1) & 1);
30596 assign( src_u64, getIReg64( rE ) );
30597 delta += 1+1;
30598 DIP( "vpinsrq $%d,%s,%s,%s\n",
30599 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
30600 } else {
30601 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30602 imm8_0 = (Int)(getUChar(delta+alen) & 1);
30603 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
30604 delta += alen+1;
30605 DIP( "vpinsrq $%d,%s,%s,%s\n",
30606 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30607 }
30608
30609 IRTemp src_vec = newTemp(Ity_V128);
30610 assign(src_vec, getXMMReg( rV ));
30611 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
30612 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30613 *uses_vvvv = True;
30614 goto decode_success;
30615 }
30616 break;
30617
30618 case 0x38:
30619 /* VINSERTI128 r/m, rV, rD
30620 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
30621 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
30622 if (have66noF2noF3(pfx)
30623 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30624 UChar modrm = getUChar(delta);
30625 UInt ib = 0;
30626 UInt rG = gregOfRexRM(pfx, modrm);
30627 UInt rV = getVexNvvvv(pfx);
30628 IRTemp t128 = newTemp(Ity_V128);
30629 if (epartIsReg(modrm)) {
30630 UInt rE = eregOfRexRM(pfx, modrm);
30631 delta += 1;
30632 assign(t128, getXMMReg(rE));
30633 ib = getUChar(delta);
30634 DIP("vinserti128 $%u,%s,%s,%s\n",
30635 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30636 } else {
30637 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30638 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
30639 delta += alen;
30640 ib = getUChar(delta);
30641 DIP("vinserti128 $%u,%s,%s,%s\n",
30642 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30643 }
30644 delta++;
30645 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
30646 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
30647 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
30648 *uses_vvvv = True;
30649 goto decode_success;
30650 }
30651 break;
30652
30653 case 0x39:
30654 /* VEXTRACTI128 $lane_no, rS, r/m
30655 ::: r/m:V128 = a lane of rS:V256 (RM format) */
30656 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
30657 if (have66noF2noF3(pfx)
30658 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30659 UChar modrm = getUChar(delta);
30660 UInt ib = 0;
30661 UInt rS = gregOfRexRM(pfx, modrm);
30662 IRTemp t128 = newTemp(Ity_V128);
30663 if (epartIsReg(modrm)) {
30664 UInt rD = eregOfRexRM(pfx, modrm);
30665 delta += 1;
30666 ib = getUChar(delta);
30667 assign(t128, getYMMRegLane128(rS, ib & 1));
30668 putYMMRegLoAndZU(rD, mkexpr(t128));
30669 DIP("vextracti128 $%u,%s,%s\n",
30670 ib, nameXMMReg(rS), nameYMMReg(rD));
30671 } else {
30672 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30673 delta += alen;
30674 ib = getUChar(delta);
30675 assign(t128, getYMMRegLane128(rS, ib & 1));
30676 storeLE(mkexpr(addr), mkexpr(t128));
30677 DIP("vextracti128 $%u,%s,%s\n",
30678 ib, nameYMMReg(rS), dis_buf);
30679 }
30680 delta++;
30681 /* doesn't use vvvv */
30682 goto decode_success;
30683 }
30684 break;
30685
30686 case 0x40:
30687 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
30688 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30689 UChar modrm = getUChar(delta);
30690 UInt rG = gregOfRexRM(pfx, modrm);
30691 UInt rV = getVexNvvvv(pfx);
30692 IRTemp dst_vec = newTemp(Ity_V128);
30693 Int imm8;
30694 if (epartIsReg( modrm )) {
30695 UInt rE = eregOfRexRM(pfx,modrm);
30696 imm8 = (Int)getUChar(delta+1);
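/* For DPPS, imm8[7:4] selects which 32-bit lanes enter the dot
   product and imm8[3:0] selects which result lanes receive the
   sum; the unselected result lanes are set to zero. */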
30697 assign( dst_vec, getXMMReg( rE ) );
30698 delta += 1+1;
30699 DIP( "vdpps $%d,%s,%s,%s\n",
30700 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30701 } else {
30702 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30703 imm8 = (Int)getUChar(delta+alen);
30704 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30705 delta += alen+1;
30706 DIP( "vdpps $%d,%s,%s,%s\n",
30707 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30708 }
30709
30710 IRTemp src_vec = newTemp(Ity_V128);
30711 assign(src_vec, getXMMReg( rV ));
30712 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
30713 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30714 *uses_vvvv = True;
30715 goto decode_success;
30716 }
30717 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
30718 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30719 UChar modrm = getUChar(delta);
30720 UInt rG = gregOfRexRM(pfx, modrm);
30721 UInt rV = getVexNvvvv(pfx);
30722 IRTemp dst_vec = newTemp(Ity_V256);
30723 Int imm8;
30724 if (epartIsReg( modrm )) {
30725 UInt rE = eregOfRexRM(pfx,modrm);
30726 imm8 = (Int)getUChar(delta+1);
30727 assign( dst_vec, getYMMReg( rE ) );
30728 delta += 1+1;
30729 DIP( "vdpps $%d,%s,%s,%s\n",
30730 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
30731 } else {
30732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30733 imm8 = (Int)getUChar(delta+alen);
30734 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
30735 delta += alen+1;
30736 DIP( "vdpps $%d,%s,%s,%s\n",
30737 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
30738 }
30739
30740 IRTemp src_vec = newTemp(Ity_V256);
30741 assign(src_vec, getYMMReg( rV ));
30742 IRTemp s0, s1, d0, d1;
30743 s0 = s1 = d0 = d1 = IRTemp_INVALID;
30744 breakupV256toV128s( dst_vec, &d1, &d0 );
30745 breakupV256toV128s( src_vec, &s1, &s0 );
30746 putYMMReg( rG, binop( Iop_V128HLtoV256,
30747 mkexpr( math_DPPS_128(s1, d1, imm8) ),
30748 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
30749 *uses_vvvv = True;
30750 goto decode_success;
30751 }
30752 break;
30753
30754 case 0x41:
30755 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
30756 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30757 UChar modrm = getUChar(delta);
30758 UInt rG = gregOfRexRM(pfx, modrm);
30759 UInt rV = getVexNvvvv(pfx);
30760 IRTemp dst_vec = newTemp(Ity_V128);
30761 Int imm8;
30762 if (epartIsReg( modrm )) {
30763 UInt rE = eregOfRexRM(pfx,modrm);
30764 imm8 = (Int)getUChar(delta+1);
30765 assign( dst_vec, getXMMReg( rE ) );
30766 delta += 1+1;
30767 DIP( "vdppd $%d,%s,%s,%s\n",
30768 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30769 } else {
30770 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30771 imm8 = (Int)getUChar(delta+alen);
30772 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30773 delta += alen+1;
30774 DIP( "vdppd $%d,%s,%s,%s\n",
30775 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30776 }
30777
30778 IRTemp src_vec = newTemp(Ity_V128);
30779 assign(src_vec, getXMMReg( rV ));
30780 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
30781 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30782 *uses_vvvv = True;
30783 goto decode_success;
30784 }
30785 break;
30786
30787 case 0x42:
30788 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
30789 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
30790 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30791 UChar modrm = getUChar(delta);
30792 Int imm8;
30793 IRTemp src_vec = newTemp(Ity_V128);
30794 IRTemp dst_vec = newTemp(Ity_V128);
30795 UInt
rG = gregOfRexRM(pfx, modrm); 30796 UInt rV = getVexNvvvv(pfx); 30797 30798 assign( dst_vec, getXMMReg(rV) ); 30799 30800 if ( epartIsReg( modrm ) ) { 30801 UInt rE = eregOfRexRM(pfx, modrm); 30802 30803 imm8 = (Int)getUChar(delta+1); 30804 assign( src_vec, getXMMReg(rE) ); 30805 delta += 1+1; 30806 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30807 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30808 } else { 30809 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 30810 1/* imm8 is 1 byte after the amode */ ); 30811 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 30812 imm8 = (Int)getUChar(delta+alen); 30813 delta += alen+1; 30814 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30815 dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30816 } 30817 30818 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec, 30819 src_vec, imm8) ) ); 30820 *uses_vvvv = True; 30821 goto decode_success; 30822 } 30823 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */ 30824 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */ 30825 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30826 UChar modrm = getUChar(delta); 30827 Int imm8; 30828 IRTemp src_vec = newTemp(Ity_V256); 30829 IRTemp dst_vec = newTemp(Ity_V256); 30830 UInt rG = gregOfRexRM(pfx, modrm); 30831 UInt rV = getVexNvvvv(pfx); 30832 IRTemp sHi, sLo, dHi, dLo; 30833 sHi = sLo = dHi = dLo = IRTemp_INVALID; 30834 30835 assign( dst_vec, getYMMReg(rV) ); 30836 30837 if ( epartIsReg( modrm ) ) { 30838 UInt rE = eregOfRexRM(pfx, modrm); 30839 30840 imm8 = (Int)getUChar(delta+1); 30841 assign( src_vec, getYMMReg(rE) ); 30842 delta += 1+1; 30843 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30844 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) ); 30845 } else { 30846 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 30847 1/* imm8 is 1 byte after the amode */ ); 30848 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) ); 30849 imm8 = (Int)getUChar(delta+alen); 30850 delta += alen+1; 30851 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 30852 dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 30853 } 30854 30855 breakupV256toV128s( dst_vec, &dHi, &dLo ); 30856 breakupV256toV128s( src_vec, &sHi, &sLo ); 30857 putYMMReg( rG, binop( Iop_V128HLtoV256, 30858 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ), 30859 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) ); 30860 *uses_vvvv = True; 30861 goto decode_success; 30862 } 30863 break; 30864 30865 case 0x44: 30866 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */ 30867 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */ 30868 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 30869 * Carry-less multiplication of selected XMM quadwords into XMM 30870 * registers (a.k.a multiplication of polynomials over GF(2)) 30871 */ 30872 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30873 UChar modrm = getUChar(delta); 30874 Int imm8; 30875 IRTemp sV = newTemp(Ity_V128); 30876 IRTemp dV = newTemp(Ity_V128); 30877 UInt rG = gregOfRexRM(pfx, modrm); 30878 UInt rV = getVexNvvvv(pfx); 30879 30880 assign( dV, getXMMReg(rV) ); 30881 30882 if ( epartIsReg( modrm ) ) { 30883 UInt rE = eregOfRexRM(pfx, modrm); 30884 imm8 = (Int)getUChar(delta+1); 30885 assign( sV, getXMMReg(rE) ); 30886 delta += 1+1; 30887 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8, 30888 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 30889 } else { 30890 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 30891 1/* imm8 is 1 byte after the amode */ ); 30892 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) ); 30893 imm8 = (Int)getUChar(delta+alen); 30894 delta += alen+1; 30895 DIP( "vpclmulqdq $%d, %s,%s,%s\n", 30896 imm8, 
dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 30897 } 30898 30899 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) ); 30900 *uses_vvvv = True; 30901 goto decode_success; 30902 } 30903 break; 30904 30905 case 0x46: 30906 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */ 30907 if (have66noF2noF3(pfx) 30908 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30909 UChar modrm = getUChar(delta); 30910 UInt imm8 = 0; 30911 UInt rG = gregOfRexRM(pfx, modrm); 30912 UInt rV = getVexNvvvv(pfx); 30913 IRTemp s00 = newTemp(Ity_V128); 30914 IRTemp s01 = newTemp(Ity_V128); 30915 IRTemp s10 = newTemp(Ity_V128); 30916 IRTemp s11 = newTemp(Ity_V128); 30917 assign(s00, getYMMRegLane128(rV, 0)); 30918 assign(s01, getYMMRegLane128(rV, 1)); 30919 if (epartIsReg(modrm)) { 30920 UInt rE = eregOfRexRM(pfx, modrm); 30921 delta += 1; 30922 imm8 = getUChar(delta); 30923 DIP("vperm2i128 $%u,%s,%s,%s\n", 30924 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30925 assign(s10, getYMMRegLane128(rE, 0)); 30926 assign(s11, getYMMRegLane128(rE, 1)); 30927 } else { 30928 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30929 delta += alen; 30930 imm8 = getUChar(delta); 30931 DIP("vperm2i128 $%u,%s,%s,%s\n", 30932 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30933 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 30934 mkexpr(addr), mkU64(0)))); 30935 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 30936 mkexpr(addr), mkU64(16)))); 30937 } 30938 delta++; 30939 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 30940 : ((_nn)==2) ? s10 : s11) 30941 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 30942 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 30943 # undef SEL 30944 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 30945 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 30946 *uses_vvvv = True; 30947 goto decode_success; 30948 } 30949 break; 30950 30951 case 0x4A: 30952 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4 30953 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 30954 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */ 30955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30956 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 30957 "vblendvps", 4, Iop_SarN32x4 ); 30958 *uses_vvvv = True; 30959 goto decode_success; 30960 } 30961 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4 30962 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 30963 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */ 30964 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30965 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 30966 "vblendvps", 4, Iop_SarN32x4 ); 30967 *uses_vvvv = True; 30968 goto decode_success; 30969 } 30970 break; 30971 30972 case 0x4B: 30973 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4 30974 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 30975 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */ 30976 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30977 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 30978 "vblendvpd", 8, Iop_SarN64x2 ); 30979 *uses_vvvv = True; 30980 goto decode_success; 30981 } 30982 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4 30983 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 30984 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */ 30985 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30986 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 30987 "vblendvpd", 8, Iop_SarN64x2 ); 30988 *uses_vvvv = True; 30989 goto decode_success; 30990 } 30991 break; 30992 30993 case 0x4C: 30994 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4 
30995 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
30996 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
30997 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30998 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
30999 "vpblendvb", 1, Iop_SarN8x16 );
31000 *uses_vvvv = True;
31001 goto decode_success;
31002 }
31003 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31004 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31005 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31006 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31007 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31008 "vpblendvb", 1, Iop_SarN8x16 );
31009 *uses_vvvv = True;
31010 goto decode_success;
31011 }
31012 break;
31013
31014 case 0x60:
31015 case 0x61:
31016 case 0x62:
31017 case 0x63:
31018 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31019 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31020 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31021 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31022 (selected special cases that actually occur in glibc,
31023 not by any means a complete implementation.)
31024 */
31025 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31026 Long delta0 = delta;
31027 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
31028 if (delta > delta0) goto decode_success;
31029 /* else fall through; dis_PCMPxSTRx failed to decode it */
31030 }
31031 break;
31032
31033 case 0xDF:
31034 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
31035 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31036 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
31037 goto decode_success;
31038 }
31039 break;
31040
31041 case 0xF0:
31042 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
31043 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
31044 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
31045 Int size = getRexW(pfx) ? 8 : 4;
31046 IRType ty = szToITy(size);
31047 IRTemp src = newTemp(ty);
31048 UChar rm = getUChar(delta);
31049 UChar imm8;
31050
31051 if (epartIsReg(rm)) {
31052 imm8 = getUChar(delta+1);
31053 assign( src, getIRegE(size,pfx,rm) );
31054 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
31055 nameIRegG(size,pfx,rm));
31056 delta += 2;
31057 } else {
31058 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
31059 imm8 = getUChar(delta+alen);
31060 assign( src, loadLE(ty, mkexpr(addr)) );
31061 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
31062 delta += alen + 1;
31063 }
31064 imm8 &= 8*size-1;
31065
31066 /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
31067 putIRegG( size, pfx, rm,
31068 imm8 == 0 ? mkexpr(src)
31069 : binop( mkSizedOp(ty,Iop_Or8),
31070 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
31071 mkU8(imm8) ),
31072 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
31073 mkU8(8*size-imm8) ) ) );
31074 /* Flags aren't modified. */
31075 goto decode_success;
31076 }
31077 break;
31078
31079 default:
31080 break;
31081
31082 }
31083
31084 //decode_failure:
31085 return deltaIN;
31086
31087 decode_success:
31088 return delta;
31089 }
31090
31091
31092 /*------------------------------------------------------------*/
31093 /*--- ---*/
31094 /*--- Disassemble a single instruction ---*/
31095 /*--- ---*/
31096 /*------------------------------------------------------------*/
31097
31098 /* Disassemble a single instruction into IR.
The instruction is 31099 located in host memory at &guest_code[delta]. */ 31100 31101 static 31102 DisResult disInstr_AMD64_WRK ( 31103 /*OUT*/Bool* expect_CAS, 31104 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ), 31105 Bool resteerCisOk, 31106 void* callback_opaque, 31107 Long delta64, 31108 VexArchInfo* archinfo, 31109 VexAbiInfo* vbi, 31110 Bool sigill_diag 31111 ) 31112 { 31113 IRTemp t1, t2, t3, t4, t5, t6; 31114 UChar pre; 31115 Int n, n_prefixes; 31116 DisResult dres; 31117 31118 /* The running delta */ 31119 Long delta = delta64; 31120 31121 /* Holds eip at the start of the insn, so that we can print 31122 consistent error messages for unimplemented insns. */ 31123 Long delta_start = delta; 31124 31125 /* sz denotes the nominal data-op size of the insn; we change it to 31126 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of 31127 conflict REX.W takes precedence. */ 31128 Int sz = 4; 31129 31130 /* pfx holds the summary of prefixes. */ 31131 Prefix pfx = PFX_EMPTY; 31132 31133 /* Holds the computed opcode-escape indication. */ 31134 Escape esc = ESC_NONE; 31135 31136 /* Set result defaults. */ 31137 dres.whatNext = Dis_Continue; 31138 dres.len = 0; 31139 dres.continueAt = 0; 31140 dres.jk_StopHere = Ijk_INVALID; 31141 *expect_CAS = False; 31142 31143 vassert(guest_RIP_next_assumed == 0); 31144 vassert(guest_RIP_next_mustcheck == False); 31145 31146 t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID; 31147 31148 DIP("\t0x%llx: ", guest_RIP_bbstart+delta); 31149 31150 /* Spot "Special" instructions (see comment at top of file). */ 31151 { 31152 UChar* code = (UChar*)(guest_code + delta); 31153 /* Spot the 16-byte preamble: 31154 48C1C703 rolq $3, %rdi 31155 48C1C70D rolq $13, %rdi 31156 48C1C73D rolq $61, %rdi 31157 48C1C733 rolq $51, %rdi 31158 */ 31159 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7 31160 && code[ 3] == 0x03 && 31161 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7 31162 && code[ 7] == 0x0D && 31163 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7 31164 && code[11] == 0x3D && 31165 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7 31166 && code[15] == 0x33) { 31167 /* Got a "Special" instruction preamble. Which one is it? 
*/ 31168 if (code[16] == 0x48 && code[17] == 0x87 31169 && code[18] == 0xDB /* xchgq %rbx,%rbx */) { 31170 /* %RDX = client_request ( %RAX ) */ 31171 DIP("%%rdx = client_request ( %%rax )\n"); 31172 delta += 19; 31173 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta); 31174 vassert(dres.whatNext == Dis_StopHere); 31175 goto decode_success; 31176 } 31177 else 31178 if (code[16] == 0x48 && code[17] == 0x87 31179 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) { 31180 /* %RAX = guest_NRADDR */ 31181 DIP("%%rax = guest_NRADDR\n"); 31182 delta += 19; 31183 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 )); 31184 goto decode_success; 31185 } 31186 else 31187 if (code[16] == 0x48 && code[17] == 0x87 31188 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) { 31189 /* call-noredir *%RAX */ 31190 DIP("call-noredir *%%rax\n"); 31191 delta += 19; 31192 t1 = newTemp(Ity_I64); 31193 assign(t1, getIRegRAX(8)); 31194 t2 = newTemp(Ity_I64); 31195 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 31196 putIReg64(R_RSP, mkexpr(t2)); 31197 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta)); 31198 jmp_treg(&dres, Ijk_NoRedir, t1); 31199 vassert(dres.whatNext == Dis_StopHere); 31200 goto decode_success; 31201 } 31202 else 31203 if (code[16] == 0x48 && code[17] == 0x87 31204 && code[18] == 0xff /* xchgq %rdi,%rdi */) { 31205 /* IR injection */ 31206 DIP("IR injection\n"); 31207 vex_inject_ir(irsb, Iend_LE); 31208 31209 // Invalidate the current insn. The reason is that the IRop we're 31210 // injecting here can change. In which case the translation has to 31211 // be redone. For ease of handling, we simply invalidate all the 31212 // time. 31213 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr))); 31214 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19))); 31215 31216 delta += 19; 31217 31218 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) ); 31219 dres.whatNext = Dis_StopHere; 31220 dres.jk_StopHere = Ijk_InvalICache; 31221 goto decode_success; 31222 } 31223 /* We don't know what it is. */ 31224 goto decode_failure; 31225 /*NOTREACHED*/ 31226 } 31227 } 31228 31229 /* Eat prefixes, summarising the result in pfx and sz, and rejecting 31230 as many invalid combinations as possible. */ 31231 n_prefixes = 0; 31232 while (True) { 31233 if (n_prefixes > 7) goto decode_failure; 31234 pre = getUChar(delta); 31235 switch (pre) { 31236 case 0x66: pfx |= PFX_66; break; 31237 case 0x67: pfx |= PFX_ASO; break; 31238 case 0xF2: pfx |= PFX_F2; break; 31239 case 0xF3: pfx |= PFX_F3; break; 31240 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break; 31241 case 0x2E: pfx |= PFX_CS; break; 31242 case 0x3E: pfx |= PFX_DS; break; 31243 case 0x26: pfx |= PFX_ES; break; 31244 case 0x64: pfx |= PFX_FS; break; 31245 case 0x65: pfx |= PFX_GS; break; 31246 case 0x36: pfx |= PFX_SS; break; 31247 case 0x40 ... 0x4F: 31248 pfx |= PFX_REX; 31249 if (pre & (1<<3)) pfx |= PFX_REXW; 31250 if (pre & (1<<2)) pfx |= PFX_REXR; 31251 if (pre & (1<<1)) pfx |= PFX_REXX; 31252 if (pre & (1<<0)) pfx |= PFX_REXB; 31253 break; 31254 default: 31255 goto not_a_legacy_prefix; 31256 } 31257 n_prefixes++; 31258 delta++; 31259 } 31260 31261 not_a_legacy_prefix: 31262 /* We've used up all the non-VEX prefixes. Parse and validate a 31263 VEX prefix if that's appropriate. */ 31264 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) { 31265 /* Used temporarily for holding VEX prefixes. 
*/ 31266 UChar vex0 = getUChar(delta); 31267 if (vex0 == 0xC4) { 31268 /* 3-byte VEX */ 31269 UChar vex1 = getUChar(delta+1); 31270 UChar vex2 = getUChar(delta+2); 31271 delta += 3; 31272 pfx |= PFX_VEX; 31273 /* Snarf contents of byte 1 */ 31274 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 31275 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX; 31276 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB; 31277 /* m-mmmm */ 31278 switch (vex1 & 0x1F) { 31279 case 1: esc = ESC_0F; break; 31280 case 2: esc = ESC_0F38; break; 31281 case 3: esc = ESC_0F3A; break; 31282 /* Any other m-mmmm field will #UD */ 31283 default: goto decode_failure; 31284 } 31285 /* Snarf contents of byte 2 */ 31286 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0; 31287 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3; 31288 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2; 31289 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1; 31290 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0; 31291 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0; 31292 /* pp */ 31293 switch (vex2 & 3) { 31294 case 0: break; 31295 case 1: pfx |= PFX_66; break; 31296 case 2: pfx |= PFX_F3; break; 31297 case 3: pfx |= PFX_F2; break; 31298 default: vassert(0); 31299 } 31300 } 31301 else if (vex0 == 0xC5) { 31302 /* 2-byte VEX */ 31303 UChar vex1 = getUChar(delta+1); 31304 delta += 2; 31305 pfx |= PFX_VEX; 31306 /* Snarf contents of byte 1 */ 31307 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR; 31308 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3; 31309 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2; 31310 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1; 31311 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0; 31312 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0; 31313 /* pp */ 31314 switch (vex1 & 3) { 31315 case 0: break; 31316 case 1: pfx |= PFX_66; break; 31317 case 2: pfx |= PFX_F3; break; 31318 case 3: pfx |= PFX_F2; break; 31319 default: vassert(0); 31320 } 31321 /* implied: */ 31322 esc = ESC_0F; 31323 } 31324 /* Can't have both VEX and REX */ 31325 if ((pfx & PFX_VEX) && (pfx & PFX_REX)) 31326 goto decode_failure; /* can't have both */ 31327 } 31328 31329 /* Dump invalid combinations */ 31330 n = 0; 31331 if (pfx & PFX_F2) n++; 31332 if (pfx & PFX_F3) n++; 31333 if (n > 1) 31334 goto decode_failure; /* can't have both */ 31335 31336 n = 0; 31337 if (pfx & PFX_CS) n++; 31338 if (pfx & PFX_DS) n++; 31339 if (pfx & PFX_ES) n++; 31340 if (pfx & PFX_FS) n++; 31341 if (pfx & PFX_GS) n++; 31342 if (pfx & PFX_SS) n++; 31343 if (n > 1) 31344 goto decode_failure; /* multiple seg overrides == illegal */ 31345 31346 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi' 31347 that we should accept it. */ 31348 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero) 31349 goto decode_failure; 31350 31351 /* Ditto for %gs prefixes. */ 31352 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60) 31353 goto decode_failure; 31354 31355 /* Set up sz. */ 31356 sz = 4; 31357 if (pfx & PFX_66) sz = 2; 31358 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8; 31359 31360 /* Now we should be looking at the primary opcode byte or the 31361 leading escapes. Check that any LOCK prefix is actually 31362 allowed. */ 31363 if (haveLOCK(pfx)) { 31364 if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) { 31365 DIP("lock "); 31366 } else { 31367 *expect_CAS = False; 31368 goto decode_failure; 31369 } 31370 } 31371 31372 /* Eat up opcode escape bytes, until we're really looking at the 31373 primary opcode byte. But only if there's no VEX present. 
*/ 31374 if (!(pfx & PFX_VEX)) { 31375 vassert(esc == ESC_NONE); 31376 pre = getUChar(delta); 31377 if (pre == 0x0F) { 31378 delta++; 31379 pre = getUChar(delta); 31380 switch (pre) { 31381 case 0x38: esc = ESC_0F38; delta++; break; 31382 case 0x3A: esc = ESC_0F3A; delta++; break; 31383 default: esc = ESC_0F; break; 31384 } 31385 } 31386 } 31387 31388 /* So now we're really really looking at the primary opcode 31389 byte. */ 31390 Long delta_at_primary_opcode = delta; 31391 31392 if (!(pfx & PFX_VEX)) { 31393 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE 31394 instructions preserve the upper 128 bits of YMM registers; 31395 iow we can simply ignore the presence of the upper halves of 31396 these registers. */ 31397 switch (esc) { 31398 case ESC_NONE: 31399 delta = dis_ESC_NONE( &dres, expect_CAS, 31400 resteerOkFn, resteerCisOk, callback_opaque, 31401 archinfo, vbi, pfx, sz, delta ); 31402 break; 31403 case ESC_0F: 31404 delta = dis_ESC_0F ( &dres, expect_CAS, 31405 resteerOkFn, resteerCisOk, callback_opaque, 31406 archinfo, vbi, pfx, sz, delta ); 31407 break; 31408 case ESC_0F38: 31409 delta = dis_ESC_0F38( &dres, 31410 resteerOkFn, resteerCisOk, callback_opaque, 31411 archinfo, vbi, pfx, sz, delta ); 31412 break; 31413 case ESC_0F3A: 31414 delta = dis_ESC_0F3A( &dres, 31415 resteerOkFn, resteerCisOk, callback_opaque, 31416 archinfo, vbi, pfx, sz, delta ); 31417 break; 31418 default: 31419 vassert(0); 31420 } 31421 } else { 31422 /* VEX prefixed instruction */ 31423 /* Sloppy Intel wording: "An instruction encoded with a VEX.128 31424 prefix that loads a YMM register operand ..." zeroes out bits 31425 128 and above of the register. */ 31426 Bool uses_vvvv = False; 31427 switch (esc) { 31428 case ESC_0F: 31429 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv, 31430 resteerOkFn, resteerCisOk, 31431 callback_opaque, 31432 archinfo, vbi, pfx, sz, delta ); 31433 break; 31434 case ESC_0F38: 31435 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv, 31436 resteerOkFn, resteerCisOk, 31437 callback_opaque, 31438 archinfo, vbi, pfx, sz, delta ); 31439 break; 31440 case ESC_0F3A: 31441 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv, 31442 resteerOkFn, resteerCisOk, 31443 callback_opaque, 31444 archinfo, vbi, pfx, sz, delta ); 31445 break; 31446 case ESC_NONE: 31447 /* The presence of a VEX prefix, by Intel definition, 31448 always implies at least an 0F escape. */ 31449 goto decode_failure; 31450 default: 31451 vassert(0); 31452 } 31453 /* If the insn doesn't use VEX.vvvv then it must be all ones. 31454 Check this. */ 31455 if (!uses_vvvv) { 31456 if (getVexNvvvv(pfx) != 0) 31457 goto decode_failure; 31458 } 31459 } 31460 31461 vassert(delta - delta_at_primary_opcode >= 0); 31462 vassert(delta - delta_at_primary_opcode < 16/*let's say*/); 31463 31464 /* Use delta == delta_at_primary_opcode to denote decode failure. 31465 This implies that any successful decode must use at least one 31466 byte up. */ 31467 if (delta == delta_at_primary_opcode) 31468 goto decode_failure; 31469 else 31470 goto decode_success; /* \o/ */ 31471 31472 #if 0 /* XYZZY */ 31473 31474 /* ---------------------------------------------------- */ 31475 /* --- The SSE/SSE2 decoder. --- */ 31476 /* ---------------------------------------------------- */ 31477 31478 /* What did I do to deserve SSE ? Perhaps I was really bad in a 31479 previous life? */ 31480 31481 /* Note, this doesn't handle SSE3 right now. All amd64s support 31482 SSE2 as a minimum so there is no point distinguishing SSE1 vs 31483 SSE2. 
*/ 31484 31485 insn = (UChar*)&guest_code[delta]; 31486 31487 /* FXSAVE is spuriously at the start here only because it is 31488 thusly placed in guest-x86/toIR.c. */ 31489 31490 /* ------ SSE decoder main ------ */ 31491 31492 /* ---------------------------------------------------- */ 31493 /* --- end of the SSE decoder. --- */ 31494 /* ---------------------------------------------------- */ 31495 31496 /* ---------------------------------------------------- */ 31497 /* --- start of the SSE2 decoder. --- */ 31498 /* ---------------------------------------------------- */ 31499 31500 /* ---------------------------------------------------- */ 31501 /* --- end of the SSE/SSE2 decoder. --- */ 31502 /* ---------------------------------------------------- */ 31503 31504 /* ---------------------------------------------------- */ 31505 /* --- start of the SSE3 decoder. --- */ 31506 /* ---------------------------------------------------- */ 31507 31508 /* ---------------------------------------------------- */ 31509 /* --- end of the SSE3 decoder. --- */ 31510 /* ---------------------------------------------------- */ 31511 31512 /* ---------------------------------------------------- */ 31513 /* --- start of the SSSE3 decoder. --- */ 31514 /* ---------------------------------------------------- */ 31515 31516 /* ---------------------------------------------------- */ 31517 /* --- end of the SSSE3 decoder. --- */ 31518 /* ---------------------------------------------------- */ 31519 31520 /* ---------------------------------------------------- */ 31521 /* --- start of the SSE4 decoder --- */ 31522 /* ---------------------------------------------------- */ 31523 31524 /* ---------------------------------------------------- */ 31525 /* --- end of the SSE4 decoder --- */ 31526 /* ---------------------------------------------------- */ 31527 31528 /*after_sse_decoders:*/ 31529 31530 /* Get the primary opcode. */ 31531 opc = getUChar(delta); delta++; 31532 31533 /* We get here if the current insn isn't SSE, or this CPU doesn't 31534 support SSE. */ 31535 31536 switch (opc) { 31537 31538 /* ------------------------ Control flow --------------- */ 31539 31540 /* ------------------------ CWD/CDQ -------------------- */ 31541 31542 /* ------------------------ FPU ops -------------------- */ 31543 31544 /* ------------------------ INT ------------------------ */ 31545 31546 case 0xCD: { /* INT imm8 */ 31547 IRJumpKind jk = Ijk_Boring; 31548 if (have66orF2orF3(pfx)) goto decode_failure; 31549 d64 = getUChar(delta); delta++; 31550 switch (d64) { 31551 case 32: jk = Ijk_Sys_int32; break; 31552 default: goto decode_failure; 31553 } 31554 guest_RIP_next_mustcheck = True; 31555 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 31556 jmp_lit(jk, guest_RIP_next_assumed); 31557 /* It's important that all ArchRegs carry their up-to-date value 31558 at this point. So we declare an end-of-block here, which 31559 forces any TempRegs caching ArchRegs to be flushed. 
*/ 31560 vassert(dres.whatNext == Dis_StopHere); 31561 DIP("int $0x%02x\n", (UInt)d64); 31562 break; 31563 } 31564 31565 /* ------------------------ Jcond, byte offset --------- */ 31566 31567 /* ------------------------ IMUL ----------------------- */ 31568 31569 /* ------------------------ MOV ------------------------ */ 31570 31571 /* ------------------------ MOVx ------------------------ */ 31572 31573 /* ------------------------ opl imm, A ----------------- */ 31574 31575 /* ------------------------ opl Ev, Gv ----------------- */ 31576 31577 /* ------------------------ opl Gv, Ev ----------------- */ 31578 31579 /* ------------------------ POP ------------------------ */ 31580 31581 /* ------------------------ PUSH ----------------------- */ 31582 31583 /* ------ AE: SCAS variants ------ */ 31584 31585 /* ------ A6, A7: CMPS variants ------ */ 31586 31587 /* ------ AA, AB: STOS variants ------ */ 31588 31589 /* ------ A4, A5: MOVS variants ------ */ 31590 31591 /* ------------------------ XCHG ----------------------- */ 31592 31593 /* ------------------------ IN / OUT ----------------------- */ 31594 31595 /* ------------------------ (Grp1 extensions) ---------- */ 31596 31597 /* ------------------------ (Grp2 extensions) ---------- */ 31598 31599 /* ------------------------ (Grp3 extensions) ---------- */ 31600 31601 /* ------------------------ (Grp4 extensions) ---------- */ 31602 31603 /* ------------------------ (Grp5 extensions) ---------- */ 31604 31605 /* ------------------------ Escapes to 2-byte opcodes -- */ 31606 31607 case 0x0F: { 31608 opc = getUChar(delta); delta++; 31609 switch (opc) { 31610 31611 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ 31612 31613 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ 31614 31615 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ 31616 31617 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ 31618 31619 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ 31620 31621 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ 31622 31623 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ 31624 31625 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ 31626 31627 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ 31628 31629 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */ 31630 31631 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ 31632 31633 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */ 31634 31635 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ 31636 31637 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ 31638 31639 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ 31640 31641 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */ 31642 31643 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */ 31644 31645 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */ 31646 31647 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ 31648 31649 default: 31650 goto decode_failure; 31651 } /* switch (opc) for the 2-byte opcodes */ 31652 goto decode_success; 31653 } /* case 0x0F: of primary opcode */ 31654 31655 /* ------------------------ ??? ------------------------ */ 31656 #endif /* XYZZY */ 31657 31658 //default: 31659 decode_failure: 31660 /* All decode failures end up here. 
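      If requested via sigill_diag, we first print the undecoded bytes and
      the prefix/escape state as a diagnostic aid.  We then force RIP back to
      the current instruction and emit an Ijk_NoDecode exit, so the guest
      ultimately receives a SIGILL for this instruction; no other IR is
      generated for it.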
*/
31661    if (sigill_diag) {
31662       vex_printf("vex amd64->IR: unhandled instruction bytes: "
31663                  "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
31664                  (Int)getUChar(delta_start+0),
31665                  (Int)getUChar(delta_start+1),
31666                  (Int)getUChar(delta_start+2),
31667                  (Int)getUChar(delta_start+3),
31668                  (Int)getUChar(delta_start+4),
31669                  (Int)getUChar(delta_start+5),
31670                  (Int)getUChar(delta_start+6),
31671                  (Int)getUChar(delta_start+7) );
31672       vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
31673                  haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
31674                  getRexX(pfx), getRexB(pfx));
31675       vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
31676                  haveVEX(pfx) ? 1 : 0, getVexL(pfx),
31677                  getVexNvvvv(pfx),
31678                  esc==ESC_NONE ? "NONE" :
31679                  esc==ESC_0F ? "0F" :
31680                  esc==ESC_0F38 ? "0F38" :
31681                  esc==ESC_0F3A ? "0F3A" : "???");
31682       vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
31683                  have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
31684                  haveF3(pfx) ? 1 : 0);
31685    }
31686
31687    /* Tell the dispatcher that this insn cannot be decoded, and so has
31688       not been executed, and (is currently) the next to be executed.
31689       RIP should be up-to-date since it is made so at the start of each
31690       insn, but nevertheless be paranoid and update it again right
31691       now. */
31692    stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
31693    jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
31694    vassert(dres.whatNext == Dis_StopHere);
31695    dres.len = 0;
31696    /* We also need to say that a CAS is not expected now, regardless
31697       of what it might have been set to at the start of the function,
31698       since the IR that we've emitted just above (to synthesise a
31699       SIGILL) does not involve any CAS, and presumably no other IR has
31700       been emitted for this (non-decoded) insn. */
31701    *expect_CAS = False;
31702    return dres;
31703
31704 //   } /* switch (opc) for the main (primary) opcode switch. */
31705
31706   decode_success:
31707    /* All decode successes end up here. */
31708    switch (dres.whatNext) {
31709       case Dis_Continue:
31710          stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
31711          break;
31712       case Dis_ResteerU:
31713       case Dis_ResteerC:
31714          stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
31715          break;
31716       case Dis_StopHere:
31717          break;
31718       default:
31719          vassert(0);
31720    }
31721
31722    DIP("\n");
31723    dres.len = (Int)toUInt(delta - delta_start);
31724    return dres;
31725 }
31726
31727 #undef DIP
31728 #undef DIS
31729
31730
31731 /*------------------------------------------------------------*/
31732 /*--- Top-level fn                                          ---*/
31733 /*------------------------------------------------------------*/
31734
31735 /* Disassemble a single instruction into IR.  The instruction
31736    is located in host memory at &guest_code[delta].
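   This is a thin wrapper around disInstr_AMD64_WRK.  It sets up the
   per-insn globals, invokes the worker, and then cross-checks the result:
   if the worker claimed to know the next RIP, that value must match the
   instruction length actually decoded, and the presence or absence of an
   IRCAS statement in the emitted IR must agree with the worker's
   expect_CAS indication.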
*/ 31737 31738 DisResult disInstr_AMD64 ( IRSB* irsb_IN, 31739 Bool (*resteerOkFn) ( void*, Addr64 ), 31740 Bool resteerCisOk, 31741 void* callback_opaque, 31742 UChar* guest_code_IN, 31743 Long delta, 31744 Addr64 guest_IP, 31745 VexArch guest_arch, 31746 VexArchInfo* archinfo, 31747 VexAbiInfo* abiinfo, 31748 Bool host_bigendian_IN, 31749 Bool sigill_diag_IN ) 31750 { 31751 Int i, x1, x2; 31752 Bool expect_CAS, has_CAS; 31753 DisResult dres; 31754 31755 /* Set globals (see top of this file) */ 31756 vassert(guest_arch == VexArchAMD64); 31757 guest_code = guest_code_IN; 31758 irsb = irsb_IN; 31759 host_is_bigendian = host_bigendian_IN; 31760 guest_RIP_curr_instr = guest_IP; 31761 guest_RIP_bbstart = guest_IP - delta; 31762 31763 /* We'll consult these after doing disInstr_AMD64_WRK. */ 31764 guest_RIP_next_assumed = 0; 31765 guest_RIP_next_mustcheck = False; 31766 31767 x1 = irsb_IN->stmts_used; 31768 expect_CAS = False; 31769 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, 31770 resteerCisOk, 31771 callback_opaque, 31772 delta, archinfo, abiinfo, sigill_diag_IN ); 31773 x2 = irsb_IN->stmts_used; 31774 vassert(x2 >= x1); 31775 31776 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it 31777 got it right. Failure of this assertion is serious and denotes 31778 a bug in disInstr. */ 31779 if (guest_RIP_next_mustcheck 31780 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) { 31781 vex_printf("\n"); 31782 vex_printf("assumed next %%rip = 0x%llx\n", 31783 guest_RIP_next_assumed ); 31784 vex_printf(" actual next %%rip = 0x%llx\n", 31785 guest_RIP_curr_instr + dres.len ); 31786 vpanic("disInstr_AMD64: disInstr miscalculated next %rip"); 31787 } 31788 31789 /* See comment at the top of disInstr_AMD64_WRK for meaning of 31790 expect_CAS. Here, we (sanity-)check for the presence/absence of 31791 IRCAS as directed by the returned expect_CAS value. */ 31792 has_CAS = False; 31793 for (i = x1; i < x2; i++) { 31794 if (irsb_IN->stmts[i]->tag == Ist_CAS) 31795 has_CAS = True; 31796 } 31797 31798 if (expect_CAS != has_CAS) { 31799 /* inconsistency detected. re-disassemble the instruction so as 31800 to generate a useful error message; then assert. */ 31801 vex_traceflags |= VEX_TRACE_FE; 31802 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn, 31803 resteerCisOk, 31804 callback_opaque, 31805 delta, archinfo, abiinfo, sigill_diag_IN ); 31806 for (i = x1; i < x2; i++) { 31807 vex_printf("\t\t"); 31808 ppIRStmt(irsb_IN->stmts[i]); 31809 vex_printf("\n"); 31810 } 31811 /* Failure of this assertion is serious and denotes a bug in 31812 disInstr. */ 31813 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling"); 31814 } 31815 31816 return dres; 31817 } 31818 31819 31820 /*------------------------------------------------------------*/ 31821 /*--- Unused stuff ---*/ 31822 /*------------------------------------------------------------*/ 31823 31824 // A potentially more Memcheck-friendly version of gen_LZCNT, if 31825 // this should ever be needed. 31826 // 31827 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 31828 //{ 31829 // /* Scheme is simple: propagate the most significant 1-bit into all 31830 // lower positions in the word. This gives a word of the form 31831 // 0---01---1. Now invert it, giving a word of the form 31832 // 1---10---0, then do a population-count idiom (to count the 1s, 31833 // which is the number of leading zeroes, or the word size if the 31834 // original word was 0. 
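//
//    As a purely illustrative sketch (not part of the original code): a
//    caller wanting the leading-zero count of a 64-bit E operand could --
//    assuming the newTemp/assign/mkexpr/getIRegE/putIRegG helpers defined
//    elsewhere in this file, with pfx/modrm being the caller's prefix and
//    ModRM state, and ignoring the rflags updates a real LZCNT decode would
//    also need -- do something like:
//
//       IRTemp src = newTemp(Ity_I64);
//       assign( src, getIRegE(8, pfx, modrm) );
//       putIRegG( 8, pfx, modrm, mkexpr(gen_LZCNT(Ity_I64, src)) );
//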
31835 // */ 31836 // Int i; 31837 // IRTemp t[7]; 31838 // for (i = 0; i < 7; i++) { 31839 // t[i] = newTemp(ty); 31840 // } 31841 // if (ty == Ity_I64) { 31842 // assign(t[0], binop(Iop_Or64, mkexpr(src), 31843 // binop(Iop_Shr64, mkexpr(src), mkU8(1)))); 31844 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]), 31845 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2)))); 31846 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]), 31847 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4)))); 31848 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]), 31849 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8)))); 31850 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]), 31851 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16)))); 31852 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]), 31853 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32)))); 31854 // assign(t[6], unop(Iop_Not64, mkexpr(t[5]))); 31855 // return gen_POPCOUNT(ty, t[6]); 31856 // } 31857 // if (ty == Ity_I32) { 31858 // assign(t[0], binop(Iop_Or32, mkexpr(src), 31859 // binop(Iop_Shr32, mkexpr(src), mkU8(1)))); 31860 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]), 31861 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2)))); 31862 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]), 31863 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4)))); 31864 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]), 31865 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8)))); 31866 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]), 31867 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16)))); 31868 // assign(t[5], unop(Iop_Not32, mkexpr(t[4]))); 31869 // return gen_POPCOUNT(ty, t[5]); 31870 // } 31871 // if (ty == Ity_I16) { 31872 // assign(t[0], binop(Iop_Or16, mkexpr(src), 31873 // binop(Iop_Shr16, mkexpr(src), mkU8(1)))); 31874 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]), 31875 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2)))); 31876 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]), 31877 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4)))); 31878 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]), 31879 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8)))); 31880 // assign(t[4], unop(Iop_Not16, mkexpr(t[3]))); 31881 // return gen_POPCOUNT(ty, t[4]); 31882 // } 31883 // vassert(0); 31884 //} 31885 31886 31887 /*--------------------------------------------------------------------*/ 31888 /*--- end guest_amd64_toIR.c ---*/ 31889 /*--------------------------------------------------------------------*/ 31890