1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_amd64_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2015 OpenWorks LLP 11 info (at) open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34 */ 35 36 /* Translates AMD64 code to IR. */ 37 38 /* TODO: 39 40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked 41 to ensure a 64-bit value is being written. 42 43 x87 FP Limitations: 44 45 * all arithmetic done at 64 bits 46 47 * no FP exceptions, except for handling stack over/underflow 48 49 * FP rounding mode observed only for float->int conversions and 50 int->float conversions which could lose accuracy, and for 51 float-to-float rounding. For all other operations, 52 round-to-nearest is used, regardless. 
53 54 * some of the FCOM cases could do with testing -- not convinced 55 that the args are the right way round. 56 57 * FSAVE does not re-initialise the FPU; it should do 58 59 * FINIT not only initialises the FPU environment, it also zeroes 60 all the FP registers. It should leave the registers unchanged. 61 62 SAHF should cause eflags[1] == 1, and in fact it produces 0. As 63 per Intel docs this bit has no meaning anyway. Since PUSHF is the 64 only way to observe eflags[1], a proper fix would be to make that 65 bit be set by PUSHF. 66 67 This module uses global variables and so is not MT-safe (if that 68 should ever become relevant). 69 */ 70 71 /* Notes re address size overrides (0x67). 72 73 According to the AMD documentation (24594 Rev 3.09, Sept 2003, 74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose 75 and System Instructions"), Section 1.2.3 ("Address-Size Override 76 Prefix"): 77 78 0x67 applies to all explicit memory references, causing the top 79 32 bits of the effective address to become zero. 80 81 0x67 has no effect on stack references (push/pop); these always 82 use a 64-bit address. 83 84 0x67 changes the interpretation of instructions which implicitly 85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used 86 instead. These are: 87 88 cmp{s,sb,sw,sd,sq} 89 in{s,sb,sw,sd} 90 jcxz, jecxz, jrcxz 91 lod{s,sb,sw,sd,sq} 92 loop{,e,bz,be,z} 93 mov{s,sb,sw,sd,sq} 94 out{s,sb,sw,sd} 95 rep{,e,ne,nz} 96 sca{s,sb,sw,sd,sq} 97 sto{s,sb,sw,sd,sq} 98 xlat{,b} */ 99 100 /* "Special" instructions. 101 102 This instruction decoder can decode three special instructions 103 which mean nothing natively (are no-ops as far as regs/mem are 104 concerned) but have meaning for supporting Valgrind. A special 105 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D 106 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq 107 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi). 
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/

/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/

/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).
149 150 Handlers which are aware of LOCK prefixes are: 151 dis_op2_G_E (add, or, adc, sbb, and, sub, xor) 152 dis_cmpxchg_G_E (cmpxchg) 153 dis_Grp1 (add, or, adc, sbb, and, sub, xor) 154 dis_Grp3 (not, neg) 155 dis_Grp4 (inc, dec) 156 dis_Grp5 (inc, dec) 157 dis_Grp8_Imm (bts, btc, btr) 158 dis_bt_G_E (bts, btc, btr) 159 dis_xadd_G_E (xadd) 160 */ 161 162 163 #include "libvex_basictypes.h" 164 #include "libvex_ir.h" 165 #include "libvex.h" 166 #include "libvex_guest_amd64.h" 167 168 #include "main_util.h" 169 #include "main_globals.h" 170 #include "guest_generic_bb_to_IR.h" 171 #include "guest_generic_x87.h" 172 #include "guest_amd64_defs.h" 173 174 175 /*------------------------------------------------------------*/ 176 /*--- Globals ---*/ 177 /*------------------------------------------------------------*/ 178 179 /* These are set at the start of the translation of an insn, right 180 down in disInstr_AMD64, so that we don't have to pass them around 181 endlessly. They are all constant during the translation of any 182 given insn. */ 183 184 /* These are set at the start of the translation of a BB, so 185 that we don't have to pass them around endlessly. */ 186 187 /* We need to know this to do sub-register accesses correctly. */ 188 static VexEndness host_endness; 189 190 /* Pointer to the guest code area (points to start of BB, not to the 191 insn being processed). */ 192 static const UChar* guest_code; 193 194 /* The guest address corresponding to guest_code[0]. */ 195 static Addr64 guest_RIP_bbstart; 196 197 /* The guest address for the instruction currently being 198 translated. */ 199 static Addr64 guest_RIP_curr_instr; 200 201 /* The IRSB* into which we're generating code. */ 202 static IRSB* irsb; 203 204 /* For ensuring that %rip-relative addressing is done right. A read 205 of %rip generates the address of the next instruction. It may be 206 that we don't conveniently know that inside disAMode(). 
For sanity 207 checking, if the next insn %rip is needed, we make a guess at what 208 it is, record that guess here, and set the accompanying Bool to 209 indicate that -- after this insn's decode is finished -- that guess 210 needs to be checked. */ 211 212 /* At the start of each insn decode, is set to (0, False). 213 After the decode, if _mustcheck is now True, _assumed is 214 checked. */ 215 216 static Addr64 guest_RIP_next_assumed; 217 static Bool guest_RIP_next_mustcheck; 218 219 220 /*------------------------------------------------------------*/ 221 /*--- Helpers for constructing IR. ---*/ 222 /*------------------------------------------------------------*/ 223 224 /* Generate a new temporary of the given type. */ 225 static IRTemp newTemp ( IRType ty ) 226 { 227 vassert(isPlausibleIRType(ty)); 228 return newIRTemp( irsb->tyenv, ty ); 229 } 230 231 /* Add a statement to the list held by "irsb". */ 232 static void stmt ( IRStmt* st ) 233 { 234 addStmtToIRSB( irsb, st ); 235 } 236 237 /* Generate a statement "dst := e". 
*/ 238 static void assign ( IRTemp dst, IRExpr* e ) 239 { 240 stmt( IRStmt_WrTmp(dst, e) ); 241 } 242 243 static IRExpr* unop ( IROp op, IRExpr* a ) 244 { 245 return IRExpr_Unop(op, a); 246 } 247 248 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 249 { 250 return IRExpr_Binop(op, a1, a2); 251 } 252 253 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 254 { 255 return IRExpr_Triop(op, a1, a2, a3); 256 } 257 258 static IRExpr* mkexpr ( IRTemp tmp ) 259 { 260 return IRExpr_RdTmp(tmp); 261 } 262 263 static IRExpr* mkU8 ( ULong i ) 264 { 265 vassert(i < 256); 266 return IRExpr_Const(IRConst_U8( (UChar)i )); 267 } 268 269 static IRExpr* mkU16 ( ULong i ) 270 { 271 vassert(i < 0x10000ULL); 272 return IRExpr_Const(IRConst_U16( (UShort)i )); 273 } 274 275 static IRExpr* mkU32 ( ULong i ) 276 { 277 vassert(i < 0x100000000ULL); 278 return IRExpr_Const(IRConst_U32( (UInt)i )); 279 } 280 281 static IRExpr* mkU64 ( ULong i ) 282 { 283 return IRExpr_Const(IRConst_U64(i)); 284 } 285 286 static IRExpr* mkU ( IRType ty, ULong i ) 287 { 288 switch (ty) { 289 case Ity_I8: return mkU8(i); 290 case Ity_I16: return mkU16(i); 291 case Ity_I32: return mkU32(i); 292 case Ity_I64: return mkU64(i); 293 default: vpanic("mkU(amd64)"); 294 } 295 } 296 297 static void storeLE ( IRExpr* addr, IRExpr* data ) 298 { 299 stmt( IRStmt_Store(Iend_LE, addr, data) ); 300 } 301 302 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 303 { 304 return IRExpr_Load(Iend_LE, ty, addr); 305 } 306 307 static IROp mkSizedOp ( IRType ty, IROp op8 ) 308 { 309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 310 || op8 == Iop_Mul8 311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 314 || op8 == Iop_CasCmpNE8 315 || op8 == Iop_Not8 ); 316 switch (ty) { 317 case Ity_I8: return 0 +op8; 318 case Ity_I16: return 1 +op8; 319 case Ity_I32: return 2 +op8; 320 case Ity_I64: return 3 +op8; 
321 default: vpanic("mkSizedOp(amd64)"); 322 } 323 } 324 325 static 326 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src ) 327 { 328 if (szSmall == 1 && szBig == 4) { 329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src); 330 } 331 if (szSmall == 1 && szBig == 2) { 332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src); 333 } 334 if (szSmall == 2 && szBig == 4) { 335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src); 336 } 337 if (szSmall == 1 && szBig == 8 && !signd) { 338 return unop(Iop_8Uto64, src); 339 } 340 if (szSmall == 1 && szBig == 8 && signd) { 341 return unop(Iop_8Sto64, src); 342 } 343 if (szSmall == 2 && szBig == 8 && !signd) { 344 return unop(Iop_16Uto64, src); 345 } 346 if (szSmall == 2 && szBig == 8 && signd) { 347 return unop(Iop_16Sto64, src); 348 } 349 vpanic("doScalarWidening(amd64)"); 350 } 351 352 static 353 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value ) 354 { 355 IRType ty = typeOfIRExpr(irsb->tyenv, value); 356 stmt( IRStmt_Put(gstOffB, 357 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) ); 358 } 359 360 361 /*------------------------------------------------------------*/ 362 /*--- Debugging output ---*/ 363 /*------------------------------------------------------------*/ 364 365 /* Bomb out if we can't handle something. */ 366 __attribute__ ((noreturn)) 367 static void unimplemented ( const HChar* str ) 368 { 369 vex_printf("amd64toIR: unimplemented feature\n"); 370 vpanic(str); 371 } 372 373 #define DIP(format, args...) \ 374 if (vex_traceflags & VEX_TRACE_FE) \ 375 vex_printf(format, ## args) 376 377 #define DIS(buf, format, args...) \ 378 if (vex_traceflags & VEX_TRACE_FE) \ 379 vex_sprintf(buf, format, ## args) 380 381 382 /*------------------------------------------------------------*/ 383 /*--- Offsets of various parts of the amd64 guest state. 
---*/ 384 /*------------------------------------------------------------*/ 385 386 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX) 387 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX) 388 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX) 389 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX) 390 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP) 391 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP) 392 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI) 393 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI) 394 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8) 395 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9) 396 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10) 397 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11) 398 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12) 399 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13) 400 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14) 401 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15) 402 403 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP) 404 405 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST) 406 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST) 407 408 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP) 409 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1) 410 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2) 411 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP) 412 413 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0]) 414 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0]) 415 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) 416 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG) 417 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) 418 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) 419 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) 420 #define 
OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND) 421 422 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND) 423 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0) 424 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1) 425 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2) 426 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3) 427 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4) 428 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5) 429 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6) 430 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7) 431 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8) 432 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9) 433 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10) 434 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11) 435 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12) 436 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13) 437 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14) 438 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15) 439 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16) 440 441 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE) 442 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART) 443 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN) 444 445 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR) 446 447 448 /*------------------------------------------------------------*/ 449 /*--- Helper bits and pieces for deconstructing the ---*/ 450 /*--- amd64 insn stream. ---*/ 451 /*------------------------------------------------------------*/ 452 453 /* This is the AMD64 register encoding -- integer regs. 
*/ 454 #define R_RAX 0 455 #define R_RCX 1 456 #define R_RDX 2 457 #define R_RBX 3 458 #define R_RSP 4 459 #define R_RBP 5 460 #define R_RSI 6 461 #define R_RDI 7 462 #define R_R8 8 463 #define R_R9 9 464 #define R_R10 10 465 #define R_R11 11 466 #define R_R12 12 467 #define R_R13 13 468 #define R_R14 14 469 #define R_R15 15 470 471 /* This is the Intel register encoding -- segment regs. */ 472 #define R_ES 0 473 #define R_CS 1 474 #define R_SS 2 475 #define R_DS 3 476 #define R_FS 4 477 #define R_GS 5 478 479 480 /* Various simple conversions */ 481 482 static ULong extend_s_8to64 ( UChar x ) 483 { 484 return (ULong)((Long)(((ULong)x) << 56) >> 56); 485 } 486 487 static ULong extend_s_16to64 ( UShort x ) 488 { 489 return (ULong)((Long)(((ULong)x) << 48) >> 48); 490 } 491 492 static ULong extend_s_32to64 ( UInt x ) 493 { 494 return (ULong)((Long)(((ULong)x) << 32) >> 32); 495 } 496 497 /* Figure out whether the mod and rm parts of a modRM byte refer to a 498 register or memory. If so, the byte will have the form 11XXXYYY, 499 where YYY is the register number. */ 500 inline 501 static Bool epartIsReg ( UChar mod_reg_rm ) 502 { 503 return toBool(0xC0 == (mod_reg_rm & 0xC0)); 504 } 505 506 /* Extract the 'g' field from a modRM byte. This only produces 3 507 bits, which is not a complete register number. You should avoid 508 this function if at all possible. */ 509 inline 510 static Int gregLO3ofRM ( UChar mod_reg_rm ) 511 { 512 return (Int)( (mod_reg_rm >> 3) & 7 ); 513 } 514 515 /* Ditto the 'e' field of a modRM byte. */ 516 inline 517 static Int eregLO3ofRM ( UChar mod_reg_rm ) 518 { 519 return (Int)(mod_reg_rm & 0x7); 520 } 521 522 /* Get a 8/16/32-bit unsigned value out of the insn stream. 
*/ 523 524 static inline UChar getUChar ( Long delta ) 525 { 526 UChar v = guest_code[delta+0]; 527 return v; 528 } 529 530 static UInt getUDisp16 ( Long delta ) 531 { 532 UInt v = guest_code[delta+1]; v <<= 8; 533 v |= guest_code[delta+0]; 534 return v & 0xFFFF; 535 } 536 537 //.. static UInt getUDisp ( Int size, Long delta ) 538 //.. { 539 //.. switch (size) { 540 //.. case 4: return getUDisp32(delta); 541 //.. case 2: return getUDisp16(delta); 542 //.. case 1: return getUChar(delta); 543 //.. default: vpanic("getUDisp(x86)"); 544 //.. } 545 //.. return 0; /*notreached*/ 546 //.. } 547 548 549 /* Get a byte value out of the insn stream and sign-extend to 64 550 bits. */ 551 static Long getSDisp8 ( Long delta ) 552 { 553 return extend_s_8to64( guest_code[delta] ); 554 } 555 556 /* Get a 16-bit value out of the insn stream and sign-extend to 64 557 bits. */ 558 static Long getSDisp16 ( Long delta ) 559 { 560 UInt v = guest_code[delta+1]; v <<= 8; 561 v |= guest_code[delta+0]; 562 return extend_s_16to64( (UShort)v ); 563 } 564 565 /* Get a 32-bit value out of the insn stream and sign-extend to 64 566 bits. */ 567 static Long getSDisp32 ( Long delta ) 568 { 569 UInt v = guest_code[delta+3]; v <<= 8; 570 v |= guest_code[delta+2]; v <<= 8; 571 v |= guest_code[delta+1]; v <<= 8; 572 v |= guest_code[delta+0]; 573 return extend_s_32to64( v ); 574 } 575 576 /* Get a 64-bit value out of the insn stream. */ 577 static Long getDisp64 ( Long delta ) 578 { 579 ULong v = 0; 580 v |= guest_code[delta+7]; v <<= 8; 581 v |= guest_code[delta+6]; v <<= 8; 582 v |= guest_code[delta+5]; v <<= 8; 583 v |= guest_code[delta+4]; v <<= 8; 584 v |= guest_code[delta+3]; v <<= 8; 585 v |= guest_code[delta+2]; v <<= 8; 586 v |= guest_code[delta+1]; v <<= 8; 587 v |= guest_code[delta+0]; 588 return v; 589 } 590 591 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error 592 if this is called with size==8. Should not happen. 
*/ 593 static Long getSDisp ( Int size, Long delta ) 594 { 595 switch (size) { 596 case 4: return getSDisp32(delta); 597 case 2: return getSDisp16(delta); 598 case 1: return getSDisp8(delta); 599 default: vpanic("getSDisp(amd64)"); 600 } 601 } 602 603 static ULong mkSizeMask ( Int sz ) 604 { 605 switch (sz) { 606 case 1: return 0x00000000000000FFULL; 607 case 2: return 0x000000000000FFFFULL; 608 case 4: return 0x00000000FFFFFFFFULL; 609 case 8: return 0xFFFFFFFFFFFFFFFFULL; 610 default: vpanic("mkSzMask(amd64)"); 611 } 612 } 613 614 static Int imin ( Int a, Int b ) 615 { 616 return (a < b) ? a : b; 617 } 618 619 static IRType szToITy ( Int n ) 620 { 621 switch (n) { 622 case 1: return Ity_I8; 623 case 2: return Ity_I16; 624 case 4: return Ity_I32; 625 case 8: return Ity_I64; 626 default: vex_printf("\nszToITy(%d)\n", n); 627 vpanic("szToITy(amd64)"); 628 } 629 } 630 631 632 /*------------------------------------------------------------*/ 633 /*--- For dealing with prefixes. ---*/ 634 /*------------------------------------------------------------*/ 635 636 /* The idea is to pass around an int holding a bitmask summarising 637 info from the prefixes seen on the current instruction, including 638 info from the REX byte. This info is used in various places, but 639 most especially when making sense of register fields in 640 instructions. 641 642 The top 8 bits of the prefix are 0x55, just as a hacky way to 643 ensure it really is a valid prefix. 644 645 Things you can safely assume about a well-formed prefix: 646 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set. 647 * if REX is not present then REXW,REXR,REXX,REXB will read 648 as zero. 649 * F2 and F3 will not both be 1. 
650 */ 651 652 typedef UInt Prefix; 653 654 #define PFX_ASO (1<<0) /* address-size override present (0x67) */ 655 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */ 656 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */ 657 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */ 658 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */ 659 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */ 660 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */ 661 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */ 662 #define PFX_F2 (1<<8) /* REP/REPE/REPZ prefix present (0xF2) */ 663 #define PFX_F3 (1<<9) /* REPNE/REPNZ prefix present (0xF3) */ 664 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */ 665 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */ 666 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */ 667 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */ 668 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */ 669 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */ 670 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */ 671 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */ 672 /* The extra register field VEX.vvvv is encoded (after not-ing it) as 673 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit 674 positions. 
*/ 675 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */ 676 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */ 677 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */ 678 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */ 679 680 681 #define PFX_EMPTY 0x55000000 682 683 static Bool IS_VALID_PFX ( Prefix pfx ) { 684 return toBool((pfx & 0xFF000000) == PFX_EMPTY); 685 } 686 687 static Bool haveREX ( Prefix pfx ) { 688 return toBool(pfx & PFX_REX); 689 } 690 691 static Int getRexW ( Prefix pfx ) { 692 return (pfx & PFX_REXW) ? 1 : 0; 693 } 694 static Int getRexR ( Prefix pfx ) { 695 return (pfx & PFX_REXR) ? 1 : 0; 696 } 697 static Int getRexX ( Prefix pfx ) { 698 return (pfx & PFX_REXX) ? 1 : 0; 699 } 700 static Int getRexB ( Prefix pfx ) { 701 return (pfx & PFX_REXB) ? 1 : 0; 702 } 703 704 /* Check a prefix doesn't have F2 or F3 set in it, since usually that 705 completely changes what instruction it really is. */ 706 static Bool haveF2orF3 ( Prefix pfx ) { 707 return toBool((pfx & (PFX_F2|PFX_F3)) > 0); 708 } 709 static Bool haveF2andF3 ( Prefix pfx ) { 710 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3)); 711 } 712 static Bool haveF2 ( Prefix pfx ) { 713 return toBool((pfx & PFX_F2) > 0); 714 } 715 static Bool haveF3 ( Prefix pfx ) { 716 return toBool((pfx & PFX_F3) > 0); 717 } 718 719 static Bool have66 ( Prefix pfx ) { 720 return toBool((pfx & PFX_66) > 0); 721 } 722 static Bool haveASO ( Prefix pfx ) { 723 return toBool((pfx & PFX_ASO) > 0); 724 } 725 static Bool haveLOCK ( Prefix pfx ) { 726 return toBool((pfx & PFX_LOCK) > 0); 727 } 728 729 /* Return True iff pfx has 66 set and F2 and F3 clear */ 730 static Bool have66noF2noF3 ( Prefix pfx ) 731 { 732 return 733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66); 734 } 735 736 /* Return True iff pfx has F2 set and 66 and F3 clear */ 737 static Bool haveF2no66noF3 ( Prefix pfx ) 738 { 739 return 740 toBool((pfx & 
(PFX_66|PFX_F2|PFX_F3)) == PFX_F2); 741 } 742 743 /* Return True iff pfx has F3 set and 66 and F2 clear */ 744 static Bool haveF3no66noF2 ( Prefix pfx ) 745 { 746 return 747 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3); 748 } 749 750 /* Return True iff pfx has F3 set and F2 clear */ 751 static Bool haveF3noF2 ( Prefix pfx ) 752 { 753 return 754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3); 755 } 756 757 /* Return True iff pfx has F2 set and F3 clear */ 758 static Bool haveF2noF3 ( Prefix pfx ) 759 { 760 return 761 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2); 762 } 763 764 /* Return True iff pfx has 66, F2 and F3 clear */ 765 static Bool haveNo66noF2noF3 ( Prefix pfx ) 766 { 767 return 768 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0); 769 } 770 771 /* Return True iff pfx has any of 66, F2 and F3 set */ 772 static Bool have66orF2orF3 ( Prefix pfx ) 773 { 774 return toBool( ! haveNo66noF2noF3(pfx) ); 775 } 776 777 /* Return True iff pfx has 66 or F3 set */ 778 static Bool have66orF3 ( Prefix pfx ) 779 { 780 return toBool((pfx & (PFX_66|PFX_F3)) > 0); 781 } 782 783 /* Clear all the segment-override bits in a prefix. */ 784 static Prefix clearSegBits ( Prefix p ) 785 { 786 return 787 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS); 788 } 789 790 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */ 791 static UInt getVexNvvvv ( Prefix pfx ) { 792 UInt r = (UInt)pfx; 793 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */ 794 return r & 0xF; 795 } 796 797 static Bool haveVEX ( Prefix pfx ) { 798 return toBool(pfx & PFX_VEX); 799 } 800 801 static Int getVexL ( Prefix pfx ) { 802 return (pfx & PFX_VEXL) ? 1 : 0; 803 } 804 805 806 /*------------------------------------------------------------*/ 807 /*--- For dealing with escapes ---*/ 808 /*------------------------------------------------------------*/ 809 810 811 /* Escapes come after the prefixes, but before the primary opcode 812 byte. They escape the primary opcode byte into a bigger space. 
813 The 0xF0000000 isn't significant, except so as to make it not 814 overlap valid Prefix values, for sanity checking. 815 */ 816 817 typedef 818 enum { 819 ESC_NONE=0xF0000000, // none 820 ESC_0F, // 0F 821 ESC_0F38, // 0F 38 822 ESC_0F3A // 0F 3A 823 } 824 Escape; 825 826 827 /*------------------------------------------------------------*/ 828 /*--- For dealing with integer registers ---*/ 829 /*------------------------------------------------------------*/ 830 831 /* This is somewhat complex. The rules are: 832 833 For 64, 32 and 16 bit register references, the e or g fields in the 834 modrm bytes supply the low 3 bits of the register number. The 835 fourth (most-significant) bit of the register number is supplied by 836 the REX byte, if it is present; else that bit is taken to be zero. 837 838 The REX.R bit supplies the high bit corresponding to the g register 839 field, and the REX.B bit supplies the high bit corresponding to the 840 e register field (when the mod part of modrm indicates that modrm's 841 e component refers to a register and not to memory). 842 843 The REX.X bit supplies a high register bit for certain registers 844 in SIB address modes, and is generally rarely used. 845 846 For 8 bit register references, the presence of the REX byte itself 847 has significance. If there is no REX present, then the 3-bit 848 number extracted from the modrm e or g field is treated as an index 849 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the 850 old x86 encoding scheme. 851 852 But if there is a REX present, the register reference is 853 interpreted in the same way as for 64/32/16-bit references: a high 854 bit is extracted from REX, giving a 4-bit number, and the denoted 855 register is the lowest 8 bits of the 16 integer registers denoted 856 by the number. In particular, values 3 through 7 of this sequence 857 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of 858 %rsp %rbp %rsi %rdi. 
859 860 The REX.W bit has no bearing at all on register numbers. Instead 861 its presence indicates that the operand size is to be overridden 862 from its default value (32 bits) to 64 bits instead. This is in 863 the same fashion that an 0x66 prefix indicates the operand size is 864 to be overridden from 32 bits down to 16 bits. When both REX.W and 865 0x66 are present there is a conflict, and REX.W takes precedence. 866 867 Rather than try to handle this complexity using a single huge 868 function, several smaller ones are provided. The aim is to make it 869 as difficult as possible to screw up register decoding in a subtle 870 and hard-to-track-down way. 871 872 Because these routines fish around in the host's memory (that is, 873 in the guest state area) for sub-parts of guest registers, their 874 correctness depends on the host's endianness. So far these 875 routines only work for little-endian hosts. Those for which 876 endianness is important have assertions to ensure sanity. 877 */ 878 879 880 /* About the simplest question you can ask: where do the 64-bit 881 integer registers live (in the guest state) ? */ 882 883 static Int integerGuestReg64Offset ( UInt reg ) 884 { 885 switch (reg) { 886 case R_RAX: return OFFB_RAX; 887 case R_RCX: return OFFB_RCX; 888 case R_RDX: return OFFB_RDX; 889 case R_RBX: return OFFB_RBX; 890 case R_RSP: return OFFB_RSP; 891 case R_RBP: return OFFB_RBP; 892 case R_RSI: return OFFB_RSI; 893 case R_RDI: return OFFB_RDI; 894 case R_R8: return OFFB_R8; 895 case R_R9: return OFFB_R9; 896 case R_R10: return OFFB_R10; 897 case R_R11: return OFFB_R11; 898 case R_R12: return OFFB_R12; 899 case R_R13: return OFFB_R13; 900 case R_R14: return OFFB_R14; 901 case R_R15: return OFFB_R15; 902 default: vpanic("integerGuestReg64Offset(amd64)"); 903 } 904 } 905 906 907 /* Produce the name of an integer register, for printing purposes. 908 reg is a number in the range 0 .. 
15 that has been generated from a 909 3-bit reg-field number and a REX extension bit. irregular denotes 910 the case where sz==1 and no REX byte is present. */ 911 912 static 913 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular ) 914 { 915 static const HChar* ireg64_names[16] 916 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", 917 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; 918 static const HChar* ireg32_names[16] 919 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", 920 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" }; 921 static const HChar* ireg16_names[16] 922 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", 923 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" }; 924 static const HChar* ireg8_names[16] 925 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", 926 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" }; 927 static const HChar* ireg8_irregular[8] 928 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" }; 929 930 vassert(reg < 16); 931 if (sz == 1) { 932 if (irregular) 933 vassert(reg < 8); 934 } else { 935 vassert(irregular == False); 936 } 937 938 switch (sz) { 939 case 8: return ireg64_names[reg]; 940 case 4: return ireg32_names[reg]; 941 case 2: return ireg16_names[reg]; 942 case 1: if (irregular) { 943 return ireg8_irregular[reg]; 944 } else { 945 return ireg8_names[reg]; 946 } 947 default: vpanic("nameIReg(amd64)"); 948 } 949 } 950 951 /* Using the same argument conventions as nameIReg, produce the 952 guest state offset of an integer register. 
 */

static 
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present.  Encodings
      4..7 then denote %ah/%ch/%dh/%bh, which on a little-endian host
      live at byte offset 1 within RAX/RCX/RDX/RBX respectively. */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}


/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(host_endness == VexEndnessLE);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   /* %ah is byte 1 of RAX on a little-endian host. */
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}


/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      /* 32-bit reads are done as a 64-bit read, narrowed. */
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      /* 32-bit writes zero-extend into the full 64-bit register. */
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      /* 16- and 8-bit writes are partial Puts, leaving the upper
         bytes of RAX unchanged. */
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}


/* Read/write various widths of %RDX, as it has various
   special-purpose uses.
 */

static const HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      /* 32-bit reads are done as a 64-bit read, narrowed. */
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      /* 32-bit writes zero-extend into the full 64-bit register;
         16- and 8-bit writes are partial Puts. */
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}


/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static const HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}


/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs.
*/ 1115 1116 static IRExpr* getIReg32 ( UInt regno ) 1117 { 1118 vassert(host_endness == VexEndnessLE); 1119 return unop(Iop_64to32, 1120 IRExpr_Get( integerGuestReg64Offset(regno), 1121 Ity_I64 )); 1122 } 1123 1124 static void putIReg32 ( UInt regno, IRExpr* e ) 1125 { 1126 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32); 1127 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1128 unop(Iop_32Uto64,e) ) ); 1129 } 1130 1131 static const HChar* nameIReg32 ( UInt regno ) 1132 { 1133 return nameIReg( 4, regno, False ); 1134 } 1135 1136 1137 /* Simplistic functions to deal with the lower quarters of integer 1138 registers as a straightforward bank of 16 16-bit regs. */ 1139 1140 static IRExpr* getIReg16 ( UInt regno ) 1141 { 1142 vassert(host_endness == VexEndnessLE); 1143 return IRExpr_Get( integerGuestReg64Offset(regno), 1144 Ity_I16 ); 1145 } 1146 1147 static void putIReg16 ( UInt regno, IRExpr* e ) 1148 { 1149 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16); 1150 stmt( IRStmt_Put( integerGuestReg64Offset(regno), 1151 unop(Iop_16Uto64,e) ) ); 1152 } 1153 1154 static const HChar* nameIReg16 ( UInt regno ) 1155 { 1156 return nameIReg( 2, regno, False ); 1157 } 1158 1159 1160 /* Sometimes what we know is a 3-bit register number, a REX byte, and 1161 which field of the REX byte is to be used to extend to a 4-bit 1162 number. These functions cater for that situation. 
 */
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      /* 32-bit reads are done as a full 64-bit read, narrowed; hence
         bump sz to 8 so that offsetIReg/szToITy refer to the whole
         register. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     False/*!irregular*/ ),
                     szToITy(sz)
                 )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   /* 32-bit writes zero-extend into the full 64-bit register. */
   stmt( IRStmt_Put( 
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}


/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}


/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
 */
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      /* 32-bit reads are done as a full 64-bit read, narrowed. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      /* 32-bit writes zero-extend into the full 64-bit register. */
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/* Accessors for the register denoted by the VEX.vvvv field. */

static
IRExpr* getIRegV ( Int sz, Prefix pfx )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                         szToITy(sz) );
   }
}

static
void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
}

static
const HChar* nameIRegV ( Int sz, Prefix pfx )
{
   return nameIReg( sz, getVexNvvvv(pfx), False );
}



/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static 
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      /* 32-bit reads are done as a full 64-bit read, narrowed. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static 
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      /* 32-bit writes zero-extend into the full 64-bit register. */
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}


/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

/* Map a YMM register number (0 .. 15) to its guest state offset. */
static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}

/* An XMM register is the low 128 bits of the corresponding YMM
   register, so (on a little-endian host) it lives at the same
   offset. */
static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   return ymmGuestRegOffset( xmmreg );
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register).  */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only.
 */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}

/* Whole-register and per-lane reads of the XMM registers.  The F
   variants read the lane as a float type rather than an integer. */

static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

/* Whole-register and per-lane writes of the XMM registers.  Lane
   writes leave the other lanes unchanged. */

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

/* Corresponding accessors for the full 256-bit YMM registers. */

static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}

static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

/* Make a 128-bit constant from a 16-bit lane-selector mask. */
static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

/* Write the low half of a YMM reg and zero out the upper half. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}

/* Logical AND of two Ity_I1 values, computed by widening to I64. */
static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1, 
               binop(Iop_And64, 
                     unop(Iop_1Uto64,x), 
                     unop(Iop_1Uto64,y)));
}

/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially.
 */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr, 
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   /* If the old memory value differs from the expected value, the CAS
      failed: branch back to restart_point so the guest instruction is
      retried. */
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}


/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1.
 */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/, 
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 1/8/16/32/64 bit int expr to 64.
 */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}

/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}


/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static 
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   /* Size adjustment: the thunk op enums are laid out B/W/L/Q
      consecutively, so add 0..3 to the base (8-bit) op. */
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}


/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static 
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged.
 */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value.
      Each write is conditional on the guard (shift amount != 0): if
      the guard is false the old thunk field is written back
      unchanged. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU64(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, 
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
}


/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }
   
   /* This has to come first, because calculating the C flag 
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}


/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
         break;
      case Ity_I64:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
         break;
      default:
         vpanic("setFlags_MUL(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
}


/* -------------- Condition codes. -------------- */

/* Condition codes, using the AMD encoding.
 */

static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}

/* Canonicalise a condition code to its even-numbered ("positive")
   form; *needInvert says whether the caller must invert the result.
   Relies on the encoding pairing each condition (even) with its
   negation (the following odd value). */
static 
AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
                                          /*OUT*/Bool*   needInvert )
{
   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}


/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

     if taddr is IRTemp_INVALID, then no store is generated.

     if taddr is not IRTemp_INVALID, then a store (using taddr as
     the address) is generated:

       if texpVal is IRTemp_INVALID then a normal store is
       generated, and restart_point must be zero (it is irrelevant).

       if texpVal is not IRTemp_INVALID then a cas-style store is
       generated.
   texpVal is the expected value, restart_point
   is the restart point if the store fails, and texpVal must
   have the same type as tres.

*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);      /* old carry, narrowed to 'ty' */
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old-carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'.
 */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* NOTE: DEP2 is stored as ta2 ^ oldcn, with the old carry in NDEP;
      presumably the rflags helper undoes the xor to recover ta2 --
      see the ADC cases in the flags helper for confirmation. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1))  ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);      /* old carry (borrow), narrowed */
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old-borrow */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'.
 */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Same DEP2 = ta2 ^ oldcn encoding as in helper_ADC above. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}


/* -------------- Helpers for disassembly printing. -------------- */

/* Mnemonic for a Group-1 opcode extension (/0../7 of 0x80..0x83). */
static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8]
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

/* Mnemonic for a Group-2 (shift/rotate) opcode extension.  Entry 6
   is printed "shl"; presumably /6 is the SAL alias, which assembles
   identically to SHL -- verify against the decoder's use. */
static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8]
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

/* Mnemonic for a Group-4 opcode extension (only /0 and /1 valid). */
static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8]
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

/* Mnemonic for a Group-5 opcode extension (/0../6 valid). */
static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8]
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???"
       };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

/* Mnemonic for a Group-8 (bit test) opcode extension; only /4../7
   are valid, hence the lower bound of 4. */
static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8]
      = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

/* Printable name for a segment register number. */
static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(amd64)");
   }
}

/* Printable name for an MMX register number (0..7). */
static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8]
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

/* Printable name for an XMM register number (0..15). */
static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[16]
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

/* Size-suffix letter for an MMX granularity code (0..3 = b/w/d/q). */
static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

/* AT&T-style operand-size suffix for an operand size in bytes. */
static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}

/* Printable name for a YMM register number (0..15). */
static const HChar* nameYMMReg ( Int ymmreg )
{
   static const HChar* ymm_names[16]
     = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3",
         "%ymm4",
         "%ymm5",  "%ymm6",  "%ymm7",
         "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
         "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   return ymm_names[ymmreg];
}


/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

/* Terminate this block with an unconditional jump of kind 'kind' to
   the literal guest address 'd64'.  The block must not already have
   been terminated (hence the asserts on 'dres'). */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr64 d64 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}

/* As jmp_lit, but the destination address is held in temporary 't'. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}

/* Conditional branch: goto d64_true if 'cond' holds, else d64_false.
   The condition is first positivised so that the IRStmt_Exit guard is
   always a positive-form test; 'invert' records which arm the exit
   leads to. */
static
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put(
            OFFB_RIP, mkU64(d64_false) ) );
   }
}

/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static
void make_redzone_AbiHint ( const VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, const HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;
   vassert(szB >= 0);

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);
   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
   if (szB > 0)
      stmt( IRStmt_AbiHint(
               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
               szB,
               mkexpr(nia)
            ));
}


/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

/* Printable text for whichever segment-override prefix (if any) is
   set in 'pfx'. */
static
const HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   if (pfx & PFX_SS) return "%ss:";
   return ""; /* no override */
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override
   present. */
static
IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* Note that the below are hacks that rely on the assumption
      that %fs or %gs are constant.
      Typically, %fs is always 0x63 on linux (in the main thread, it
      stays at value 0), %gs always 0x60 on Darwin, ... */
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_const) {
         /* return virtual + guest_FS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_FS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_const) {
         /* return virtual + guest_GS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_GS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   /* --- address size override --- */
   /* 0x67 prefix: zero the top 32 bits of the effective address. */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   return virtual;
}

/* Dead code retained from the x86 front end: LDT/GDT-based segment
   translation.  Kept commented out for reference. */
//.. {
//..    Int    sreg;
//..    IRType hWordTy;
//..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
//..
//..    if (sorb == 0)
//..       /* the common case - no override */
//..       return virtual;
//..
//..    switch (sorb) {
//..       case 0x3E: sreg = R_DS; break;
//..       case 0x26: sreg = R_ES; break;
//..       case 0x64: sreg = R_FS; break;
//..       case 0x65: sreg = R_GS; break;
//..       default: vpanic("handleAddrOverrides(x86,guest)");
//..    }
//..
//..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
//..
//..    seg_selector = newTemp(Ity_I32);
//..    ldt_ptr      = newTemp(hWordTy);
//..    gdt_ptr      = newTemp(hWordTy);
//..    r64          = newTemp(Ity_I64);
//..
//..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
//..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
//..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
//..
//..    /*
//..    Call this to do the translation and limit checks:
//..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
//..                                  UInt seg_selector, UInt virtual_addr )
//..    */
//..    assign(
//..       r64,
//..       mkIRExprCCall(
//..          Ity_I64,
//..          0/*regparms*/,
//..          "x86g_use_seg_selector",
//..          &x86g_use_seg_selector,
//..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
//..                         mkexpr(seg_selector), virtual)
//..       )
//..    );
//..
//..    /* If the high 32 of the result are non-zero, there was a
//..       failure in address translation.  In which case, make a
//..       quick exit.
//..    */
//..    stmt(
//..       IRStmt_Exit(
//..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
//..          Ijk_MapFail,
//..          IRConst_U32( guest_eip_curr_instr )
//..       )
//..    );
//..
//..    /* otherwise, here's the translated result. */
//..    return unop(Iop_64to32, mkexpr(r64));
//.. }


/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
*/

/* Park 'addr64' in a fresh Ity_I64 temporary and return its identity. */
static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

static
IRTemp disAMode ( /*OUT*/Int* len,
                  const VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d,
                                     nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ...
         d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed   = guest_RIP_bbstart
                                      + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.
            The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
                 = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
                 = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
                 = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         /* Case: real base, real index -> base + (index << scale). */
         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         /* Case: literal base (d32), real index. */
         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                                         mkU8(scale)),
                        mkU64(d))));
         }

         /* Case: real base, no index. */
         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         /* Case: literal base (d32), no index. */
         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf,
                "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0);
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
              = d8 + %base
            | %index != %ESP
              = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp8(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            /* No index: d8 + base. */
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            /* d8 + base + (index << scale). */
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
              = d32 + %base
            | %index != %RSP
              = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long  d       = getSDisp32(delta+1);

         if (index_r == R_RSP && 0==getRexX(pfx)) {
            /* No index: d32 + base. */
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                      d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            /* d32 + base + (index << scale). */
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}


/* Similarly for VSIB addressing.  This returns just the addend,
   and fills in *rI and *vscale with the register number of the vector
   index and its multiplicand.
*/
static
IRTemp disAVSIBMode ( /*OUT*/Int* len,
                      const VexAbiInfo* vbi, Prefix pfx, Long delta,
                      /*OUT*/HChar* buf, /*OUT*/UInt* rI,
                      IRType ty, /*OUT*/Int* vscale )
{
   UChar mod_reg_rm = getUChar(delta);
   const HChar *vindex;

   *len = 0;
   *rI = 0;
   *vscale = 0;
   buf[0] = (UChar)0;
   /* VSIB requires a SIB byte and a memory (non-register) operand. */
   if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
      return IRTemp_INVALID;

   UChar sib     = getUChar(delta+1);
   UChar scale   = toUChar((sib >> 6) & 3);
   UChar index_r = toUChar((sib >> 3) & 7);
   UChar base_r  = toUChar(sib & 7);
   Long  d       = 0;
   /* correct since #(R13) == 8 + #(RBP) */
   Bool  base_is_BPor13 = toBool(base_r == R_RBP);
   delta += 2;
   *len = 2;

   /* The vector-index register number, including the REX.X bit. */
   *rI = index_r | (getRexX(pfx) << 3);
   if (ty == Ity_V128)
      vindex = nameXMMReg(*rI);
   else
      vindex = nameYMMReg(*rI);
   *vscale = 1<<scale;

   switch (mod_reg_rm >> 6) {
      case 0:
         if (base_is_BPor13) {
            /* mod==0, base==RBP/R13: 32-bit literal, no base reg. */
            d = getSDisp32(delta);
            *len += 4;
            if (scale == 0) {
               DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
            } else {
               DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex,
                        1<<scale);
            }
            return disAMode_copy2tmp( mkU64(d) );
         } else {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                        nameIRegRexB(8,pfx,base_r), vindex);
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                        nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
            }
         }
         break;
      case 1:
         d = getSDisp8(delta);
         *len += 1;
         goto have_disp;
      case 2:
         d = getSDisp32(delta);
         *len += 4;
      have_disp:
         if (scale == 0) {
            DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                     nameIRegRexB(8,pfx,base_r), vindex);
         } else {
            DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                     nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
         }
         break;
   }

   /* d==0 covers both "no displacement" and an explicit zero
      displacement; either way the addend is just the base. */
   if (!d)
      return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
   return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
                             mkU64(d)) );
}


/* Figure out the number of (insn-stream) bytes constituting the amode
   beginning at delta.  Is useful for getting hold of literals beyond
   the end of the amode before it has been disassembled.  */

static UInt lengthAMode ( Prefix pfx, Long delta )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                      /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                            /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                      /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         return 5;

      case 0x04: {
         /* SIB, with no displacement.
          */
         UChar sib    = getUChar(delta);
         UChar base_r = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         if (base_is_BPor13) {
            /* mod==0 with base RBP/R13 implies a trailing d32. */
            return 6;
         } else {
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}


/*------------------------------------------------------------*/
/*--- Disassembling common idioms                          ---*/
/*------------------------------------------------------------*/

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed the a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G,  tmp
                       OP %E,   tmp
                       PUT tmp, %G

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       GET %G, tmp2
                       OP tmpa, tmp2
                       PUT tmp2, %G

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpa
                       OP %G, tmpa
                       PUT tmpa, %G
*/
static
ULong dis_op2_E_G ( const VexAbiInfo* vbi,
                    Prefix       pfx,
                    Bool         addSubCarry,
                    IROp         op8,
                    Bool         keep,
                    Int          size,
                    Long         delta0,
                    const HChar* t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow.
      */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
         putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no
                      store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}



/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed the a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       LD (tmpa), tmpv
                       OP %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_op2_G_E ( const VexAbiInfo* vbi,
                    Prefix       pfx,
                    Bool         addSubCarry,
                    IROp         op8,
                    Bool         keep,
                    Int          size,
                    Long         delta0,
                    const HChar* t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.
*/ 3061 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry)) 3062 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) { 3063 putIRegE(size,pfx,rm, mkU(ty,0)); 3064 } 3065 3066 assign(dst0, getIRegE(size,pfx,rm)); 3067 assign(src, getIRegG(size,pfx,rm)); 3068 3069 if (addSubCarry && op8 == Iop_Add8) { 3070 helper_ADC( size, dst1, dst0, src, 3071 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3072 putIRegE(size, pfx, rm, mkexpr(dst1)); 3073 } else 3074 if (addSubCarry && op8 == Iop_Sub8) { 3075 helper_SBB( size, dst1, dst0, src, 3076 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 ); 3077 putIRegE(size, pfx, rm, mkexpr(dst1)); 3078 } else { 3079 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3080 if (isAddSub(op8)) 3081 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3082 else 3083 setFlags_DEP1(op8, dst1, ty); 3084 if (keep) 3085 putIRegE(size, pfx, rm, mkexpr(dst1)); 3086 } 3087 3088 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3089 nameIRegG(size,pfx,rm), 3090 nameIRegE(size,pfx,rm)); 3091 return 1+delta0; 3092 } 3093 3094 /* E refers to memory */ 3095 { 3096 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3097 assign(dst0, loadLE(ty,mkexpr(addr))); 3098 assign(src, getIRegG(size,pfx,rm)); 3099 3100 if (addSubCarry && op8 == Iop_Add8) { 3101 if (haveLOCK(pfx)) { 3102 /* cas-style store */ 3103 helper_ADC( size, dst1, dst0, src, 3104 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3105 } else { 3106 /* normal store */ 3107 helper_ADC( size, dst1, dst0, src, 3108 /*store*/addr, IRTemp_INVALID, 0 ); 3109 } 3110 } else 3111 if (addSubCarry && op8 == Iop_Sub8) { 3112 if (haveLOCK(pfx)) { 3113 /* cas-style store */ 3114 helper_SBB( size, dst1, dst0, src, 3115 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr ); 3116 } else { 3117 /* normal store */ 3118 helper_SBB( size, dst1, dst0, src, 3119 /*store*/addr, IRTemp_INVALID, 0 ); 3120 } 3121 } else { 3122 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src))); 3123 if 
(keep) { 3124 if (haveLOCK(pfx)) { 3125 if (0) vex_printf("locked case\n" ); 3126 casLE( mkexpr(addr), 3127 mkexpr(dst0)/*expval*/, 3128 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr ); 3129 } else { 3130 if (0) vex_printf("nonlocked case\n"); 3131 storeLE(mkexpr(addr), mkexpr(dst1)); 3132 } 3133 } 3134 if (isAddSub(op8)) 3135 setFlags_DEP1_DEP2(op8, dst0, src, ty); 3136 else 3137 setFlags_DEP1(op8, dst1, ty); 3138 } 3139 3140 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size), 3141 nameIRegG(size,pfx,rm), dis_buf); 3142 return len+delta0; 3143 } 3144 } 3145 3146 3147 /* Handle move instructions of the form 3148 mov E, G meaning 3149 mov reg-or-mem, reg 3150 Is passed the a ptr to the modRM byte, and the data size. Returns 3151 the address advanced completely over this instruction. 3152 3153 E(src) is reg-or-mem 3154 G(dst) is reg. 3155 3156 If E is reg, --> GET %E, tmpv 3157 PUT tmpv, %G 3158 3159 If E is mem --> (getAddr E) -> tmpa 3160 LD (tmpa), tmpb 3161 PUT tmpb, %G 3162 */ 3163 static 3164 ULong dis_mov_E_G ( const VexAbiInfo* vbi, 3165 Prefix pfx, 3166 Int size, 3167 Long delta0 ) 3168 { 3169 Int len; 3170 UChar rm = getUChar(delta0); 3171 HChar dis_buf[50]; 3172 3173 if (epartIsReg(rm)) { 3174 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm)); 3175 DIP("mov%c %s,%s\n", nameISize(size), 3176 nameIRegE(size,pfx,rm), 3177 nameIRegG(size,pfx,rm)); 3178 return 1+delta0; 3179 } 3180 3181 /* E refers to memory */ 3182 { 3183 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 3184 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr))); 3185 DIP("mov%c %s,%s\n", nameISize(size), 3186 dis_buf, 3187 nameIRegG(size,pfx,rm)); 3188 return delta0+len; 3189 } 3190 } 3191 3192 3193 /* Handle move instructions of the form 3194 mov G, E meaning 3195 mov reg, reg-or-mem 3196 Is passed the a ptr to the modRM byte, and the data size. Returns 3197 the address advanced completely over this instruction. 3198 We have to decide here whether F2 or F3 are acceptable. 
   F2 never is.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G,  tmp
                       PUT tmp, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, (tmpa)
*/
static
ULong dis_mov_G_E ( const VexAbiInfo*  vbi,
                    Prefix             pfx,
                    Int                size,
                    Long               delta0,
                    /*OUT*/Bool*       ok )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   /* On an unacceptable prefix combination we bail out with *ok ==
      False and delta0 unchanged, so the caller can diagnose an
      undecodable instruction. */
   *ok = True;

   if (epartIsReg(rm)) {
      /* Neither F2 nor F3 is acceptable in the register case. */
      if (haveF2orF3(pfx)) { *ok = False; return delta0; }
      putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      if (haveF2(pfx)) { *ok = False; return delta0; }
      /* F3(XRELEASE) is acceptable, though. */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
      DIP("mov%c %s,%s\n", nameISize(size),
                           nameIRegG(size,pfx,rm),
                           dis_buf);
      return len+delta0;
   }
}


/* op $immediate, AL/AX/EAX/RAX.
*/
static
ULong dis_op_imm_A ( Int    size,
                     Bool   carrying,
                     IROp   op8,
                     Bool   keep,
                     Long   delta,
                     const HChar* t_amd64opc )
{
   /* The immediate is at most 4 bytes: 64-bit forms still carry only
      a 32-bit immediate, sign-extended via getSDisp. */
   Int    size4 = imin(size,4);
   IRType ty    = szToITy(size);
   IRTemp dst0  = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   Long  lit    = getSDisp(size4,delta);
   assign(dst0, getIRegRAX(size));
   assign(src,  mkU(ty,lit & mkSizeMask(size)));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(amd64,guest)");

   /* keep == False is the CMP/TEST style: flags only, no writeback. */
   if (keep)
      putIRegRAX(size, mkexpr(dst1));

   DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
                           lit, nameIRegRAX(size));
   return delta+size4;
}


/* Sign- and Zero-extending moves. */
static
ULong dis_movx_E_G ( const VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, Int szs, Int szd, Bool sign_extend )
{
   UChar rm = getUChar(delta);
   if (epartIsReg(rm)) {
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       getIRegE(szs,pfx,rm)));
      DIP("mov%c%c%c %s,%s\n", sign_extend ?
's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               nameIRegE(szs,pfx,rm),
                               nameIRegG(szd,pfx,rm));
      return 1+delta;
   }

   /* E refers to memory */
   {
      Int    len;
      HChar  dis_buf[50];
      IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      putIRegG(szd, pfx, rm,
                    doScalarWidening(
                       szs,szd,sign_extend,
                       loadLE(szToITy(szs),mkexpr(addr))));
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs),
                               nameISize(szd),
                               dis_buf,
                               nameIRegG(szd,pfx,rm));
      return len+delta;
   }
}


/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   the 64 / 32 / 16 / 8 bit quantity in the given IRTemp.  */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case: the dividend is the 128-bit
      concatenation RDX:RAX and there is a dedicated 128/64 DivMod op. */
   if (sz == 8) {
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      /* DivMod convention used here: low half = quotient (-> RAX),
         high half = remainder (-> RDX). */
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      /* 8/16/32-bit cases all go through the 64/32 DivMod op, with
         suitable widening of dividend and divisor. */
      IROp   op    = signed_divide ? Iop_DivModS64to32
                                   : Iop_DivModU64to32;
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp dst64 = newTemp(Ity_I64);
      switch (sz) {
         case 4:
            assign( src64,
                    binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
            assign( dst64,
                    binop(op, mkexpr(src64), mkexpr(t)) );
            putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
            putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
            break;
         case 2: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ?
Iop_16Sto32 : Iop_16Uto32;
            assign( src64, unop(widen3264,
                                binop(Iop_16HLto32,
                                      getIRegRDX(2),
                                      getIRegRAX(2))) );
            assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
            putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
            putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
            break;
         }
         case 1: {
            IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
            IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
            IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
            /* 8-bit divide: dividend is AX; quotient goes to AL and
               remainder to AH. */
            assign( src64, unop(widen3264,
                        unop(widen1632, getIRegRAX(2))) );
            assign( dst64,
                    binop(op, mkexpr(src64),
                              unop(widen1632, unop(widen816, mkexpr(t)))) );
            putIRegRAX( 1, unop(Iop_16to8,
                           unop(Iop_32to16,
                           unop(Iop_64to32,mkexpr(dst64)))) );
            putIRegAH( unop(Iop_16to8,
                       unop(Iop_32to16,
                       unop(Iop_64HIto32,mkexpr(dst64)))) );
            break;
         }
         default:
            vpanic("codegen_div(amd64)");
      }
   }
}

/* Group 1 extended opcodes: the immediate forms of
   ADD/OR/ADC/SBB/AND/SUB/XOR/CMP, selected by bits 3..5 of the modRM
   byte.  Case 7 (CMP) computes the subtraction purely for its flag
   effects: the `gregLO3ofRM(modrm) < 7` guards below suppress the
   writeback.  d64 is the (already-fetched) immediate; am_sz/d_sz are
   the sizes of the addressing-mode and immediate fields. */
static
ULong dis_Grp1 ( const VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, Long d64 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   ULong   mask = mkSizeMask(sz);

   /* ADC (2) and SBB (3) leave op8 == Iop_INVALID; they are handled
      by the dedicated helper_ADC/helper_SBB paths below. */
   switch (gregLO3ofRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1(amd64): unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIRegE(sz,pfx,modrm));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* Case 7 is CMP: flags only, no register writeback. */
      if (gregLO3ofRM(modrm) < 7)
         putIRegE(sz, pfx, modrm, mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
          nameIRegE(sz,pfx,modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         if (haveLOCK(pfx)) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* Case 7 is CMP: flags only, no memory writeback. */
         if (gregLO3ofRM(modrm) < 7) {
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }
delta += (len+d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
          d64, dis_buf);
   }
   return delta;
}


/* Group 2 extended opcodes.  shift_expr must be an 8-bit typed
   expression. */

static
ULong dis_Grp2 ( const VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                 const HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty    = szToITy(sz);
   IRTemp dst0  = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

   /* Put value to shift/rotate in dst0.  delta is advanced past the
      modrm/amode (+ any immediate) here, before the operation is
      dispatched. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIRegE(sz, pfx, modrm));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the sub-opcode.  In each switch the listed cases fall
      through to the single assignment; no breaks are needed. */
   isShift = False;
   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(amd64)");
   }

   if (isRotateC) {
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
      Bool     left = toBool(gregLO3ofRM(modrm) == 2);
      IRExpr** argsVALUE;
      IRExpr** argsRFLAGS;

      IRTemp new_value  = newTemp(Ity_I64);
      IRTemp new_rflags = newTemp(Ity_I64);
      IRTemp old_rflags = newTemp(Ity_I64);

      assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );

      /* First call: positive sz selects the rotated value. */
      argsVALUE
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(sz) );
      assign( new_value,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsVALUE
              )
            );

      /* Second call: negated sz selects the resulting rflags. */
      argsRFLAGS
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(-sz) );
      assign( new_rflags,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparm*/,
                 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                 left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                 argsRFLAGS
              )
            );

      assign( dst1, narrowTo(ty, mkexpr(new_value)) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   }

   else
   if (isShift) {

      IRTemp pre64     = newTemp(Ity_I64);
      IRTemp res64     = newTemp(Ity_I64);
      IRTemp res64ss   = newTemp(Ity_I64);
      IRTemp shift_amt = newTemp(Ity_I8);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);
      IROp   op64;

      /* Note cases 4 (SHL) and 6 (SAL) both map to Iop_Shl64. */
      switch (gregLO3ofRM(modrm)) {
         case 4: op64 = Iop_Shl64; break;
         case 5: op64 = Iop_Shr64; break;
         case 6: op64 = Iop_Shl64; break;
         case 7: op64 = Iop_Sar64; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
      /* shift_amt = shift_expr & MASK, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );

      /* suitably widen the value to be shifted to 64 bits. */
      assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
                                     : widenUto64(mkexpr(dst0)) );

      /* res64 = pre64 `shift` shift_amt */
      assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );

      /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
      assign( res64ss,
              binop(op64,
                    mkexpr(pre64),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(mask))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res64)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      /* ccOp: 0/1/2/3 for 8/16/32/64-bit ops; later biased by
         AMD64G_CC_OP_ROLB or AMD64G_CC_OP_RORB. */
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
                                        : (ty==Ity_I32 ? 2 : 3));
      Bool   left      = toBool(gregLO3ofRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      IRTemp rot_amt64 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I64);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));

      if (ty == Ity_I64)
         assign(rot_amt, mkexpr(rot_amt64));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_amd64g_calculate_rflags_all());

      /* rot_amt64 :: Ity_I8.  We need to convert it to I1. */
      IRTemp rot_amt64b = newTemp(Ity_I1);
      assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before.
*/
      /* Each thunk field is updated only when the rotate amount is
         non-zero (ITE on rot_amt64b); otherwise the old value is
         re-Put unchanged. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkU64(ccOp),
                                    IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    widenUto64(mkexpr(dst1)),
                                    IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkU64(0),
                                    IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_ITE( mkexpr(rot_amt64b),
                                    mkexpr(oldFlags),
                                    IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
   } /* if (isRotate) */

   /* Save result, and finish up.  The disassembly printout is only
      produced when front-end tracing (VEX_TRACE_FE) is enabled. */
   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}


/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, UChar modrm,
                     Int am_sz, Int sz, ULong src_val,
                     Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I64);
   IRTemp t2m    = newTemp(Ity_I64);
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   ULong  mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Check whether F2 or F3 are acceptable.
*/
   if (epartIsReg(modrm)) {
      /* F2 or F3 are not allowed in the register case. */
      if (haveF2orF3(pfx)) {
         *decode_OK = False;
         return delta;
      }
   } else {
      /* F2 or F3 (but not both) are allowable provided LOCK is also
         present. */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
            *decode_OK = False;
            return delta;
         }
      }
   }

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case  2: src_val &= 15; break;
      case  4: src_val &= 31; break;
      case  8: src_val &= 63; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation.  For BT the mask is
      unused (no writeback), hence zero. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */  mask = 0;                  break;
      case 5: /* BTS */ mask = 1ULL << src_val;    break;
      case 6: /* BTR */ mask = ~(1ULL << src_val); break;
      case 7: /* BTC */ mask = 1ULL << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      64-bits wide regardless of sz.
    */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, nameIRegE(sz,pfx,modrm));
   } else {
      Int len;
      t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
      delta  += (len+1);
      assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
         break;
     default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT.  A LOCKed memory form goes
      through casLE (compare-and-swap against the value read above). */
   if (gregLO3ofRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
        putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
      } else {
         if (haveLOCK(pfx)) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_RIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
                  mkU64(1))
       ));
   /* Set NDEP even though it isn't used.
This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}


/* Signed/unsigned widening multiply.  Generate IR to multiply the
   value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
   RDX:RAX/EDX:EAX/DX:AX/AX.
   Flags are set via setFlags_MUL with a size-dependent base op
   derived from AMD64G_CC_OP_SMULB (signed) / AMD64G_CC_OP_UMULB
   (unsigned).  tmp_txt is only used for the disassembly printout. */
static void codegen_mulL_A_D ( Int sz, Bool syned,
                               IRTemp tmp, const HChar* tmp_txt )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);

   assign( t1, getIRegRAX(sz) );

   switch (ty) {
      case Ity_I64: {
         IRTemp res128  = newTemp(Ity_I128);
         IRTemp resHi   = newTemp(Ity_I64);
         IRTemp resLo   = newTemp(Ity_I64);
         IROp   mulOp   = syned ? Iop_MullS64 : Iop_MullU64;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
         assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
         assign( resLo, unop(Iop_128to64,mkexpr(res128)));
         putIReg64(R_RDX, mkexpr(resHi));
         putIReg64(R_RAX, mkexpr(resLo));
         break;
      }
      case Ity_I32: {
         IRTemp res64   = newTemp(Ity_I64);
         IRTemp resHi   = newTemp(Ity_I32);
         IRTemp resLo   = newTemp(Ity_I32);
         IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
         assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
         assign( resLo, unop(Iop_64to32,mkexpr(res64)));
         putIRegRDX(4, mkexpr(resHi));
         putIRegRAX(4, mkexpr(resLo));
         break;
      }
      case Ity_I16: {
         IRTemp res32   = newTemp(Ity_I32);
         IRTemp resHi   = newTemp(Ity_I16);
         IRTemp resLo   = newTemp(Ity_I16);
         IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
         UInt   tBaseOp = syned ?
AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
         assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
         assign( resLo, unop(Iop_32to16,mkexpr(res32)));
         putIRegRDX(2, mkexpr(resHi));
         putIRegRAX(2, mkexpr(resLo));
         break;
      }
      case Ity_I8: {
         IRTemp res16   = newTemp(Ity_I16);
         IRTemp resHi   = newTemp(Ity_I8);
         IRTemp resLo   = newTemp(Ity_I8);
         IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
         UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
         assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
         assign( resLo, unop(Iop_16to8,mkexpr(res16)));
         /* 8-bit case: the whole 16-bit product goes to AX in one go;
            resHi/resLo are assigned above but not used here. */
         putIRegRAX(2, mkexpr(res16));
         break;
      }
      default:
         ppIRType(ty);
         vpanic("codegen_mulL_A_D(amd64)");
   }
   DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
}


/* Group 3 extended opcodes.  We have to decide here whether F2 and F3
   might be valid.*/
static
ULong dis_Grp3 ( const VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
{
   Long    d64;
   UChar   modrm;
   HChar   dis_buf[50];
   Int     len;
   IRTemp  addr;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp dst1, src, dst0;
   *decode_OK = True;
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
*/
      if (haveF2orF3(pfx)) goto unhandled;
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            delta++;
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIRegE(sz,pfx,modrm),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n",
                nameISize(sz), d64,
                nameIRegE(sz, pfx, modrm));
            break;
         }
         case 1:
            /* Sub-opcode 1 is undefined for Grp3; refuse to decode. */
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            delta++;
            putIRegE(sz, pfx, modrm,
                              unop(mkSizedOp(ty,Iop_Not8),
                                   getIRegE(sz, pfx, modrm)));
            DIP("not%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 3: /* NEG */
            delta++;
            /* NEG is expressed as 0 - src so the Sub8 flags thunk
               applies directly. */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  getIRegE(sz, pfx, modrm));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIRegE(sz, pfx, modrm, mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, False, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, True, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz),
                               nameIRegE(sz, pfx, modrm));
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,R)");
      }
   } else {
      /* Decide if F2/XACQ or F3/XREL might be valid. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandled;
      /* */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* we have to inform disAMode of any immediate
                           bytes used */
                        gregLO3ofRM(modrm)==0/*TEST*/
                           ? imin(4,sz)
                           : 0
                      );
      t1   = newTemp(ty);
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
            break;
         }
         case 1:
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            /* LOCKed form stores via compare-and-swap against the
               value loaded above. */
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n",
nameISize(sz), dis_buf);
            break;
         case 4: /* MUL (unsigned widening) */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,M)");
      }
   }
   return delta;
  unhandled:
   *decode_OK = False;
   return delta;
}


/* Group 4 extended opcodes: byte-sized INC/DEC of an r/m8 operand,
   selected by the reg field of the modrm byte.  We have to decide
   here whether F2 and F3 might be valid: never in the register case;
   in the memory case F2/XACQ and F3/XREL are accepted for INC/DEC
   only together with a LOCK prefix.  Returns the updated delta; sets
   *decode_OK to False for undecodable forms. */
static
ULong dis_Grp4 ( const VexAbiInfo* vbi,
                 Prefix pfx, Long delta, Bool* decode_OK )
{
   Int   alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
      if (haveF2orF3(pfx)) goto unhandled;
      assign(t1, getIRegE(1, pfx, modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
                      nameIRegE(1, pfx, modrm));
   } else {
      /* Decide if F2/XACQ or F3/XREL might be valid. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandled;
      /* */
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            /* A LOCKed INC is expressed as a compare-and-swap against
               the value originally loaded, so concurrent updates are
               detected. */
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
   }
   return delta;
  unhandled:
   *decode_OK = False;
   return delta;
}


/* Group 5 extended opcodes: INC/DEC/CALL/JMP/PUSH of an Ev operand,
   selected by the reg field of the modrm byte.  We have to decide
   here whether F2 and F3 might be valid.  Returns the updated delta;
   sets *decode_OK to False for undecodable forms.  CALL and JMP end
   the block (dres->whatNext == Dis_StopHere afterwards). */
static
ULong dis_Grp5 ( const VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta,
                 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int     len;
   UChar   modrm;
   HChar   dis_buf[50];
   IRTemp  addr = IRTemp_INVALID;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp  t2 = IRTemp_INVALID;
   IRTemp  t3 = IRTemp_INVALID;
   Bool    showSz = True;

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
         F2/CALL and F2/JMP may have bnd prefix. */
      if (haveF2orF3(pfx)
          && ! (haveF2(pfx)
                && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
         goto unhandledR;
      assign(t1, getIRegE(sz,pfx,modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledR;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            /* Push the return address: delta currently indexes the
               modrm byte, and the register form is 1 byte long. */
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* jmp Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledR;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 6: /* PUSH Ev */
            /* There is no encoding for 32-bit operand size; hence ... */
            if (sz == 4) sz = 8;
            if (sz == 8 || sz == 2) {
               ty = szToITy(sz); /* redo it, since sz might have changed */
               t3 = newTemp(ty);
               assign(t3, getIRegE(sz,pfx,modrm));
               t2 = newTemp(Ity_I64);
               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
               putIReg64(R_RSP, mkexpr(t2) );
               storeLE( mkexpr(t2), mkexpr(t3) );
               break;
            } else {
               goto unhandledR; /* awaiting test case */
            }
         default:
         unhandledR:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       nameIRegE(sz, pfx, modrm));
   } else {
      /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
      Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
      if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
          && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
         validF2orF3 = True;
      } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
                 && (haveF2(pfx) && !haveF3(pfx))) {
         validF2orF3 = True;
      }
      if (!validF2orF3) goto unhandledM;
      /* */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      /* Only INC/DEC need the operand preloaded at width ty; CALL,
         JMP and PUSH do their own (possibly differently-sized)
         loads. */
      if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
          && gregLO3ofRM(modrm) != 6) {
         assign(t1, loadLE(ty,mkexpr(addr)));
      }
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (haveLOCK(pfx)) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledM;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* JMP Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandledM;
            if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 6: /* PUSH Ev */
            /* There is no encoding for 32-bit operand size; hence ... */
            if (sz == 4) sz = 8;
            if (sz == 8 || sz == 2) {
               ty = szToITy(sz); /* redo it, since sz might have changed */
               t3 = newTemp(ty);
               assign(t3, loadLE(ty,mkexpr(addr)));
               t2 = newTemp(Ity_I64);
               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
               putIReg64(R_RSP, mkexpr(t2) );
               storeLE( mkexpr(t2), mkexpr(t3) );
               break;
            } else {
               goto unhandledM; /* awaiting test case */
            }
         default:
         unhandledM:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       dis_buf);
   }
   return delta;
}


/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops */
static
void dis_string_op_increment ( Int sz, IRTemp t_inc )
{
   /* t_inc = guest DFLAG value scaled by the operand size, i.e.
      DFLAG << log2(sz). */
   UChar logSz;
   if (sz == 8 || sz == 4 || sz == 2) {
      logSz = 1;
      if (sz == 4) logSz = 2;
      if (sz == 8) logSz = 3;
      assign( t_inc,
              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
                               mkU8(logSz) ) );
   } else {
      assign( t_inc,
              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   }
}

static
void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                    Int sz, const HChar* name, Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness.
*/
   vassert(pfx == clearSegBits(pfx));
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc, pfx );
   DIP("%s%c\n", name, nameISize(sz));
}

/* MOVS: copy one element of size sz from [RSI] to [RDI], then advance
   both RSI and RDI by t_inc.  With an address-size override (0x67)
   the addresses and the updated index values use only 32 bits. */
static
void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      /* Address-size override: use only the low 32 bits of RDI/RSI. */
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      /* The updated index registers wrap at 32 bits under 0x67. */
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* LODS: load rAX (width sz) from [RSI], then advance RSI by t_inc. */
static
void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incs;

   if (haveASO(pfx))
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   else
      assign( ts, getIReg64(R_RSI) );

   putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );

   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx))
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   putIReg64( R_RSI, incs );
}

/* STOS: store rAX (width sz) to [RDI], then advance RDI by t_inc. */
static
void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* rAX */
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}

/* CMPS: set the flags from *[RSI] - *[RDI] (width sz), then advance
   both RSI and RDI by t_inc. */
static
void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);        /* (RDI) */
   IRTemp tsv = newTemp(ty);        /* (RSI) */
   IRTemp td  = newTemp(Ity_I64);   /* RDI */
   IRTemp ts  = newTemp(Ity_I64);   /* RSI */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   assign( tdv, loadLE(ty,mkexpr(td)) );

   assign( tsv, loadLE(ty,mkexpr(ts)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}

/* SCAS: set the flags from rAX - *[RDI] (width sz), then advance RDI
   by t_inc. */
static
void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);        /* rAX */
   IRTemp td  = newTemp(Ity_I64);   /* RDI */
   IRTemp tdv = newTemp(ty);        /* (RDI) */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}


/* Wrap the appropriate string op inside a REP/REPE/REPNE.
We assume
   the insn is the last one in the basic block, and so emit a jump to
   the next insn, rather than just falling through. */
static
void dis_REP_op ( /*MOD*/DisResult* dres,
                  AMD64Condcode cond,
                  void (*dis_OP)(Int, IRTemp, Prefix),
                  Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
                  Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   IRTemp tc;
   IRExpr* cmp;

   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));

   /* With an address-size override the counter is ECX, else RCX. */
   if (haveASO(pfx)) {
      tc = newTemp(Ity_I32);  /* ECX */
      assign( tc, getIReg32(R_RCX) );
      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   } else {
      tc = newTemp(Ity_I64);  /* RCX */
      assign( tc, getIReg64(R_RCX) );
      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   }

   /* If the counter is already zero, skip to the next insn. */
   stmt( IRStmt_Exit( cmp, Ijk_Boring,
                      IRConst_U64(rip_next), OFFB_RIP ) );

   if (haveASO(pfx))
      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
   else
      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );

   /* Do one iteration of the string op ... */
   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc, pfx);

   /* ... then either re-execute the insn unconditionally (plain REP),
      or re-execute it only while |cond| holds (REPE/REPNE). */
   if (cond == AMD64CondAlways) {
      jmp_lit(dres, Ijk_Boring, rip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U64(rip),
                         OFFB_RIP ) );
      jmp_lit(dres, Ijk_Boring, rip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}


/*------------------------------------------------------------*/
/*--- Arithmetic, etc.                                     ---*/
/*------------------------------------------------------------*/

/* IMUL E, G.  Supplied eip points to the modR/M byte.
*/
static
ULong dis_mul_E_G ( const VexAbiInfo* vbi,
                    Prefix pfx,
                    Int size,
                    Long delta0 )
{
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getUChar(delta0);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tg = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   /* G (destination) and E (reg or mem source) operands. */
   assign( tg, getIRegG(size, pfx, rm) );
   if (epartIsReg(rm)) {
      assign( te, getIRegE(size, pfx, rm) );
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
      assign( te, loadLE(ty,mkexpr(addr)) );
   }

   /* Record the operands in the flags thunk, using the signed-widening
      multiply op family at this width. */
   setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );

   putIRegG(size, pfx, rm, mkexpr(resLo) );

   if (epartIsReg(rm)) {
      DIP("imul%c %s, %s\n", nameISize(size),
                             nameIRegE(size,pfx,rm),
                             nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      DIP("imul%c %s, %s\n", nameISize(size),
                             dis_buf,
                             nameIRegG(size,pfx,rm));
      return alen+delta0;
   }
}


/* IMUL I * E -> G.  Supplied rip points to the modR/M byte.
*/
static
ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
                       Prefix pfx,
                       Int size,
                       Long delta,
                       Int litsize )
{
   Long   d64;
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getUChar(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tl = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);

   if (epartIsReg(rm)) {
      assign(te, getIRegE(size, pfx, rm));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                                     imin(4,litsize) );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   /* The literal is at most 4 bytes wide, read sign-extended. */
   d64 = getSDisp(imin(4,litsize),delta);
   delta += imin(4,litsize);

   /* Truncate the literal to the operand size. */
   d64 &= mkSizeMask(size);
   assign(tl, mkU(ty,d64));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );

   putIRegG(size, pfx, rm, mkexpr(resLo));

   DIP("imul%c $%lld, %s, %s\n",
       nameISize(size), d64,
       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
       nameIRegG(size,pfx,rm) );
   return delta;
}


/* Generate an IR sequence to do a popcount operation on the supplied
   IRTemp, and return a new IRTemp holding the result.  'ty' may be
   Ity_I16, Ity_I32 or Ity_I64 only.
*/ 4766 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src ) 4767 { 4768 Int i; 4769 if (ty == Ity_I16) { 4770 IRTemp old = IRTemp_INVALID; 4771 IRTemp nyu = IRTemp_INVALID; 4772 IRTemp mask[4], shift[4]; 4773 for (i = 0; i < 4; i++) { 4774 mask[i] = newTemp(ty); 4775 shift[i] = 1 << i; 4776 } 4777 assign(mask[0], mkU16(0x5555)); 4778 assign(mask[1], mkU16(0x3333)); 4779 assign(mask[2], mkU16(0x0F0F)); 4780 assign(mask[3], mkU16(0x00FF)); 4781 old = src; 4782 for (i = 0; i < 4; i++) { 4783 nyu = newTemp(ty); 4784 assign(nyu, 4785 binop(Iop_Add16, 4786 binop(Iop_And16, 4787 mkexpr(old), 4788 mkexpr(mask[i])), 4789 binop(Iop_And16, 4790 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])), 4791 mkexpr(mask[i])))); 4792 old = nyu; 4793 } 4794 return nyu; 4795 } 4796 if (ty == Ity_I32) { 4797 IRTemp old = IRTemp_INVALID; 4798 IRTemp nyu = IRTemp_INVALID; 4799 IRTemp mask[5], shift[5]; 4800 for (i = 0; i < 5; i++) { 4801 mask[i] = newTemp(ty); 4802 shift[i] = 1 << i; 4803 } 4804 assign(mask[0], mkU32(0x55555555)); 4805 assign(mask[1], mkU32(0x33333333)); 4806 assign(mask[2], mkU32(0x0F0F0F0F)); 4807 assign(mask[3], mkU32(0x00FF00FF)); 4808 assign(mask[4], mkU32(0x0000FFFF)); 4809 old = src; 4810 for (i = 0; i < 5; i++) { 4811 nyu = newTemp(ty); 4812 assign(nyu, 4813 binop(Iop_Add32, 4814 binop(Iop_And32, 4815 mkexpr(old), 4816 mkexpr(mask[i])), 4817 binop(Iop_And32, 4818 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])), 4819 mkexpr(mask[i])))); 4820 old = nyu; 4821 } 4822 return nyu; 4823 } 4824 if (ty == Ity_I64) { 4825 IRTemp old = IRTemp_INVALID; 4826 IRTemp nyu = IRTemp_INVALID; 4827 IRTemp mask[6], shift[6]; 4828 for (i = 0; i < 6; i++) { 4829 mask[i] = newTemp(ty); 4830 shift[i] = 1 << i; 4831 } 4832 assign(mask[0], mkU64(0x5555555555555555ULL)); 4833 assign(mask[1], mkU64(0x3333333333333333ULL)); 4834 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); 4835 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL)); 4836 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL)); 4837 assign(mask[5], 
mkU64(0x00000000FFFFFFFFULL)); 4838 old = src; 4839 for (i = 0; i < 6; i++) { 4840 nyu = newTemp(ty); 4841 assign(nyu, 4842 binop(Iop_Add64, 4843 binop(Iop_And64, 4844 mkexpr(old), 4845 mkexpr(mask[i])), 4846 binop(Iop_And64, 4847 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])), 4848 mkexpr(mask[i])))); 4849 old = nyu; 4850 } 4851 return nyu; 4852 } 4853 /*NOTREACHED*/ 4854 vassert(0); 4855 } 4856 4857 4858 /* Generate an IR sequence to do a count-leading-zeroes operation on 4859 the supplied IRTemp, and return a new IRTemp holding the result. 4860 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4861 the argument is zero, return the number of bits in the word (the 4862 natural semantics). */ 4863 static IRTemp gen_LZCNT ( IRType ty, IRTemp src ) 4864 { 4865 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16); 4866 4867 IRTemp src64 = newTemp(Ity_I64); 4868 assign(src64, widenUto64( mkexpr(src) )); 4869 4870 IRTemp src64x = newTemp(Ity_I64); 4871 assign(src64x, 4872 binop(Iop_Shl64, mkexpr(src64), 4873 mkU8(64 - 8 * sizeofIRType(ty)))); 4874 4875 // Clz64 has undefined semantics when its input is zero, so 4876 // special-case around that. 4877 IRTemp res64 = newTemp(Ity_I64); 4878 assign(res64, 4879 IRExpr_ITE( 4880 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)), 4881 mkU64(8 * sizeofIRType(ty)), 4882 unop(Iop_Clz64, mkexpr(src64x)) 4883 )); 4884 4885 IRTemp res = newTemp(ty); 4886 assign(res, narrowTo(ty, mkexpr(res64))); 4887 return res; 4888 } 4889 4890 4891 /* Generate an IR sequence to do a count-trailing-zeroes operation on 4892 the supplied IRTemp, and return a new IRTemp holding the result. 4893 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where 4894 the argument is zero, return the number of bits in the word (the 4895 natural semantics). 
*/
static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   /* Zero-extend to 64 bits; trailing zeroes are unaffected by
      widening, so no justification shift is needed here. */
   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   // Ctz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_ITE(
             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
             mkU64(8 * sizeofIRType(ty)),
             unop(Iop_Ctz64, mkexpr(src64))
          ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* --- Helper functions for dealing with the register stack. --- */

/* --- Set the emulation-warning pseudo-register. --- */

static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}

/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */

static IRExpr* mkQNaN64 ( void )
{
   /* QNaN is 0 2047 1 0(51times)
      == 0b 11111111111b 1 0(51times)
      == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}

/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */

static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}

static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}

/* --------- Get/put the C3210 bits.
--------- */

static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}

static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}

/* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   /* The guest state field is 64 bits wide; only the low 32 are
      meaningful here. */
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}

static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}


/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety.
*/
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}

/* Used where the guest's rounding mode is deliberately ignored and
   round-to-nearest applied instead (see the x87 limitations noted at
   the top of this file). */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}


/* --------- Get/set FP register tag bytes. --------- */

/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */

static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   /* The tag array is indexed relative to the current FTOP value. */
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
}

/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty".
*/ 5016 5017 static IRExpr* get_ST_TAG ( Int i ) 5018 { 5019 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 ); 5020 return IRExpr_GetI( descr, get_ftop(), i ); 5021 } 5022 5023 5024 /* --------- Get/set FP registers. --------- */ 5025 5026 /* Given i, and some expression e, emit 'ST(i) = e' and set the 5027 register's tag to indicate the register is full. The previous 5028 state of the register is not checked. */ 5029 5030 static void put_ST_UNCHECKED ( Int i, IRExpr* value ) 5031 { 5032 IRRegArray* descr; 5033 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64); 5034 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5035 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) ); 5036 /* Mark the register as in-use. */ 5037 put_ST_TAG(i, mkU8(1)); 5038 } 5039 5040 /* Given i, and some expression e, emit 5041 ST(i) = is_full(i) ? NaN : e 5042 and set the tag accordingly. 5043 */ 5044 5045 static void put_ST ( Int i, IRExpr* value ) 5046 { 5047 put_ST_UNCHECKED( 5048 i, 5049 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5050 /* non-0 means full */ 5051 mkQNaN64(), 5052 /* 0 means empty */ 5053 value 5054 ) 5055 ); 5056 } 5057 5058 5059 /* Given i, generate an expression yielding 'ST(i)'. */ 5060 5061 static IRExpr* get_ST_UNCHECKED ( Int i ) 5062 { 5063 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 ); 5064 return IRExpr_GetI( descr, get_ftop(), i ); 5065 } 5066 5067 5068 /* Given i, generate an expression yielding 5069 is_full(i) ? ST(i) : NaN 5070 */ 5071 5072 static IRExpr* get_ST ( Int i ) 5073 { 5074 return 5075 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)), 5076 /* non-0 means full */ 5077 get_ST_UNCHECKED(i), 5078 /* 0 means empty */ 5079 mkQNaN64()); 5080 } 5081 5082 5083 /* Given i, and some expression e, and a condition cond, generate IR 5084 which has the same effect as put_ST(i,e) when cond is true and has 5085 no effect when cond is false. 
Given the lack of proper
   if-then-else in the IR, this is pretty tricky.
*/

static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
{
   // new_tag = if cond then FULL else old_tag
   // new_val = if cond then (if old_tag==FULL then NaN else val)
   //                   else old_val

   IRTemp old_tag = newTemp(Ity_I8);
   assign(old_tag, get_ST_TAG(i));
   IRTemp new_tag = newTemp(Ity_I8);
   assign(new_tag,
          IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));

   IRTemp old_val = newTemp(Ity_F64);
   assign(old_val, get_ST_UNCHECKED(i));
   IRTemp new_val = newTemp(Ity_F64);
   assign(new_val,
          IRExpr_ITE(mkexpr(cond),
                     IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
                                /* non-0 means full */
                                mkQNaN64(),
                                /* 0 means empty */
                                value),
                     mkexpr(old_val)));

   put_ST_UNCHECKED(i, mkexpr(new_val));
   // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL.  So
   // now set it to new_tag instead.
   put_ST_TAG(i, mkexpr(new_tag));
}

/* Adjust FTOP downwards by one register. */

static void fp_push ( void )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}

/* Adjust FTOP downwards by one register when COND is 1:I1.  Else
   don't change it. */

static void maybe_fp_push ( IRTemp cond )
{
   /* Subtracting the 0/1-valued condition either decrements FTOP or
      leaves it unchanged, without needing a branch. */
   put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
}

/* Adjust FTOP upwards by one register, and mark the vacated register
   as empty. */

static void fp_pop ( void )
{
   put_ST_TAG(0, mkU8(0));
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}

/* Set the C2 bit of the FPU status register to e[0].  Assumes that
   e[31:1] == 0.
5145 */ 5146 static void set_C2 ( IRExpr* e ) 5147 { 5148 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); 5149 put_C3210( binop(Iop_Or64, 5150 cleared, 5151 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); 5152 } 5153 5154 /* Generate code to check that abs(d64) < 2^63 and is finite. This is 5155 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The 5156 test is simple, but the derivation of it is not so simple. 5157 5158 The exponent field for an IEEE754 double is 11 bits. That means it 5159 can take values 0 through 0x7FF. If the exponent has value 0x7FF, 5160 the number is either a NaN or an Infinity and so is not finite. 5161 Furthermore, a finite value of exactly 2^63 is the smallest value 5162 that has exponent value 0x43E. Hence, what we need to do is 5163 extract the exponent, ignoring the sign bit and mantissa, and check 5164 it is < 0x43E, or <= 0x43D. 5165 5166 To make this easily applicable to 32- and 64-bit targets, a 5167 roundabout approach is used. First the number is converted to I64, 5168 then the top 32 bits are taken. Shifting them right by 20 bits 5169 places the sign bit and exponent in the bottom 12 bits. Anding 5170 with 0x7FF gets rid of the sign bit, leaving just the exponent 5171 available for comparison. 
5172 */ 5173 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) 5174 { 5175 IRTemp i64 = newTemp(Ity_I64); 5176 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); 5177 IRTemp exponent = newTemp(Ity_I32); 5178 assign(exponent, 5179 binop(Iop_And32, 5180 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), 5181 mkU32(0x7FF))); 5182 IRTemp in_range_and_finite = newTemp(Ity_I1); 5183 assign(in_range_and_finite, 5184 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); 5185 return in_range_and_finite; 5186 } 5187 5188 /* Invent a plausible-looking FPU status word value: 5189 ((ftop & 7) << 11) | (c3210 & 0x4700) 5190 */ 5191 static IRExpr* get_FPU_sw ( void ) 5192 { 5193 return 5194 unop(Iop_32to16, 5195 binop(Iop_Or32, 5196 binop(Iop_Shl32, 5197 binop(Iop_And32, get_ftop(), mkU32(7)), 5198 mkU8(11)), 5199 binop(Iop_And32, unop(Iop_64to32, get_C3210()), 5200 mkU32(0x4700)) 5201 )); 5202 } 5203 5204 5205 /* Generate a dirty helper call that initialises the x87 state a la 5206 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise 5207 |guard| is used as a guarding condition. 
5208 */ 5209 static void gen_FINIT_SEQUENCE ( IRExpr* guard ) 5210 { 5211 /* Uses dirty helper: 5212 void amd64g_do_FINIT ( VexGuestAMD64State* ) */ 5213 IRDirty* d = unsafeIRDirty_0_N ( 5214 0/*regparms*/, 5215 "amd64g_dirtyhelper_FINIT", 5216 &amd64g_dirtyhelper_FINIT, 5217 mkIRExprVec_1( IRExpr_BBPTR() ) 5218 ); 5219 5220 /* declare we're writing guest state */ 5221 d->nFxState = 5; 5222 vex_bzero(&d->fxState, sizeof(d->fxState)); 5223 5224 d->fxState[0].fx = Ifx_Write; 5225 d->fxState[0].offset = OFFB_FTOP; 5226 d->fxState[0].size = sizeof(UInt); 5227 5228 d->fxState[1].fx = Ifx_Write; 5229 d->fxState[1].offset = OFFB_FPREGS; 5230 d->fxState[1].size = 8 * sizeof(ULong); 5231 5232 d->fxState[2].fx = Ifx_Write; 5233 d->fxState[2].offset = OFFB_FPTAGS; 5234 d->fxState[2].size = 8 * sizeof(UChar); 5235 5236 d->fxState[3].fx = Ifx_Write; 5237 d->fxState[3].offset = OFFB_FPROUND; 5238 d->fxState[3].size = sizeof(ULong); 5239 5240 d->fxState[4].fx = Ifx_Write; 5241 d->fxState[4].offset = OFFB_FC3210; 5242 d->fxState[4].size = sizeof(ULong); 5243 5244 if (guard) 5245 d->guard = guard; 5246 5247 stmt( IRStmt_Dirty(d) ); 5248 } 5249 5250 5251 /* ------------------------------------------------------- */ 5252 /* Given all that stack-mangling junk, we can now go ahead 5253 and describe FP instructions. 5254 */ 5255 5256 /* ST(0) = ST(0) `op` mem64/32(addr) 5257 Need to check ST(0)'s tag on read, but not on write. 
5258 */ 5259 static 5260 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5261 IROp op, Bool dbl ) 5262 { 5263 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5264 if (dbl) { 5265 put_ST_UNCHECKED(0, 5266 triop( op, 5267 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5268 get_ST(0), 5269 loadLE(Ity_F64,mkexpr(addr)) 5270 )); 5271 } else { 5272 put_ST_UNCHECKED(0, 5273 triop( op, 5274 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5275 get_ST(0), 5276 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))) 5277 )); 5278 } 5279 } 5280 5281 5282 /* ST(0) = mem64/32(addr) `op` ST(0) 5283 Need to check ST(0)'s tag on read, but not on write. 5284 */ 5285 static 5286 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 5287 IROp op, Bool dbl ) 5288 { 5289 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf); 5290 if (dbl) { 5291 put_ST_UNCHECKED(0, 5292 triop( op, 5293 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5294 loadLE(Ity_F64,mkexpr(addr)), 5295 get_ST(0) 5296 )); 5297 } else { 5298 put_ST_UNCHECKED(0, 5299 triop( op, 5300 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5301 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))), 5302 get_ST(0) 5303 )); 5304 } 5305 } 5306 5307 5308 /* ST(dst) = ST(dst) `op` ST(src). 5309 Check dst and src tags when reading but not on write. 5310 */ 5311 static 5312 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5313 Bool pop_after ) 5314 { 5315 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5316 put_ST_UNCHECKED( 5317 st_dst, 5318 triop( op, 5319 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5320 get_ST(st_dst), 5321 get_ST(st_src) ) 5322 ); 5323 if (pop_after) 5324 fp_pop(); 5325 } 5326 5327 /* ST(dst) = ST(src) `op` ST(dst). 5328 Check dst and src tags when reading but not on write. 
5329 */ 5330 static 5331 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst, 5332 Bool pop_after ) 5333 { 5334 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst ); 5335 put_ST_UNCHECKED( 5336 st_dst, 5337 triop( op, 5338 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5339 get_ST(st_src), 5340 get_ST(st_dst) ) 5341 ); 5342 if (pop_after) 5343 fp_pop(); 5344 } 5345 5346 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */ 5347 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after ) 5348 { 5349 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i); 5350 /* This is a bit of a hack (and isn't really right). It sets 5351 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel 5352 documentation implies A and S are unchanged. 5353 */ 5354 /* It's also fishy in that it is used both for COMIP and 5355 UCOMIP, and they aren't the same (although similar). */ 5356 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 5357 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 5358 stmt( IRStmt_Put( 5359 OFFB_CC_DEP1, 5360 binop( Iop_And64, 5361 unop( Iop_32Uto64, 5362 binop(Iop_CmpF64, get_ST(0), get_ST(i))), 5363 mkU64(0x45) 5364 ))); 5365 if (pop_after) 5366 fp_pop(); 5367 } 5368 5369 5370 /* returns 5371 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) 5372 */ 5373 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 ) 5374 { 5375 IRTemp t32 = newTemp(Ity_I32); 5376 assign( t32, e32 ); 5377 return 5378 IRExpr_ITE( 5379 binop(Iop_CmpLT64U, 5380 unop(Iop_32Uto64, 5381 binop(Iop_Add32, mkexpr(t32), mkU32(32768))), 5382 mkU64(65536)), 5383 unop(Iop_32to16, mkexpr(t32)), 5384 mkU16( 0x8000 ) ); 5385 } 5386 5387 5388 static 5389 ULong dis_FPU ( /*OUT*/Bool* decode_ok, 5390 const VexAbiInfo* vbi, Prefix pfx, Long delta ) 5391 { 5392 Int len; 5393 UInt r_src, r_dst; 5394 HChar dis_buf[50]; 5395 IRTemp t1, t2; 5396 5397 /* On entry, delta points at the second byte of the insn (the modrm 5398 byte).*/ 5399 UChar 
first_opcode = getUChar(delta-1); 5400 UChar modrm = getUChar(delta+0); 5401 5402 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */ 5403 5404 if (first_opcode == 0xD8) { 5405 if (modrm < 0xC0) { 5406 5407 /* bits 5,4,3 are an opcode extension, and the modRM also 5408 specifies an address. */ 5409 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5410 delta += len; 5411 5412 switch (gregLO3ofRM(modrm)) { 5413 5414 case 0: /* FADD single-real */ 5415 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False ); 5416 break; 5417 5418 case 1: /* FMUL single-real */ 5419 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False ); 5420 break; 5421 5422 case 2: /* FCOM single-real */ 5423 DIP("fcoms %s\n", dis_buf); 5424 /* This forces C1 to zero, which isn't right. */ 5425 /* The AMD documentation suggests that forcing C1 to 5426 zero is correct (Eliot Moss) */ 5427 put_C3210( 5428 unop( Iop_32Uto64, 5429 binop( Iop_And32, 5430 binop(Iop_Shl32, 5431 binop(Iop_CmpF64, 5432 get_ST(0), 5433 unop(Iop_F32toF64, 5434 loadLE(Ity_F32,mkexpr(addr)))), 5435 mkU8(8)), 5436 mkU32(0x4500) 5437 ))); 5438 break; 5439 5440 case 3: /* FCOMP single-real */ 5441 /* The AMD documentation suggests that forcing C1 to 5442 zero is correct (Eliot Moss) */ 5443 DIP("fcomps %s\n", dis_buf); 5444 /* This forces C1 to zero, which isn't right. 
*/ 5445 put_C3210( 5446 unop( Iop_32Uto64, 5447 binop( Iop_And32, 5448 binop(Iop_Shl32, 5449 binop(Iop_CmpF64, 5450 get_ST(0), 5451 unop(Iop_F32toF64, 5452 loadLE(Ity_F32,mkexpr(addr)))), 5453 mkU8(8)), 5454 mkU32(0x4500) 5455 ))); 5456 fp_pop(); 5457 break; 5458 5459 case 4: /* FSUB single-real */ 5460 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False ); 5461 break; 5462 5463 case 5: /* FSUBR single-real */ 5464 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False ); 5465 break; 5466 5467 case 6: /* FDIV single-real */ 5468 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False ); 5469 break; 5470 5471 case 7: /* FDIVR single-real */ 5472 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False ); 5473 break; 5474 5475 default: 5476 vex_printf("unhandled opc_aux = 0x%2x\n", 5477 (UInt)gregLO3ofRM(modrm)); 5478 vex_printf("first_opcode == 0xD8\n"); 5479 goto decode_fail; 5480 } 5481 } else { 5482 delta++; 5483 switch (modrm) { 5484 5485 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */ 5486 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False ); 5487 break; 5488 5489 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */ 5490 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False ); 5491 break; 5492 5493 /* Dunno if this is right */ 5494 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */ 5495 r_dst = (UInt)modrm - 0xD0; 5496 DIP("fcom %%st(0),%%st(%u)\n", r_dst); 5497 /* This forces C1 to zero, which isn't right. */ 5498 put_C3210( 5499 unop(Iop_32Uto64, 5500 binop( Iop_And32, 5501 binop(Iop_Shl32, 5502 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5503 mkU8(8)), 5504 mkU32(0x4500) 5505 ))); 5506 break; 5507 5508 /* Dunno if this is right */ 5509 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */ 5510 r_dst = (UInt)modrm - 0xD8; 5511 DIP("fcomp %%st(0),%%st(%u)\n", r_dst); 5512 /* This forces C1 to zero, which isn't right. 
*/ 5513 put_C3210( 5514 unop(Iop_32Uto64, 5515 binop( Iop_And32, 5516 binop(Iop_Shl32, 5517 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 5518 mkU8(8)), 5519 mkU32(0x4500) 5520 ))); 5521 fp_pop(); 5522 break; 5523 5524 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */ 5525 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False ); 5526 break; 5527 5528 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */ 5529 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False ); 5530 break; 5531 5532 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */ 5533 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False ); 5534 break; 5535 5536 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */ 5537 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False ); 5538 break; 5539 5540 default: 5541 goto decode_fail; 5542 } 5543 } 5544 } 5545 5546 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */ 5547 else 5548 if (first_opcode == 0xD9) { 5549 if (modrm < 0xC0) { 5550 5551 /* bits 5,4,3 are an opcode extension, and the modRM also 5552 specifies an address. 
*/ 5553 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 5554 delta += len; 5555 5556 switch (gregLO3ofRM(modrm)) { 5557 5558 case 0: /* FLD single-real */ 5559 DIP("flds %s\n", dis_buf); 5560 fp_push(); 5561 put_ST(0, unop(Iop_F32toF64, 5562 loadLE(Ity_F32, mkexpr(addr)))); 5563 break; 5564 5565 case 2: /* FST single-real */ 5566 DIP("fsts %s\n", dis_buf); 5567 storeLE(mkexpr(addr), 5568 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5569 break; 5570 5571 case 3: /* FSTP single-real */ 5572 DIP("fstps %s\n", dis_buf); 5573 storeLE(mkexpr(addr), 5574 binop(Iop_F64toF32, get_roundingmode(), get_ST(0))); 5575 fp_pop(); 5576 break; 5577 5578 case 4: { /* FLDENV m28 */ 5579 /* Uses dirty helper: 5580 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */ 5581 IRTemp ew = newTemp(Ity_I32); 5582 IRTemp w64 = newTemp(Ity_I64); 5583 IRDirty* d = unsafeIRDirty_0_N ( 5584 0/*regparms*/, 5585 "amd64g_dirtyhelper_FLDENV", 5586 &amd64g_dirtyhelper_FLDENV, 5587 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5588 ); 5589 d->tmp = w64; 5590 /* declare we're reading memory */ 5591 d->mFx = Ifx_Read; 5592 d->mAddr = mkexpr(addr); 5593 d->mSize = 28; 5594 5595 /* declare we're writing guest state */ 5596 d->nFxState = 4; 5597 vex_bzero(&d->fxState, sizeof(d->fxState)); 5598 5599 d->fxState[0].fx = Ifx_Write; 5600 d->fxState[0].offset = OFFB_FTOP; 5601 d->fxState[0].size = sizeof(UInt); 5602 5603 d->fxState[1].fx = Ifx_Write; 5604 d->fxState[1].offset = OFFB_FPTAGS; 5605 d->fxState[1].size = 8 * sizeof(UChar); 5606 5607 d->fxState[2].fx = Ifx_Write; 5608 d->fxState[2].offset = OFFB_FPROUND; 5609 d->fxState[2].size = sizeof(ULong); 5610 5611 d->fxState[3].fx = Ifx_Write; 5612 d->fxState[3].offset = OFFB_FC3210; 5613 d->fxState[3].size = sizeof(ULong); 5614 5615 stmt( IRStmt_Dirty(d) ); 5616 5617 /* ew contains any emulation warning we may need to 5618 issue. 
If needed, side-exit to the next insn, 5619 reporting the warning, so that Valgrind's dispatcher 5620 sees the warning. */ 5621 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 5622 put_emwarn( mkexpr(ew) ); 5623 stmt( 5624 IRStmt_Exit( 5625 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5626 Ijk_EmWarn, 5627 IRConst_U64( guest_RIP_bbstart+delta ), 5628 OFFB_RIP 5629 ) 5630 ); 5631 5632 DIP("fldenv %s\n", dis_buf); 5633 break; 5634 } 5635 5636 case 5: {/* FLDCW */ 5637 /* The only thing we observe in the control word is the 5638 rounding mode. Therefore, pass the 16-bit value 5639 (x87 native-format control word) to a clean helper, 5640 getting back a 64-bit value, the lower half of which 5641 is the FPROUND value to store, and the upper half of 5642 which is the emulation-warning token which may be 5643 generated. 5644 */ 5645 /* ULong amd64h_check_fldcw ( ULong ); */ 5646 IRTemp t64 = newTemp(Ity_I64); 5647 IRTemp ew = newTemp(Ity_I32); 5648 DIP("fldcw %s\n", dis_buf); 5649 assign( t64, mkIRExprCCall( 5650 Ity_I64, 0/*regparms*/, 5651 "amd64g_check_fldcw", 5652 &amd64g_check_fldcw, 5653 mkIRExprVec_1( 5654 unop( Iop_16Uto64, 5655 loadLE(Ity_I16, mkexpr(addr))) 5656 ) 5657 ) 5658 ); 5659 5660 put_fpround( unop(Iop_64to32, mkexpr(t64)) ); 5661 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 5662 put_emwarn( mkexpr(ew) ); 5663 /* Finally, if an emulation warning was reported, 5664 side-exit to the next insn, reporting the warning, 5665 so that Valgrind's dispatcher sees the warning. 
*/ 5666 stmt( 5667 IRStmt_Exit( 5668 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 5669 Ijk_EmWarn, 5670 IRConst_U64( guest_RIP_bbstart+delta ), 5671 OFFB_RIP 5672 ) 5673 ); 5674 break; 5675 } 5676 5677 case 6: { /* FNSTENV m28 */ 5678 /* Uses dirty helper: 5679 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */ 5680 IRDirty* d = unsafeIRDirty_0_N ( 5681 0/*regparms*/, 5682 "amd64g_dirtyhelper_FSTENV", 5683 &amd64g_dirtyhelper_FSTENV, 5684 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 5685 ); 5686 /* declare we're writing memory */ 5687 d->mFx = Ifx_Write; 5688 d->mAddr = mkexpr(addr); 5689 d->mSize = 28; 5690 5691 /* declare we're reading guest state */ 5692 d->nFxState = 4; 5693 vex_bzero(&d->fxState, sizeof(d->fxState)); 5694 5695 d->fxState[0].fx = Ifx_Read; 5696 d->fxState[0].offset = OFFB_FTOP; 5697 d->fxState[0].size = sizeof(UInt); 5698 5699 d->fxState[1].fx = Ifx_Read; 5700 d->fxState[1].offset = OFFB_FPTAGS; 5701 d->fxState[1].size = 8 * sizeof(UChar); 5702 5703 d->fxState[2].fx = Ifx_Read; 5704 d->fxState[2].offset = OFFB_FPROUND; 5705 d->fxState[2].size = sizeof(ULong); 5706 5707 d->fxState[3].fx = Ifx_Read; 5708 d->fxState[3].offset = OFFB_FC3210; 5709 d->fxState[3].size = sizeof(ULong); 5710 5711 stmt( IRStmt_Dirty(d) ); 5712 5713 DIP("fnstenv %s\n", dis_buf); 5714 break; 5715 } 5716 5717 case 7: /* FNSTCW */ 5718 /* Fake up a native x87 FPU control word. The only 5719 thing it depends on is FPROUND[1:0], so call a clean 5720 helper to cook it up. 
*/ 5721 /* ULong amd64g_create_fpucw ( ULong fpround ) */ 5722 DIP("fnstcw %s\n", dis_buf); 5723 storeLE( 5724 mkexpr(addr), 5725 unop( Iop_64to16, 5726 mkIRExprCCall( 5727 Ity_I64, 0/*regp*/, 5728 "amd64g_create_fpucw", &amd64g_create_fpucw, 5729 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) ) 5730 ) 5731 ) 5732 ); 5733 break; 5734 5735 default: 5736 vex_printf("unhandled opc_aux = 0x%2x\n", 5737 (UInt)gregLO3ofRM(modrm)); 5738 vex_printf("first_opcode == 0xD9\n"); 5739 goto decode_fail; 5740 } 5741 5742 } else { 5743 delta++; 5744 switch (modrm) { 5745 5746 case 0xC0 ... 0xC7: /* FLD %st(?) */ 5747 r_src = (UInt)modrm - 0xC0; 5748 DIP("fld %%st(%u)\n", r_src); 5749 t1 = newTemp(Ity_F64); 5750 assign(t1, get_ST(r_src)); 5751 fp_push(); 5752 put_ST(0, mkexpr(t1)); 5753 break; 5754 5755 case 0xC8 ... 0xCF: /* FXCH %st(?) */ 5756 r_src = (UInt)modrm - 0xC8; 5757 DIP("fxch %%st(%u)\n", r_src); 5758 t1 = newTemp(Ity_F64); 5759 t2 = newTemp(Ity_F64); 5760 assign(t1, get_ST(0)); 5761 assign(t2, get_ST(r_src)); 5762 put_ST_UNCHECKED(0, mkexpr(t2)); 5763 put_ST_UNCHECKED(r_src, mkexpr(t1)); 5764 break; 5765 5766 case 0xE0: /* FCHS */ 5767 DIP("fchs\n"); 5768 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0))); 5769 break; 5770 5771 case 0xE1: /* FABS */ 5772 DIP("fabs\n"); 5773 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0))); 5774 break; 5775 5776 case 0xE5: { /* FXAM */ 5777 /* This is an interesting one. It examines %st(0), 5778 regardless of whether the tag says it's empty or not. 5779 Here, just pass both the tag (in our format) and the 5780 value (as a double, actually a ULong) to a helper 5781 function. 
*/ 5782 IRExpr** args 5783 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)), 5784 unop(Iop_ReinterpF64asI64, 5785 get_ST_UNCHECKED(0)) ); 5786 put_C3210(mkIRExprCCall( 5787 Ity_I64, 5788 0/*regparm*/, 5789 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM, 5790 args 5791 )); 5792 DIP("fxam\n"); 5793 break; 5794 } 5795 5796 case 0xE8: /* FLD1 */ 5797 DIP("fld1\n"); 5798 fp_push(); 5799 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */ 5800 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL))); 5801 break; 5802 5803 case 0xE9: /* FLDL2T */ 5804 DIP("fldl2t\n"); 5805 fp_push(); 5806 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */ 5807 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL))); 5808 break; 5809 5810 case 0xEA: /* FLDL2E */ 5811 DIP("fldl2e\n"); 5812 fp_push(); 5813 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */ 5814 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL))); 5815 break; 5816 5817 case 0xEB: /* FLDPI */ 5818 DIP("fldpi\n"); 5819 fp_push(); 5820 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */ 5821 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL))); 5822 break; 5823 5824 case 0xEC: /* FLDLG2 */ 5825 DIP("fldlg2\n"); 5826 fp_push(); 5827 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */ 5828 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL))); 5829 break; 5830 5831 case 0xED: /* FLDLN2 */ 5832 DIP("fldln2\n"); 5833 fp_push(); 5834 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */ 5835 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL))); 5836 break; 5837 5838 case 0xEE: /* FLDZ */ 5839 DIP("fldz\n"); 5840 fp_push(); 5841 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */ 5842 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); 5843 break; 5844 5845 case 0xF0: /* F2XM1 */ 5846 DIP("f2xm1\n"); 5847 put_ST_UNCHECKED(0, 5848 binop(Iop_2xm1F64, 5849 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5850 
get_ST(0))); 5851 break; 5852 5853 case 0xF1: /* FYL2X */ 5854 DIP("fyl2x\n"); 5855 put_ST_UNCHECKED(1, 5856 triop(Iop_Yl2xF64, 5857 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5858 get_ST(1), 5859 get_ST(0))); 5860 fp_pop(); 5861 break; 5862 5863 case 0xF2: { /* FPTAN */ 5864 DIP("fptan\n"); 5865 IRTemp argD = newTemp(Ity_F64); 5866 assign(argD, get_ST(0)); 5867 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 5868 IRTemp resD = newTemp(Ity_F64); 5869 assign(resD, 5870 IRExpr_ITE( 5871 mkexpr(argOK), 5872 binop(Iop_TanF64, 5873 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5874 mkexpr(argD)), 5875 mkexpr(argD)) 5876 ); 5877 put_ST_UNCHECKED(0, mkexpr(resD)); 5878 /* Conditionally push 1.0 on the stack, if the arg is 5879 in range */ 5880 maybe_fp_push(argOK); 5881 maybe_put_ST(argOK, 0, 5882 IRExpr_Const(IRConst_F64(1.0))); 5883 set_C2( binop(Iop_Xor64, 5884 unop(Iop_1Uto64, mkexpr(argOK)), 5885 mkU64(1)) ); 5886 break; 5887 } 5888 5889 case 0xF3: /* FPATAN */ 5890 DIP("fpatan\n"); 5891 put_ST_UNCHECKED(1, 5892 triop(Iop_AtanF64, 5893 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5894 get_ST(1), 5895 get_ST(0))); 5896 fp_pop(); 5897 break; 5898 5899 case 0xF4: { /* FXTRACT */ 5900 IRTemp argF = newTemp(Ity_F64); 5901 IRTemp sigF = newTemp(Ity_F64); 5902 IRTemp expF = newTemp(Ity_F64); 5903 IRTemp argI = newTemp(Ity_I64); 5904 IRTemp sigI = newTemp(Ity_I64); 5905 IRTemp expI = newTemp(Ity_I64); 5906 DIP("fxtract\n"); 5907 assign( argF, get_ST(0) ); 5908 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF))); 5909 assign( sigI, 5910 mkIRExprCCall( 5911 Ity_I64, 0/*regparms*/, 5912 "x86amd64g_calculate_FXTRACT", 5913 &x86amd64g_calculate_FXTRACT, 5914 mkIRExprVec_2( mkexpr(argI), 5915 mkIRExpr_HWord(0)/*sig*/ )) 5916 ); 5917 assign( expI, 5918 mkIRExprCCall( 5919 Ity_I64, 0/*regparms*/, 5920 "x86amd64g_calculate_FXTRACT", 5921 &x86amd64g_calculate_FXTRACT, 5922 mkIRExprVec_2( mkexpr(argI), 5923 mkIRExpr_HWord(1)/*exp*/ )) 5924 ); 5925 assign( sigF, 
unop(Iop_ReinterpI64asF64, mkexpr(sigI)) ); 5926 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) ); 5927 /* exponent */ 5928 put_ST_UNCHECKED(0, mkexpr(expF) ); 5929 fp_push(); 5930 /* significand */ 5931 put_ST(0, mkexpr(sigF) ); 5932 break; 5933 } 5934 5935 case 0xF5: { /* FPREM1 -- IEEE compliant */ 5936 IRTemp a1 = newTemp(Ity_F64); 5937 IRTemp a2 = newTemp(Ity_F64); 5938 DIP("fprem1\n"); 5939 /* Do FPREM1 twice, once to get the remainder, and once 5940 to get the C3210 flag values. */ 5941 assign( a1, get_ST(0) ); 5942 assign( a2, get_ST(1) ); 5943 put_ST_UNCHECKED(0, 5944 triop(Iop_PRem1F64, 5945 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5946 mkexpr(a1), 5947 mkexpr(a2))); 5948 put_C3210( 5949 unop(Iop_32Uto64, 5950 triop(Iop_PRem1C3210F64, 5951 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5952 mkexpr(a1), 5953 mkexpr(a2)) )); 5954 break; 5955 } 5956 5957 case 0xF7: /* FINCSTP */ 5958 DIP("fincstp\n"); 5959 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); 5960 break; 5961 5962 case 0xF8: { /* FPREM -- not IEEE compliant */ 5963 IRTemp a1 = newTemp(Ity_F64); 5964 IRTemp a2 = newTemp(Ity_F64); 5965 DIP("fprem\n"); 5966 /* Do FPREM twice, once to get the remainder, and once 5967 to get the C3210 flag values. 
*/ 5968 assign( a1, get_ST(0) ); 5969 assign( a2, get_ST(1) ); 5970 put_ST_UNCHECKED(0, 5971 triop(Iop_PRemF64, 5972 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5973 mkexpr(a1), 5974 mkexpr(a2))); 5975 put_C3210( 5976 unop(Iop_32Uto64, 5977 triop(Iop_PRemC3210F64, 5978 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5979 mkexpr(a1), 5980 mkexpr(a2)) )); 5981 break; 5982 } 5983 5984 case 0xF9: /* FYL2XP1 */ 5985 DIP("fyl2xp1\n"); 5986 put_ST_UNCHECKED(1, 5987 triop(Iop_Yl2xp1F64, 5988 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5989 get_ST(1), 5990 get_ST(0))); 5991 fp_pop(); 5992 break; 5993 5994 case 0xFA: /* FSQRT */ 5995 DIP("fsqrt\n"); 5996 put_ST_UNCHECKED(0, 5997 binop(Iop_SqrtF64, 5998 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 5999 get_ST(0))); 6000 break; 6001 6002 case 0xFB: { /* FSINCOS */ 6003 DIP("fsincos\n"); 6004 IRTemp argD = newTemp(Ity_F64); 6005 assign(argD, get_ST(0)); 6006 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6007 IRTemp resD = newTemp(Ity_F64); 6008 assign(resD, 6009 IRExpr_ITE( 6010 mkexpr(argOK), 6011 binop(Iop_SinF64, 6012 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6013 mkexpr(argD)), 6014 mkexpr(argD)) 6015 ); 6016 put_ST_UNCHECKED(0, mkexpr(resD)); 6017 /* Conditionally push the cos value on the stack, if 6018 the arg is in range */ 6019 maybe_fp_push(argOK); 6020 maybe_put_ST(argOK, 0, 6021 binop(Iop_CosF64, 6022 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6023 mkexpr(argD))); 6024 set_C2( binop(Iop_Xor64, 6025 unop(Iop_1Uto64, mkexpr(argOK)), 6026 mkU64(1)) ); 6027 break; 6028 } 6029 6030 case 0xFC: /* FRNDINT */ 6031 DIP("frndint\n"); 6032 put_ST_UNCHECKED(0, 6033 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) ); 6034 break; 6035 6036 case 0xFD: /* FSCALE */ 6037 DIP("fscale\n"); 6038 put_ST_UNCHECKED(0, 6039 triop(Iop_ScaleF64, 6040 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6041 get_ST(0), 6042 get_ST(1))); 6043 break; 6044 6045 case 0xFE: /* FSIN */ 6046 case 0xFF: { /* FCOS 
*/ 6047 Bool isSIN = modrm == 0xFE; 6048 DIP("%s\n", isSIN ? "fsin" : "fcos"); 6049 IRTemp argD = newTemp(Ity_F64); 6050 assign(argD, get_ST(0)); 6051 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); 6052 IRTemp resD = newTemp(Ity_F64); 6053 assign(resD, 6054 IRExpr_ITE( 6055 mkexpr(argOK), 6056 binop(isSIN ? Iop_SinF64 : Iop_CosF64, 6057 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6058 mkexpr(argD)), 6059 mkexpr(argD)) 6060 ); 6061 put_ST_UNCHECKED(0, mkexpr(resD)); 6062 set_C2( binop(Iop_Xor64, 6063 unop(Iop_1Uto64, mkexpr(argOK)), 6064 mkU64(1)) ); 6065 break; 6066 } 6067 6068 default: 6069 goto decode_fail; 6070 } 6071 } 6072 } 6073 6074 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */ 6075 else 6076 if (first_opcode == 0xDA) { 6077 6078 if (modrm < 0xC0) { 6079 6080 /* bits 5,4,3 are an opcode extension, and the modRM also 6081 specifies an address. */ 6082 IROp fop; 6083 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6084 delta += len; 6085 switch (gregLO3ofRM(modrm)) { 6086 6087 case 0: /* FIADD m32int */ /* ST(0) += m32int */ 6088 DIP("fiaddl %s\n", dis_buf); 6089 fop = Iop_AddF64; 6090 goto do_fop_m32; 6091 6092 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */ 6093 DIP("fimull %s\n", dis_buf); 6094 fop = Iop_MulF64; 6095 goto do_fop_m32; 6096 6097 case 4: /* FISUB m32int */ /* ST(0) -= m32int */ 6098 DIP("fisubl %s\n", dis_buf); 6099 fop = Iop_SubF64; 6100 goto do_fop_m32; 6101 6102 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */ 6103 DIP("fisubrl %s\n", dis_buf); 6104 fop = Iop_SubF64; 6105 goto do_foprev_m32; 6106 6107 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */ 6108 DIP("fisubl %s\n", dis_buf); 6109 fop = Iop_DivF64; 6110 goto do_fop_m32; 6111 6112 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */ 6113 DIP("fidivrl %s\n", dis_buf); 6114 fop = Iop_DivF64; 6115 goto do_foprev_m32; 6116 6117 do_fop_m32: 6118 put_ST_UNCHECKED(0, 6119 triop(fop, 6120 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 
6121 get_ST(0), 6122 unop(Iop_I32StoF64, 6123 loadLE(Ity_I32, mkexpr(addr))))); 6124 break; 6125 6126 do_foprev_m32: 6127 put_ST_UNCHECKED(0, 6128 triop(fop, 6129 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6130 unop(Iop_I32StoF64, 6131 loadLE(Ity_I32, mkexpr(addr))), 6132 get_ST(0))); 6133 break; 6134 6135 default: 6136 vex_printf("unhandled opc_aux = 0x%2x\n", 6137 (UInt)gregLO3ofRM(modrm)); 6138 vex_printf("first_opcode == 0xDA\n"); 6139 goto decode_fail; 6140 } 6141 6142 } else { 6143 6144 delta++; 6145 switch (modrm) { 6146 6147 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */ 6148 r_src = (UInt)modrm - 0xC0; 6149 DIP("fcmovb %%st(%u), %%st(0)\n", r_src); 6150 put_ST_UNCHECKED(0, 6151 IRExpr_ITE( 6152 mk_amd64g_calculate_condition(AMD64CondB), 6153 get_ST(r_src), get_ST(0)) ); 6154 break; 6155 6156 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */ 6157 r_src = (UInt)modrm - 0xC8; 6158 DIP("fcmovz %%st(%u), %%st(0)\n", r_src); 6159 put_ST_UNCHECKED(0, 6160 IRExpr_ITE( 6161 mk_amd64g_calculate_condition(AMD64CondZ), 6162 get_ST(r_src), get_ST(0)) ); 6163 break; 6164 6165 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */ 6166 r_src = (UInt)modrm - 0xD0; 6167 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src); 6168 put_ST_UNCHECKED(0, 6169 IRExpr_ITE( 6170 mk_amd64g_calculate_condition(AMD64CondBE), 6171 get_ST(r_src), get_ST(0)) ); 6172 break; 6173 6174 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ 6175 r_src = (UInt)modrm - 0xD8; 6176 DIP("fcmovu %%st(%u), %%st(0)\n", r_src); 6177 put_ST_UNCHECKED(0, 6178 IRExpr_ITE( 6179 mk_amd64g_calculate_condition(AMD64CondP), 6180 get_ST(r_src), get_ST(0)) ); 6181 break; 6182 6183 case 0xE9: /* FUCOMPP %st(0),%st(1) */ 6184 DIP("fucompp %%st(0),%%st(1)\n"); 6185 /* This forces C1 to zero, which isn't right. 
*/ 6186 put_C3210( 6187 unop(Iop_32Uto64, 6188 binop( Iop_And32, 6189 binop(Iop_Shl32, 6190 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6191 mkU8(8)), 6192 mkU32(0x4500) 6193 ))); 6194 fp_pop(); 6195 fp_pop(); 6196 break; 6197 6198 default: 6199 goto decode_fail; 6200 } 6201 6202 } 6203 } 6204 6205 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */ 6206 else 6207 if (first_opcode == 0xDB) { 6208 if (modrm < 0xC0) { 6209 6210 /* bits 5,4,3 are an opcode extension, and the modRM also 6211 specifies an address. */ 6212 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6213 delta += len; 6214 6215 switch (gregLO3ofRM(modrm)) { 6216 6217 case 0: /* FILD m32int */ 6218 DIP("fildl %s\n", dis_buf); 6219 fp_push(); 6220 put_ST(0, unop(Iop_I32StoF64, 6221 loadLE(Ity_I32, mkexpr(addr)))); 6222 break; 6223 6224 case 1: /* FISTTPL m32 (SSE3) */ 6225 DIP("fisttpl %s\n", dis_buf); 6226 storeLE( mkexpr(addr), 6227 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ); 6228 fp_pop(); 6229 break; 6230 6231 case 2: /* FIST m32 */ 6232 DIP("fistl %s\n", dis_buf); 6233 storeLE( mkexpr(addr), 6234 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6235 break; 6236 6237 case 3: /* FISTP m32 */ 6238 DIP("fistpl %s\n", dis_buf); 6239 storeLE( mkexpr(addr), 6240 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ); 6241 fp_pop(); 6242 break; 6243 6244 case 5: { /* FLD extended-real */ 6245 /* Uses dirty helper: 6246 ULong amd64g_loadF80le ( ULong ) 6247 addr holds the address. First, do a dirty call to 6248 get hold of the data. */ 6249 IRTemp val = newTemp(Ity_I64); 6250 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) ); 6251 6252 IRDirty* d = unsafeIRDirty_1_N ( 6253 val, 6254 0/*regparms*/, 6255 "amd64g_dirtyhelper_loadF80le", 6256 &amd64g_dirtyhelper_loadF80le, 6257 args 6258 ); 6259 /* declare that we're reading memory */ 6260 d->mFx = Ifx_Read; 6261 d->mAddr = mkexpr(addr); 6262 d->mSize = 10; 6263 6264 /* execute the dirty call, dumping the result in val. 
*/ 6265 stmt( IRStmt_Dirty(d) ); 6266 fp_push(); 6267 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val))); 6268 6269 DIP("fldt %s\n", dis_buf); 6270 break; 6271 } 6272 6273 case 7: { /* FSTP extended-real */ 6274 /* Uses dirty helper: 6275 void amd64g_storeF80le ( ULong addr, ULong data ) 6276 */ 6277 IRExpr** args 6278 = mkIRExprVec_2( mkexpr(addr), 6279 unop(Iop_ReinterpF64asI64, get_ST(0)) ); 6280 6281 IRDirty* d = unsafeIRDirty_0_N ( 6282 0/*regparms*/, 6283 "amd64g_dirtyhelper_storeF80le", 6284 &amd64g_dirtyhelper_storeF80le, 6285 args 6286 ); 6287 /* declare we're writing memory */ 6288 d->mFx = Ifx_Write; 6289 d->mAddr = mkexpr(addr); 6290 d->mSize = 10; 6291 6292 /* execute the dirty call. */ 6293 stmt( IRStmt_Dirty(d) ); 6294 fp_pop(); 6295 6296 DIP("fstpt\n %s", dis_buf); 6297 break; 6298 } 6299 6300 default: 6301 vex_printf("unhandled opc_aux = 0x%2x\n", 6302 (UInt)gregLO3ofRM(modrm)); 6303 vex_printf("first_opcode == 0xDB\n"); 6304 goto decode_fail; 6305 } 6306 6307 } else { 6308 6309 delta++; 6310 switch (modrm) { 6311 6312 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */ 6313 r_src = (UInt)modrm - 0xC0; 6314 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src); 6315 put_ST_UNCHECKED(0, 6316 IRExpr_ITE( 6317 mk_amd64g_calculate_condition(AMD64CondNB), 6318 get_ST(r_src), get_ST(0)) ); 6319 break; 6320 6321 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */ 6322 r_src = (UInt)modrm - 0xC8; 6323 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src); 6324 put_ST_UNCHECKED( 6325 0, 6326 IRExpr_ITE( 6327 mk_amd64g_calculate_condition(AMD64CondNZ), 6328 get_ST(r_src), 6329 get_ST(0) 6330 ) 6331 ); 6332 break; 6333 6334 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */ 6335 r_src = (UInt)modrm - 0xD0; 6336 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src); 6337 put_ST_UNCHECKED( 6338 0, 6339 IRExpr_ITE( 6340 mk_amd64g_calculate_condition(AMD64CondNBE), 6341 get_ST(r_src), 6342 get_ST(0) 6343 ) 6344 ); 6345 break; 6346 6347 case 0xD8 ... 
0xDF: /* FCMOVNU ST(i), ST(0) */ 6348 r_src = (UInt)modrm - 0xD8; 6349 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src); 6350 put_ST_UNCHECKED( 6351 0, 6352 IRExpr_ITE( 6353 mk_amd64g_calculate_condition(AMD64CondNP), 6354 get_ST(r_src), 6355 get_ST(0) 6356 ) 6357 ); 6358 break; 6359 6360 case 0xE2: 6361 DIP("fnclex\n"); 6362 break; 6363 6364 case 0xE3: { 6365 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/); 6366 DIP("fninit\n"); 6367 break; 6368 } 6369 6370 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */ 6371 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False ); 6372 break; 6373 6374 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */ 6375 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False ); 6376 break; 6377 6378 default: 6379 goto decode_fail; 6380 } 6381 } 6382 } 6383 6384 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */ 6385 else 6386 if (first_opcode == 0xDC) { 6387 if (modrm < 0xC0) { 6388 6389 /* bits 5,4,3 are an opcode extension, and the modRM also 6390 specifies an address. */ 6391 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6392 delta += len; 6393 6394 switch (gregLO3ofRM(modrm)) { 6395 6396 case 0: /* FADD double-real */ 6397 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True ); 6398 break; 6399 6400 case 1: /* FMUL double-real */ 6401 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True ); 6402 break; 6403 6404 //.. case 2: /* FCOM double-real */ 6405 //.. DIP("fcoml %s\n", dis_buf); 6406 //.. /* This forces C1 to zero, which isn't right. */ 6407 //.. put_C3210( 6408 //.. binop( Iop_And32, 6409 //.. binop(Iop_Shl32, 6410 //.. binop(Iop_CmpF64, 6411 //.. get_ST(0), 6412 //.. loadLE(Ity_F64,mkexpr(addr))), 6413 //.. mkU8(8)), 6414 //.. mkU32(0x4500) 6415 //.. )); 6416 //.. break; 6417 6418 case 3: /* FCOMP double-real */ 6419 DIP("fcompl %s\n", dis_buf); 6420 /* This forces C1 to zero, which isn't right. 
*/ 6421 put_C3210( 6422 unop(Iop_32Uto64, 6423 binop( Iop_And32, 6424 binop(Iop_Shl32, 6425 binop(Iop_CmpF64, 6426 get_ST(0), 6427 loadLE(Ity_F64,mkexpr(addr))), 6428 mkU8(8)), 6429 mkU32(0x4500) 6430 ))); 6431 fp_pop(); 6432 break; 6433 6434 case 4: /* FSUB double-real */ 6435 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True ); 6436 break; 6437 6438 case 5: /* FSUBR double-real */ 6439 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True ); 6440 break; 6441 6442 case 6: /* FDIV double-real */ 6443 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True ); 6444 break; 6445 6446 case 7: /* FDIVR double-real */ 6447 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True ); 6448 break; 6449 6450 default: 6451 vex_printf("unhandled opc_aux = 0x%2x\n", 6452 (UInt)gregLO3ofRM(modrm)); 6453 vex_printf("first_opcode == 0xDC\n"); 6454 goto decode_fail; 6455 } 6456 6457 } else { 6458 6459 delta++; 6460 switch (modrm) { 6461 6462 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */ 6463 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False ); 6464 break; 6465 6466 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */ 6467 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False ); 6468 break; 6469 6470 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */ 6471 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False ); 6472 break; 6473 6474 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */ 6475 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False ); 6476 break; 6477 6478 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */ 6479 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False ); 6480 break; 6481 6482 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) 
*/ 6483 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False ); 6484 break; 6485 6486 default: 6487 goto decode_fail; 6488 } 6489 6490 } 6491 } 6492 6493 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */ 6494 else 6495 if (first_opcode == 0xDD) { 6496 6497 if (modrm < 0xC0) { 6498 6499 /* bits 5,4,3 are an opcode extension, and the modRM also 6500 specifies an address. */ 6501 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6502 delta += len; 6503 6504 switch (gregLO3ofRM(modrm)) { 6505 6506 case 0: /* FLD double-real */ 6507 DIP("fldl %s\n", dis_buf); 6508 fp_push(); 6509 put_ST(0, loadLE(Ity_F64, mkexpr(addr))); 6510 break; 6511 6512 case 1: /* FISTTPQ m64 (SSE3) */ 6513 DIP("fistppll %s\n", dis_buf); 6514 storeLE( mkexpr(addr), 6515 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) ); 6516 fp_pop(); 6517 break; 6518 6519 case 2: /* FST double-real */ 6520 DIP("fstl %s\n", dis_buf); 6521 storeLE(mkexpr(addr), get_ST(0)); 6522 break; 6523 6524 case 3: /* FSTP double-real */ 6525 DIP("fstpl %s\n", dis_buf); 6526 storeLE(mkexpr(addr), get_ST(0)); 6527 fp_pop(); 6528 break; 6529 6530 case 4: { /* FRSTOR m94/m108 */ 6531 IRTemp ew = newTemp(Ity_I32); 6532 IRTemp w64 = newTemp(Ity_I64); 6533 IRDirty* d; 6534 if ( have66(pfx) ) { 6535 /* Uses dirty helper: 6536 VexEmNote amd64g_dirtyhelper_FRSTORS 6537 ( VexGuestAMD64State*, HWord ) */ 6538 d = unsafeIRDirty_0_N ( 6539 0/*regparms*/, 6540 "amd64g_dirtyhelper_FRSTORS", 6541 &amd64g_dirtyhelper_FRSTORS, 6542 mkIRExprVec_1( mkexpr(addr) ) 6543 ); 6544 d->mSize = 94; 6545 } else { 6546 /* Uses dirty helper: 6547 VexEmNote amd64g_dirtyhelper_FRSTOR 6548 ( VexGuestAMD64State*, HWord ) */ 6549 d = unsafeIRDirty_0_N ( 6550 0/*regparms*/, 6551 "amd64g_dirtyhelper_FRSTOR", 6552 &amd64g_dirtyhelper_FRSTOR, 6553 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6554 ); 6555 d->mSize = 108; 6556 } 6557 6558 d->tmp = w64; 6559 /* declare we're reading memory */ 6560 d->mFx = Ifx_Read; 6561 d->mAddr = 
mkexpr(addr); 6562 /* d->mSize set above */ 6563 6564 /* declare we're writing guest state */ 6565 d->nFxState = 5; 6566 vex_bzero(&d->fxState, sizeof(d->fxState)); 6567 6568 d->fxState[0].fx = Ifx_Write; 6569 d->fxState[0].offset = OFFB_FTOP; 6570 d->fxState[0].size = sizeof(UInt); 6571 6572 d->fxState[1].fx = Ifx_Write; 6573 d->fxState[1].offset = OFFB_FPREGS; 6574 d->fxState[1].size = 8 * sizeof(ULong); 6575 6576 d->fxState[2].fx = Ifx_Write; 6577 d->fxState[2].offset = OFFB_FPTAGS; 6578 d->fxState[2].size = 8 * sizeof(UChar); 6579 6580 d->fxState[3].fx = Ifx_Write; 6581 d->fxState[3].offset = OFFB_FPROUND; 6582 d->fxState[3].size = sizeof(ULong); 6583 6584 d->fxState[4].fx = Ifx_Write; 6585 d->fxState[4].offset = OFFB_FC3210; 6586 d->fxState[4].size = sizeof(ULong); 6587 6588 stmt( IRStmt_Dirty(d) ); 6589 6590 /* ew contains any emulation warning we may need to 6591 issue. If needed, side-exit to the next insn, 6592 reporting the warning, so that Valgrind's dispatcher 6593 sees the warning. 
*/ 6594 assign(ew, unop(Iop_64to32,mkexpr(w64)) ); 6595 put_emwarn( mkexpr(ew) ); 6596 stmt( 6597 IRStmt_Exit( 6598 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)), 6599 Ijk_EmWarn, 6600 IRConst_U64( guest_RIP_bbstart+delta ), 6601 OFFB_RIP 6602 ) 6603 ); 6604 6605 if ( have66(pfx) ) { 6606 DIP("frstors %s\n", dis_buf); 6607 } else { 6608 DIP("frstor %s\n", dis_buf); 6609 } 6610 break; 6611 } 6612 6613 case 6: { /* FNSAVE m94/m108 */ 6614 IRDirty *d; 6615 if ( have66(pfx) ) { 6616 /* Uses dirty helper: 6617 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, 6618 HWord ) */ 6619 d = unsafeIRDirty_0_N ( 6620 0/*regparms*/, 6621 "amd64g_dirtyhelper_FNSAVES", 6622 &amd64g_dirtyhelper_FNSAVES, 6623 mkIRExprVec_1( mkexpr(addr) ) 6624 ); 6625 d->mSize = 94; 6626 } else { 6627 /* Uses dirty helper: 6628 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*, 6629 HWord ) */ 6630 d = unsafeIRDirty_0_N ( 6631 0/*regparms*/, 6632 "amd64g_dirtyhelper_FNSAVE", 6633 &amd64g_dirtyhelper_FNSAVE, 6634 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) ) 6635 ); 6636 d->mSize = 108; 6637 } 6638 6639 /* declare we're writing memory */ 6640 d->mFx = Ifx_Write; 6641 d->mAddr = mkexpr(addr); 6642 /* d->mSize set above */ 6643 6644 /* declare we're reading guest state */ 6645 d->nFxState = 5; 6646 vex_bzero(&d->fxState, sizeof(d->fxState)); 6647 6648 d->fxState[0].fx = Ifx_Read; 6649 d->fxState[0].offset = OFFB_FTOP; 6650 d->fxState[0].size = sizeof(UInt); 6651 6652 d->fxState[1].fx = Ifx_Read; 6653 d->fxState[1].offset = OFFB_FPREGS; 6654 d->fxState[1].size = 8 * sizeof(ULong); 6655 6656 d->fxState[2].fx = Ifx_Read; 6657 d->fxState[2].offset = OFFB_FPTAGS; 6658 d->fxState[2].size = 8 * sizeof(UChar); 6659 6660 d->fxState[3].fx = Ifx_Read; 6661 d->fxState[3].offset = OFFB_FPROUND; 6662 d->fxState[3].size = sizeof(ULong); 6663 6664 d->fxState[4].fx = Ifx_Read; 6665 d->fxState[4].offset = OFFB_FC3210; 6666 d->fxState[4].size = sizeof(ULong); 6667 6668 stmt( IRStmt_Dirty(d) ); 6669 6670 if ( 
have66(pfx) ) { 6671 DIP("fnsaves %s\n", dis_buf); 6672 } else { 6673 DIP("fnsave %s\n", dis_buf); 6674 } 6675 break; 6676 } 6677 6678 case 7: { /* FNSTSW m16 */ 6679 IRExpr* sw = get_FPU_sw(); 6680 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16); 6681 storeLE( mkexpr(addr), sw ); 6682 DIP("fnstsw %s\n", dis_buf); 6683 break; 6684 } 6685 6686 default: 6687 vex_printf("unhandled opc_aux = 0x%2x\n", 6688 (UInt)gregLO3ofRM(modrm)); 6689 vex_printf("first_opcode == 0xDD\n"); 6690 goto decode_fail; 6691 } 6692 } else { 6693 delta++; 6694 switch (modrm) { 6695 6696 case 0xC0 ... 0xC7: /* FFREE %st(?) */ 6697 r_dst = (UInt)modrm - 0xC0; 6698 DIP("ffree %%st(%u)\n", r_dst); 6699 put_ST_TAG ( r_dst, mkU8(0) ); 6700 break; 6701 6702 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */ 6703 r_dst = (UInt)modrm - 0xD0; 6704 DIP("fst %%st(0),%%st(%u)\n", r_dst); 6705 /* P4 manual says: "If the destination operand is a 6706 non-empty register, the invalid-operation exception 6707 is not generated. Hence put_ST_UNCHECKED. */ 6708 put_ST_UNCHECKED(r_dst, get_ST(0)); 6709 break; 6710 6711 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */ 6712 r_dst = (UInt)modrm - 0xD8; 6713 DIP("fstp %%st(0),%%st(%u)\n", r_dst); 6714 /* P4 manual says: "If the destination operand is a 6715 non-empty register, the invalid-operation exception 6716 is not generated. Hence put_ST_UNCHECKED. */ 6717 put_ST_UNCHECKED(r_dst, get_ST(0)); 6718 fp_pop(); 6719 break; 6720 6721 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */ 6722 r_dst = (UInt)modrm - 0xE0; 6723 DIP("fucom %%st(0),%%st(%u)\n", r_dst); 6724 /* This forces C1 to zero, which isn't right. */ 6725 put_C3210( 6726 unop(Iop_32Uto64, 6727 binop( Iop_And32, 6728 binop(Iop_Shl32, 6729 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6730 mkU8(8)), 6731 mkU32(0x4500) 6732 ))); 6733 break; 6734 6735 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) 
*/ 6736 r_dst = (UInt)modrm - 0xE8; 6737 DIP("fucomp %%st(0),%%st(%u)\n", r_dst); 6738 /* This forces C1 to zero, which isn't right. */ 6739 put_C3210( 6740 unop(Iop_32Uto64, 6741 binop( Iop_And32, 6742 binop(Iop_Shl32, 6743 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)), 6744 mkU8(8)), 6745 mkU32(0x4500) 6746 ))); 6747 fp_pop(); 6748 break; 6749 6750 default: 6751 goto decode_fail; 6752 } 6753 } 6754 } 6755 6756 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */ 6757 else 6758 if (first_opcode == 0xDE) { 6759 6760 if (modrm < 0xC0) { 6761 6762 /* bits 5,4,3 are an opcode extension, and the modRM also 6763 specifies an address. */ 6764 IROp fop; 6765 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6766 delta += len; 6767 6768 switch (gregLO3ofRM(modrm)) { 6769 6770 case 0: /* FIADD m16int */ /* ST(0) += m16int */ 6771 DIP("fiaddw %s\n", dis_buf); 6772 fop = Iop_AddF64; 6773 goto do_fop_m16; 6774 6775 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */ 6776 DIP("fimulw %s\n", dis_buf); 6777 fop = Iop_MulF64; 6778 goto do_fop_m16; 6779 6780 case 4: /* FISUB m16int */ /* ST(0) -= m16int */ 6781 DIP("fisubw %s\n", dis_buf); 6782 fop = Iop_SubF64; 6783 goto do_fop_m16; 6784 6785 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */ 6786 DIP("fisubrw %s\n", dis_buf); 6787 fop = Iop_SubF64; 6788 goto do_foprev_m16; 6789 6790 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */ 6791 DIP("fisubw %s\n", dis_buf); 6792 fop = Iop_DivF64; 6793 goto do_fop_m16; 6794 6795 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */ 6796 DIP("fidivrw %s\n", dis_buf); 6797 fop = Iop_DivF64; 6798 goto do_foprev_m16; 6799 6800 do_fop_m16: 6801 put_ST_UNCHECKED(0, 6802 triop(fop, 6803 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6804 get_ST(0), 6805 unop(Iop_I32StoF64, 6806 unop(Iop_16Sto32, 6807 loadLE(Ity_I16, mkexpr(addr)))))); 6808 break; 6809 6810 do_foprev_m16: 6811 put_ST_UNCHECKED(0, 6812 triop(fop, 6813 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 6814 
unop(Iop_I32StoF64, 6815 unop(Iop_16Sto32, 6816 loadLE(Ity_I16, mkexpr(addr)))), 6817 get_ST(0))); 6818 break; 6819 6820 default: 6821 vex_printf("unhandled opc_aux = 0x%2x\n", 6822 (UInt)gregLO3ofRM(modrm)); 6823 vex_printf("first_opcode == 0xDE\n"); 6824 goto decode_fail; 6825 } 6826 6827 } else { 6828 6829 delta++; 6830 switch (modrm) { 6831 6832 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */ 6833 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True ); 6834 break; 6835 6836 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */ 6837 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True ); 6838 break; 6839 6840 case 0xD9: /* FCOMPP %st(0),%st(1) */ 6841 DIP("fcompp %%st(0),%%st(1)\n"); 6842 /* This forces C1 to zero, which isn't right. */ 6843 put_C3210( 6844 unop(Iop_32Uto64, 6845 binop( Iop_And32, 6846 binop(Iop_Shl32, 6847 binop(Iop_CmpF64, get_ST(0), get_ST(1)), 6848 mkU8(8)), 6849 mkU32(0x4500) 6850 ))); 6851 fp_pop(); 6852 fp_pop(); 6853 break; 6854 6855 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */ 6856 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True ); 6857 break; 6858 6859 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */ 6860 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True ); 6861 break; 6862 6863 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */ 6864 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True ); 6865 break; 6866 6867 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */ 6868 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True ); 6869 break; 6870 6871 default: 6872 goto decode_fail; 6873 } 6874 6875 } 6876 } 6877 6878 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */ 6879 else 6880 if (first_opcode == 0xDF) { 6881 6882 if (modrm < 0xC0) { 6883 6884 /* bits 5,4,3 are an opcode extension, and the modRM also 6885 specifies an address. 
*/ 6886 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 6887 delta += len; 6888 6889 switch (gregLO3ofRM(modrm)) { 6890 6891 case 0: /* FILD m16int */ 6892 DIP("fildw %s\n", dis_buf); 6893 fp_push(); 6894 put_ST(0, unop(Iop_I32StoF64, 6895 unop(Iop_16Sto32, 6896 loadLE(Ity_I16, mkexpr(addr))))); 6897 break; 6898 6899 case 1: /* FISTTPS m16 (SSE3) */ 6900 DIP("fisttps %s\n", dis_buf); 6901 storeLE( mkexpr(addr), 6902 x87ishly_qnarrow_32_to_16( 6903 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) )); 6904 fp_pop(); 6905 break; 6906 6907 case 2: /* FIST m16 */ 6908 DIP("fists %s\n", dis_buf); 6909 storeLE( mkexpr(addr), 6910 x87ishly_qnarrow_32_to_16( 6911 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6912 break; 6913 6914 case 3: /* FISTP m16 */ 6915 DIP("fistps %s\n", dis_buf); 6916 storeLE( mkexpr(addr), 6917 x87ishly_qnarrow_32_to_16( 6918 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) )); 6919 fp_pop(); 6920 break; 6921 6922 case 5: /* FILD m64 */ 6923 DIP("fildll %s\n", dis_buf); 6924 fp_push(); 6925 put_ST(0, binop(Iop_I64StoF64, 6926 get_roundingmode(), 6927 loadLE(Ity_I64, mkexpr(addr)))); 6928 break; 6929 6930 case 7: /* FISTP m64 */ 6931 DIP("fistpll %s\n", dis_buf); 6932 storeLE( mkexpr(addr), 6933 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) ); 6934 fp_pop(); 6935 break; 6936 6937 default: 6938 vex_printf("unhandled opc_aux = 0x%2x\n", 6939 (UInt)gregLO3ofRM(modrm)); 6940 vex_printf("first_opcode == 0xDF\n"); 6941 goto decode_fail; 6942 } 6943 6944 } else { 6945 6946 delta++; 6947 switch (modrm) { 6948 6949 case 0xC0: /* FFREEP %st(0) */ 6950 DIP("ffreep %%st(%d)\n", 0); 6951 put_ST_TAG ( 0, mkU8(0) ); 6952 fp_pop(); 6953 break; 6954 6955 case 0xE0: /* FNSTSW %ax */ 6956 DIP("fnstsw %%ax\n"); 6957 /* Invent a plausible-looking FPU status word value and 6958 dump it in %AX: 6959 ((ftop & 7) << 11) | (c3210 & 0x4700) 6960 */ 6961 putIRegRAX( 6962 2, 6963 unop(Iop_32to16, 6964 binop(Iop_Or32, 6965 binop(Iop_Shl32, 6966 
                        binop(Iop_And32, get_ftop(), mkU32(7)),
                        mkU8(11)),
                  binop(Iop_And32,
                        unop(Iop_64to32, get_C3210()),
                        mkU32(0x4700))
            )));
            break;

         case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
            break;

         case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
            /* not really right since COMIP != UCOMIP */
            fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
            break;

         default:
            goto decode_fail;
      }
   }

   }

   else
   goto decode_fail;

   /* All successfully decoded x87 insns fall through to here. */
   *decode_ok = True;
   return delta;

  decode_fail:
   *decode_ok = False;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

/* Emit IR for the state change that any MMX insn except EMMS
   performs: FTOP := 0 and all eight FP tags := 1 (valid).  Must be
   called before translating the insn proper. */
static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   /* Set each tag via PutI so the write is register-rotation aware,
      matching how the x87 side accesses FPTAGS. */
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

/* Emit IR for EMMS: FTOP := 0 and all eight FP tags := 0 (empty). */
static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}


/* Read MMX register 0..7.  MMX registers alias the x87 register
   file, so this reads 64 bits directly out of the FPREGS area. */
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


/* Write MMX register 0..7 (aliased over the x87 register file).
   |e| must be an Ity_I64-typed expression. */
static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}


/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller.

   Decodes "op mmxreg-or-mem(E), mmxreg(G)" for the two-operand MMX
   ALU insns.  |opc| selects either a native IR binop (op) or, for
   PMADDWD/PSADBW, a clean-helper call (hAddr/hName).  |eLeft| means
   the E operand goes on the left of the binop (pack/unpack cases);
   |invG| means the G operand is complemented first (PANDN).
   Returns the updated instruction offset. */
static
ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
                                Prefix pfx,
                                Long delta,
                                UChar opc,
                                const HChar* name,
                                Bool show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;
   IROp    op    = Iop_INVALID;
   void*   hAddr = NULL;
   const HChar* hName = NULL;
   Bool    eLeft = False;

   /* Route this opcode to a clean helper instead of an IR binop. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8; break;
      case 0xE3: op = Iop_Avg16Ux4; break;
      case 0xEE: op = Iop_Max16Sx4; break;
      case 0xDE: op = Iop_Max8Ux8; break;
      case 0xEA: op = Iop_Min16Sx4; break;
      case 0xDA: op = Iop_Min8Ux8; break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (UInt)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch the E operand: either another MMX register or a 64-bit
      load from the address given by the modRM. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64,
mkexpr(addr)); 7173 } 7174 7175 if (eLeft) { 7176 argL = argE; 7177 argR = argG; 7178 } else { 7179 argL = argG; 7180 argR = argE; 7181 } 7182 7183 if (op != Iop_INVALID) { 7184 vassert(hName == NULL); 7185 vassert(hAddr == NULL); 7186 assign(res, binop(op, argL, argR)); 7187 } else { 7188 vassert(hName != NULL); 7189 vassert(hAddr != NULL); 7190 assign( res, 7191 mkIRExprCCall( 7192 Ity_I64, 7193 0/*regparms*/, hName, hAddr, 7194 mkIRExprVec_2( argL, argR ) 7195 ) 7196 ); 7197 } 7198 7199 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 7200 7201 DIP("%s%s %s, %s\n", 7202 name, show_granularity ? nameMMXGran(opc & 3) : "", 7203 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ), 7204 nameMMXReg(gregLO3ofRM(modrm)) ); 7205 7206 return delta; 7207 } 7208 7209 7210 /* Vector by scalar shift of G by the amount specified at the bottom 7211 of E. This is a straight copy of dis_SSE_shiftG_byE. */ 7212 7213 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi, 7214 Prefix pfx, Long delta, 7215 const HChar* opname, IROp op ) 7216 { 7217 HChar dis_buf[50]; 7218 Int alen, size; 7219 IRTemp addr; 7220 Bool shl, shr, sar; 7221 UChar rm = getUChar(delta); 7222 IRTemp g0 = newTemp(Ity_I64); 7223 IRTemp g1 = newTemp(Ity_I64); 7224 IRTemp amt = newTemp(Ity_I64); 7225 IRTemp amt8 = newTemp(Ity_I8); 7226 7227 if (epartIsReg(rm)) { 7228 assign( amt, getMMXReg(eregLO3ofRM(rm)) ); 7229 DIP("%s %s,%s\n", opname, 7230 nameMMXReg(eregLO3ofRM(rm)), 7231 nameMMXReg(gregLO3ofRM(rm)) ); 7232 delta++; 7233 } else { 7234 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 7235 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 7236 DIP("%s %s,%s\n", opname, 7237 dis_buf, 7238 nameMMXReg(gregLO3ofRM(rm)) ); 7239 delta += alen; 7240 } 7241 assign( g0, getMMXReg(gregLO3ofRM(rm)) ); 7242 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 7243 7244 shl = shr = sar = False; 7245 size = 0; 7246 switch (op) { 7247 case Iop_ShlN16x4: shl = True; size = 32; break; 7248 case Iop_ShlN32x2: shl = True; size = 
32; break; 7249 case Iop_Shl64: shl = True; size = 64; break; 7250 case Iop_ShrN16x4: shr = True; size = 16; break; 7251 case Iop_ShrN32x2: shr = True; size = 32; break; 7252 case Iop_Shr64: shr = True; size = 64; break; 7253 case Iop_SarN16x4: sar = True; size = 16; break; 7254 case Iop_SarN32x2: sar = True; size = 32; break; 7255 default: vassert(0); 7256 } 7257 7258 if (shl || shr) { 7259 assign( 7260 g1, 7261 IRExpr_ITE( 7262 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7263 binop(op, mkexpr(g0), mkexpr(amt8)), 7264 mkU64(0) 7265 ) 7266 ); 7267 } else 7268 if (sar) { 7269 assign( 7270 g1, 7271 IRExpr_ITE( 7272 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)), 7273 binop(op, mkexpr(g0), mkexpr(amt8)), 7274 binop(op, mkexpr(g0), mkU8(size-1)) 7275 ) 7276 ); 7277 } else { 7278 vassert(0); 7279 } 7280 7281 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) ); 7282 return delta; 7283 } 7284 7285 7286 /* Vector by scalar shift of E by an immediate byte. This is a 7287 straight copy of dis_SSE_shiftE_imm. */ 7288 7289 static 7290 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op ) 7291 { 7292 Bool shl, shr, sar; 7293 UChar rm = getUChar(delta); 7294 IRTemp e0 = newTemp(Ity_I64); 7295 IRTemp e1 = newTemp(Ity_I64); 7296 UChar amt, size; 7297 vassert(epartIsReg(rm)); 7298 vassert(gregLO3ofRM(rm) == 2 7299 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 7300 amt = getUChar(delta+1); 7301 delta += 2; 7302 DIP("%s $%d,%s\n", opname, 7303 (Int)amt, 7304 nameMMXReg(eregLO3ofRM(rm)) ); 7305 7306 assign( e0, getMMXReg(eregLO3ofRM(rm)) ); 7307 7308 shl = shr = sar = False; 7309 size = 0; 7310 switch (op) { 7311 case Iop_ShlN16x4: shl = True; size = 16; break; 7312 case Iop_ShlN32x2: shl = True; size = 32; break; 7313 case Iop_Shl64: shl = True; size = 64; break; 7314 case Iop_SarN16x4: sar = True; size = 16; break; 7315 case Iop_SarN32x2: sar = True; size = 32; break; 7316 case Iop_ShrN16x4: shr = True; size = 16; break; 7317 case Iop_ShrN32x2: shr = True; size = 32; break; 
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      /* Logical shift: an out-of-range immediate yields zero;
         folded at translation time since amt is a constant. */
      assign( e1, amt >= size
                     ? mkU64(0)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      /* Arithmetic shift: an out-of-range immediate acts as a
         shift by size-1. */
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   return delta;
}


/* Completely handle all MMX instructions except emms. */

static
ULong dis_MMX ( Bool* decode_ok,
                const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
{
   Int   len;
   UChar modrm;
   HChar dis_buf[50];
   UChar opc = getUChar(delta);
   delta++;

   /* dis_MMX handles all insns except emms.  Every such insn marks
      all x87 tags valid and zeroes FTOP, so emit that first. */
   do_MMX_preamble();

   switch (opc) {

      case 0x6E:
         if (sz == 4) {
            /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
            /* 32-bit source is zero-extended into the 64-bit MMX reg. */
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         getIReg32(eregOfRexRM(pfx,modrm)) ) );
               DIP("movd %s, %s\n",
                   nameIReg32(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
               delta += len;
               putMMXReg(
                  gregLO3ofRM(modrm),
                  binop( Iop_32HLto64,
                         mkU32(0),
                         loadLE(Ity_I32, mkexpr(addr)) ) );
               DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
            }
         }
         else
         if (sz == 8) {
            /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
            modrm = getUChar(delta);
            if (epartIsReg(modrm)) {
               delta++;
               putMMXReg( gregLO3ofRM(modrm),
                          getIReg64(eregOfRexRM(pfx,modrm)) );
               DIP("movd %s, %s\n",
                   nameIReg64(eregOfRexRM(pfx,modrm)),
                   nameMMXReg(gregLO3ofRM(modrm)));
            } else {
               IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0
); 7397 delta += len; 7398 putMMXReg( gregLO3ofRM(modrm), 7399 loadLE(Ity_I64, mkexpr(addr)) ); 7400 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7401 } 7402 } 7403 else { 7404 goto mmx_decode_failure; 7405 } 7406 break; 7407 7408 case 0x7E: 7409 if (sz == 4) { 7410 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */ 7411 modrm = getUChar(delta); 7412 if (epartIsReg(modrm)) { 7413 delta++; 7414 putIReg32( eregOfRexRM(pfx,modrm), 7415 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7416 DIP("movd %s, %s\n", 7417 nameMMXReg(gregLO3ofRM(modrm)), 7418 nameIReg32(eregOfRexRM(pfx,modrm))); 7419 } else { 7420 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7421 delta += len; 7422 storeLE( mkexpr(addr), 7423 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) ); 7424 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7425 } 7426 } 7427 else 7428 if (sz == 8) { 7429 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */ 7430 modrm = getUChar(delta); 7431 if (epartIsReg(modrm)) { 7432 delta++; 7433 putIReg64( eregOfRexRM(pfx,modrm), 7434 getMMXReg(gregLO3ofRM(modrm)) ); 7435 DIP("movd %s, %s\n", 7436 nameMMXReg(gregLO3ofRM(modrm)), 7437 nameIReg64(eregOfRexRM(pfx,modrm))); 7438 } else { 7439 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7440 delta += len; 7441 storeLE( mkexpr(addr), 7442 getMMXReg(gregLO3ofRM(modrm)) ); 7443 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7444 } 7445 } else { 7446 goto mmx_decode_failure; 7447 } 7448 break; 7449 7450 case 0x6F: 7451 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 7452 if (sz != 4 7453 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7454 goto mmx_decode_failure; 7455 modrm = getUChar(delta); 7456 if (epartIsReg(modrm)) { 7457 delta++; 7458 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) ); 7459 DIP("movq %s, %s\n", 7460 nameMMXReg(eregLO3ofRM(modrm)), 7461 nameMMXReg(gregLO3ofRM(modrm))); 7462 } else { 7463 IRTemp addr = 
disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7464 delta += len; 7465 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) ); 7466 DIP("movq %s, %s\n", 7467 dis_buf, nameMMXReg(gregLO3ofRM(modrm))); 7468 } 7469 break; 7470 7471 case 0x7F: 7472 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 7473 if (sz != 4 7474 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7475 goto mmx_decode_failure; 7476 modrm = getUChar(delta); 7477 if (epartIsReg(modrm)) { 7478 delta++; 7479 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) ); 7480 DIP("movq %s, %s\n", 7481 nameMMXReg(gregLO3ofRM(modrm)), 7482 nameMMXReg(eregLO3ofRM(modrm))); 7483 } else { 7484 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 ); 7485 delta += len; 7486 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 7487 DIP("mov(nt)q %s, %s\n", 7488 nameMMXReg(gregLO3ofRM(modrm)), dis_buf); 7489 } 7490 break; 7491 7492 case 0xFC: 7493 case 0xFD: 7494 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 7495 if (sz != 4) 7496 goto mmx_decode_failure; 7497 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True ); 7498 break; 7499 7500 case 0xEC: 7501 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7502 if (sz != 4 7503 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7504 goto mmx_decode_failure; 7505 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True ); 7506 break; 7507 7508 case 0xDC: 7509 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7510 if (sz != 4) 7511 goto mmx_decode_failure; 7512 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True ); 7513 break; 7514 7515 case 0xF8: 7516 case 0xF9: 7517 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 7518 if (sz != 4) 7519 goto mmx_decode_failure; 7520 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True ); 7521 break; 7522 7523 case 0xE8: 7524 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7525 if (sz != 4) 7526 
goto mmx_decode_failure; 7527 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True ); 7528 break; 7529 7530 case 0xD8: 7531 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 7532 if (sz != 4) 7533 goto mmx_decode_failure; 7534 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True ); 7535 break; 7536 7537 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 7538 if (sz != 4) 7539 goto mmx_decode_failure; 7540 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False ); 7541 break; 7542 7543 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 7544 if (sz != 4) 7545 goto mmx_decode_failure; 7546 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False ); 7547 break; 7548 7549 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 7550 vassert(sz == 4); 7551 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False ); 7552 break; 7553 7554 case 0x74: 7555 case 0x75: 7556 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 7557 if (sz != 4) 7558 goto mmx_decode_failure; 7559 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True ); 7560 break; 7561 7562 case 0x64: 7563 case 0x65: 7564 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 7565 if (sz != 4) 7566 goto mmx_decode_failure; 7567 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True ); 7568 break; 7569 7570 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 7571 if (sz != 4) 7572 goto mmx_decode_failure; 7573 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False ); 7574 break; 7575 7576 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7577 if (sz != 4) 7578 goto mmx_decode_failure; 7579 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False ); 7580 break; 7581 7582 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 7583 if (sz != 4) 7584 goto mmx_decode_failure; 7585 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, 
delta, opc, "packuswb", False ); 7586 break; 7587 7588 case 0x68: 7589 case 0x69: 7590 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 7591 if (sz != 4 7592 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7593 goto mmx_decode_failure; 7594 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True ); 7595 break; 7596 7597 case 0x60: 7598 case 0x61: 7599 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7600 if (sz != 4 7601 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx))) 7602 goto mmx_decode_failure; 7603 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True ); 7604 break; 7605 7606 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 7607 if (sz != 4) 7608 goto mmx_decode_failure; 7609 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False ); 7610 break; 7611 7612 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 7613 if (sz != 4) 7614 goto mmx_decode_failure; 7615 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False ); 7616 break; 7617 7618 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 7619 if (sz != 4) 7620 goto mmx_decode_failure; 7621 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False ); 7622 break; 7623 7624 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 7625 if (sz != 4) 7626 goto mmx_decode_failure; 7627 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False ); 7628 break; 7629 7630 # define SHIFT_BY_REG(_name,_op) \ 7631 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \ 7632 break; 7633 7634 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7635 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4); 7636 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2); 7637 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64); 7638 7639 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 7640 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4); 7641 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2); 7642 case 0xD3: 
SHIFT_BY_REG("psrlq", Iop_Shr64); 7643 7644 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 7645 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4); 7646 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2); 7647 7648 # undef SHIFT_BY_REG 7649 7650 case 0x71: 7651 case 0x72: 7652 case 0x73: { 7653 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 7654 UChar byte2, subopc; 7655 if (sz != 4) 7656 goto mmx_decode_failure; 7657 byte2 = getUChar(delta); /* amode / sub-opcode */ 7658 subopc = toUChar( (byte2 >> 3) & 7 ); 7659 7660 # define SHIFT_BY_IMM(_name,_op) \ 7661 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \ 7662 } while (0) 7663 7664 if (subopc == 2 /*SRL*/ && opc == 0x71) 7665 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4); 7666 else if (subopc == 2 /*SRL*/ && opc == 0x72) 7667 SHIFT_BY_IMM("psrld", Iop_ShrN32x2); 7668 else if (subopc == 2 /*SRL*/ && opc == 0x73) 7669 SHIFT_BY_IMM("psrlq", Iop_Shr64); 7670 7671 else if (subopc == 4 /*SAR*/ && opc == 0x71) 7672 SHIFT_BY_IMM("psraw", Iop_SarN16x4); 7673 else if (subopc == 4 /*SAR*/ && opc == 0x72) 7674 SHIFT_BY_IMM("psrad", Iop_SarN32x2); 7675 7676 else if (subopc == 6 /*SHL*/ && opc == 0x71) 7677 SHIFT_BY_IMM("psllw", Iop_ShlN16x4); 7678 else if (subopc == 6 /*SHL*/ && opc == 0x72) 7679 SHIFT_BY_IMM("pslld", Iop_ShlN32x2); 7680 else if (subopc == 6 /*SHL*/ && opc == 0x73) 7681 SHIFT_BY_IMM("psllq", Iop_Shl64); 7682 7683 else goto mmx_decode_failure; 7684 7685 # undef SHIFT_BY_IMM 7686 break; 7687 } 7688 7689 case 0xF7: { 7690 IRTemp addr = newTemp(Ity_I64); 7691 IRTemp regD = newTemp(Ity_I64); 7692 IRTemp regM = newTemp(Ity_I64); 7693 IRTemp mask = newTemp(Ity_I64); 7694 IRTemp olddata = newTemp(Ity_I64); 7695 IRTemp newdata = newTemp(Ity_I64); 7696 7697 modrm = getUChar(delta); 7698 if (sz != 4 || (!epartIsReg(modrm))) 7699 goto mmx_decode_failure; 7700 delta++; 7701 7702 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) )); 7703 assign( regM, getMMXReg( eregLO3ofRM(modrm) )); 7704 assign( regD, getMMXReg( 
gregLO3ofRM(modrm) )); 7705 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); 7706 assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); 7707 assign( newdata, 7708 binop(Iop_Or64, 7709 binop(Iop_And64, 7710 mkexpr(regD), 7711 mkexpr(mask) ), 7712 binop(Iop_And64, 7713 mkexpr(olddata), 7714 unop(Iop_Not64, mkexpr(mask)))) ); 7715 storeLE( mkexpr(addr), mkexpr(newdata) ); 7716 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ), 7717 nameMMXReg( gregLO3ofRM(modrm) ) ); 7718 break; 7719 } 7720 7721 /* --- MMX decode failure --- */ 7722 default: 7723 mmx_decode_failure: 7724 *decode_ok = False; 7725 return delta; /* ignored */ 7726 7727 } 7728 7729 *decode_ok = True; 7730 return delta; 7731 } 7732 7733 7734 /*------------------------------------------------------------*/ 7735 /*--- More misc arithmetic and other obscure insns. ---*/ 7736 /*------------------------------------------------------------*/ 7737 7738 /* Generate base << amt with vacated places filled with stuff 7739 from xtra. amt guaranteed in 0 .. 63. */ 7740 static 7741 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt ) 7742 { 7743 /* if amt == 0 7744 then base 7745 else (base << amt) | (xtra >>u (64-amt)) 7746 */ 7747 return 7748 IRExpr_ITE( 7749 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7750 binop(Iop_Or64, 7751 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)), 7752 binop(Iop_Shr64, mkexpr(xtra), 7753 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7754 ), 7755 mkexpr(base) 7756 ); 7757 } 7758 7759 /* Generate base >>u amt with vacated places filled with stuff 7760 from xtra. amt guaranteed in 0 .. 63. 
*/ 7761 static 7762 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt ) 7763 { 7764 /* if amt == 0 7765 then base 7766 else (base >>u amt) | (xtra << (64-amt)) 7767 */ 7768 return 7769 IRExpr_ITE( 7770 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)), 7771 binop(Iop_Or64, 7772 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)), 7773 binop(Iop_Shl64, mkexpr(xtra), 7774 binop(Iop_Sub8, mkU8(64), mkexpr(amt))) 7775 ), 7776 mkexpr(base) 7777 ); 7778 } 7779 7780 /* Double length left and right shifts. Apparently only required in 7781 v-size (no b- variant). */ 7782 static 7783 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi, 7784 Prefix pfx, 7785 Long delta, UChar modrm, 7786 Int sz, 7787 IRExpr* shift_amt, 7788 Bool amt_is_literal, 7789 const HChar* shift_amt_txt, 7790 Bool left_shift ) 7791 { 7792 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used 7793 for printing it. And eip on entry points at the modrm byte. */ 7794 Int len; 7795 HChar dis_buf[50]; 7796 7797 IRType ty = szToITy(sz); 7798 IRTemp gsrc = newTemp(ty); 7799 IRTemp esrc = newTemp(ty); 7800 IRTemp addr = IRTemp_INVALID; 7801 IRTemp tmpSH = newTemp(Ity_I8); 7802 IRTemp tmpSS = newTemp(Ity_I8); 7803 IRTemp tmp64 = IRTemp_INVALID; 7804 IRTemp res64 = IRTemp_INVALID; 7805 IRTemp rss64 = IRTemp_INVALID; 7806 IRTemp resTy = IRTemp_INVALID; 7807 IRTemp rssTy = IRTemp_INVALID; 7808 Int mask = sz==8 ? 63 : 31; 7809 7810 vassert(sz == 2 || sz == 4 || sz == 8); 7811 7812 /* The E-part is the destination; this is shifted. The G-part 7813 supplies bits to be shifted into the E-part, but is not 7814 changed. 7815 7816 If shifting left, form a double-length word with E at the top 7817 and G at the bottom, and shift this left. The result is then in 7818 the high part. 7819 7820 If shifting right, form a double-length word with G at the top 7821 and E at the bottom, and shift this right. The result is then 7822 at the bottom. */ 7823 7824 /* Fetch the operands. 
/* Double length left and right shifts (SHLD/SHRD).  Apparently only
   required in v-size (no b- variant).

   vbi, pfx       -- ABI info and decoded prefixes for this insn
   delta          -- guest code offset; on entry points at the modrm byte
   modrm          -- the already-fetched modrm byte
   sz             -- operand size: 2, 4 or 8
   shift_amt      -- :: Ity_I8, the amount to shift by
   amt_is_literal -- True iff the amount came from a trailing imm8 byte
                     (which then still needs to be stepped over on exit)
   shift_amt_txt  -- printable form of the amount, used only for DIP
   left_shift     -- True for SHLD, False for SHRD

   Returns delta advanced past the whole instruction. */
static
ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
                        Prefix pfx,
                        Long delta, UChar modrm,
                        Int sz,
                        IRExpr* shift_amt,
                        Bool amt_is_literal,
                        const HChar* shift_amt_txt,
                        Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */
   Int len;
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp gsrc  = newTemp(ty);
   IRTemp esrc  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;
   IRTemp tmpSH = newTemp(Ity_I8);
   IRTemp tmpSS = newTemp(Ity_I8);
   IRTemp tmp64 = IRTemp_INVALID;
   IRTemp res64 = IRTemp_INVALID;
   IRTemp rss64 = IRTemp_INVALID;
   IRTemp resTy = IRTemp_INVALID;
   IRTemp rssTy = IRTemp_INVALID;
   /* Hardware masks the shift amount to 6 bits for 64-bit operands,
      5 bits otherwise. */
   Int    mask  = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom. */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      delta++;
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), dis_buf);
   }

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
      value (rss64).  tmpSS = (tmpSH - 1) & mask: a shift by one less,
      used below only for constructing the flags thunk. */

   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8,
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                        mkU8(mask)));

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(32)) );
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
                       mkU8(32)) );
      }
      else
      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
      }
      else
      if (sz == 2 && left_shift) {
         /* Replicate gsrc so that a shift of up to 31 still pulls in
            meaningful bits (the insn's result is only defined for
            amounts <= 16, but this matches hardware more closely). */
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         ));
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(48)) );
         /* subshift formed by shifting [esrc'0000'0000'0000] */
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64,
                             binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
                                   mkU8(48)),
                             mkexpr(tmpSS)),
                       mkU8(48)) );
      }
      else
      if (sz == 2 && !left_shift) {
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         ));
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64,
                              unop(Iop_16Uto64, mkexpr(esrc)),
                              mkexpr(tmpSS)) );
      }

   } else {

      vassert(sz == 8);
      if (left_shift) {
         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   /* Step over the trailing imm8, if the amount came from one. */
   if (amt_is_literal) delta++;
   return delta;
}
*/ 7952 7953 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; 7954 7955 static const HChar* nameBtOp ( BtOp op ) 7956 { 7957 switch (op) { 7958 case BtOpNone: return ""; 7959 case BtOpSet: return "s"; 7960 case BtOpReset: return "r"; 7961 case BtOpComp: return "c"; 7962 default: vpanic("nameBtOp(amd64)"); 7963 } 7964 } 7965 7966 7967 static 7968 ULong dis_bt_G_E ( const VexAbiInfo* vbi, 7969 Prefix pfx, Int sz, Long delta, BtOp op, 7970 /*OUT*/Bool* decode_OK ) 7971 { 7972 HChar dis_buf[50]; 7973 UChar modrm; 7974 Int len; 7975 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0, 7976 t_addr1, t_rsp, t_mask, t_new; 7977 7978 vassert(sz == 2 || sz == 4 || sz == 8); 7979 7980 t_fetched = t_bitno0 = t_bitno1 = t_bitno2 7981 = t_addr0 = t_addr1 = t_rsp 7982 = t_mask = t_new = IRTemp_INVALID; 7983 7984 t_fetched = newTemp(Ity_I8); 7985 t_new = newTemp(Ity_I8); 7986 t_bitno0 = newTemp(Ity_I64); 7987 t_bitno1 = newTemp(Ity_I64); 7988 t_bitno2 = newTemp(Ity_I8); 7989 t_addr1 = newTemp(Ity_I64); 7990 modrm = getUChar(delta); 7991 7992 *decode_OK = True; 7993 if (epartIsReg(modrm)) { 7994 /* F2 and F3 are never acceptable. */ 7995 if (haveF2orF3(pfx)) { 7996 *decode_OK = False; 7997 return delta; 7998 } 7999 } else { 8000 /* F2 or F3 (but not both) are allowed, provided LOCK is also 8001 present, and only for the BTC/BTS/BTR cases (not BT). */ 8002 if (haveF2orF3(pfx)) { 8003 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) { 8004 *decode_OK = False; 8005 return delta; 8006 } 8007 } 8008 } 8009 8010 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) ); 8011 8012 if (epartIsReg(modrm)) { 8013 delta++; 8014 /* Get it onto the client's stack. Oh, this is a horrible 8015 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925. 8016 Because of the ELF ABI stack redzone, there may be live data 8017 up to 128 bytes below %RSP. 
/* Handle BT/BTS/BTR/BTC Gv, Ev.  G supplies the bit number, E (reg
   or mem) supplies the value operated on.  Sets *decode_OK False on
   an unacceptable prefix combination, True otherwise.  Returns delta
   advanced past the instruction. */
static
ULong dis_bt_G_E ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op,
                   /*OUT*/Bool* decode_OK )
{
   HChar dis_buf[50];
   UChar modrm;
   Int len;
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_rsp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   *decode_OK = True;
   if (epartIsReg(modrm)) {
      /* F2 and F3 are never acceptable. */
      if (haveF2orF3(pfx)) {
         *decode_OK = False;
         return delta;
      }
   } else {
      /* F2 or F3 (but not both) are allowed, provided LOCK is also
         present, and only for the BTC/BTS/BTR cases (not BT). */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
            *decode_OK = False;
            return delta;
         }
      }
   }

   /* The bit number from G, sign-widened to 64 bits (for memory
      forms a negative bit number indexes backwards from the base). */
   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      delta++;
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP before at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely.  */
      t_rsp = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         reg. */
      assign( t_bitno1, binop(Iop_And64,
                              mkexpr(t_bitno0),
                              mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );

   } else {
      t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      /* Memory form: the bit number is NOT masked; it may select any
         byte relative to the decoded address. */
      assign( t_bitno1, mkexpr(t_bitno0) );
   }

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg. */

   /* Now the main sequence. */
   /* Byte address = base + (signed) bitno / 8. */
   assign( t_addr1,
           binop(Iop_Add64,
                 mkexpr(t_addr0),
                 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

   assign( t_bitno2,
           unop(Iop_64to8,
                binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
   }

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
      switch (op) {
         case BtOpSet:
            assign( t_new,
                    binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpComp:
            assign( t_new,
                    binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
            break;
         case BtOpReset:
            assign( t_new,
                    binop(Iop_And8, mkexpr(t_fetched),
                          unop(Iop_Not8, mkexpr(t_mask))) );
            break;
         default:
            vpanic("dis_bt_G_E(amd64)");
      }
      /* Only the genuinely locked memory form needs a CAS; the
         register form operates on our private stack copy. */
      if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                mkexpr(t_new)/*new*/,
                guest_RIP_curr_instr );
      } else {
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );
      }
   }

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        unop(Iop_8Uto64, mkexpr(t_fetched)),
                        mkexpr(t_bitno2)),
                  mkU64(1)))
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_rsp still points at it. */
      /* only write the reg if actually modifying it; doing otherwise
         zeroes the top half erroneously when doing btl due to
         standard zero-extend rule */
      if (op != BtOpNone)
         putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
   }

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
       ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );

   return delta;
}
/* Handle BSF/BSR.  Only v-size seems necessary.  fwds==True gives
   BSF (scan from bit 0 upwards), fwds==False gives BSR (scan from
   the top bit downwards).  E (reg or mem) is the source; G is the
   destination register.  If the source is zero, G is rewritten with
   its old value (architecturally the result is undefined in that
   case; leaving it unchanged matches real AMD hardware).  Returns
   delta advanced past the instruction. */
static
ULong dis_bs_E_G ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, Bool fwds )
{
   Bool isReg;
   UChar modrm;
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp src   = newTemp(ty);
   IRTemp dst   = newTemp(ty);
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 8 || sz == 4 || sz == 2);

   modrm = getUChar(delta);
   isReg = epartIsReg(modrm);
   if (isReg) {
      delta++;
      assign( src, getIRegE(sz, pfx, modrm) );
   } else {
      Int len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      assign( src, loadLE(ty, mkexpr(addr)) );
   }

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
       nameIRegG(sz, pfx, modrm));

   /* First, widen src to 64 bits if it is not already. */
   assign( src64, widenUto64(mkexpr(src)) );

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            IRExpr_ITE( mkexpr(srcB),
                        /* src!=0 */
                        mkU64(0),
                        /* src==0 */
                        mkU64(AMD64G_CC_MASK_Z)
                      )
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
      But anyway, amd64 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

          bsf64:  if src == 0 then {dst is unchanged}
                  else Ctz64(src)

          bsr64:  if src == 0 then {dst is unchanged}
                  else 63 - Clz64(src)

          bsf32:  if src == 0 then {dst is unchanged}
                  else Ctz64(32Uto64(src))

          bsr32:  if src == 0 then {dst is unchanged}
                  else 63 - Clz64(32Uto64(src))

          bsf16:  if src == 0 then {dst is unchanged}
                  else Ctz64(32Uto64(16Uto32(src)))

          bsr16:  if src == 0 then {dst is unchanged}
                  else 63 - Clz64(32Uto64(16Uto32(src)))
   */

   /* The main computation, guarding against zero. */
   assign( dst64,
           IRExpr_ITE(
              mkexpr(srcB),
              /* src != 0 */
              fwds ? unop(Iop_Ctz64, mkexpr(src64))
                   : binop(Iop_Sub64,
                           mkU64(63),
                           unop(Iop_Clz64, mkexpr(src64))),
              /* src == 0 -- leave dst unchanged */
              widenUto64( getIRegG( sz, pfx, modrm ) )
           )
         );

   /* Narrow the 64-bit result back down to the operand size. */
   if (sz == 2)
      assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
   else
   if (sz == 4)
      assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
   else
      assign( dst, mkexpr(dst64) );

   /* dump result back */
   putIRegG( sz, pfx, modrm, mkexpr(dst) );

   return delta;
}
unop(Iop_Ctz64, mkexpr(src64)) 8238 : binop(Iop_Sub64, 8239 mkU64(63), 8240 unop(Iop_Clz64, mkexpr(src64))), 8241 /* src == 0 -- leave dst unchanged */ 8242 widenUto64( getIRegG( sz, pfx, modrm ) ) 8243 ) 8244 ); 8245 8246 if (sz == 2) 8247 assign( dst, unop(Iop_64to16, mkexpr(dst64)) ); 8248 else 8249 if (sz == 4) 8250 assign( dst, unop(Iop_64to32, mkexpr(dst64)) ); 8251 else 8252 assign( dst, mkexpr(dst64) ); 8253 8254 /* dump result back */ 8255 putIRegG( sz, pfx, modrm, mkexpr(dst) ); 8256 8257 return delta; 8258 } 8259 8260 8261 /* swap rAX with the reg specified by reg and REX.B */ 8262 static 8263 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 ) 8264 { 8265 IRType ty = szToITy(sz); 8266 IRTemp t1 = newTemp(ty); 8267 IRTemp t2 = newTemp(ty); 8268 vassert(sz == 2 || sz == 4 || sz == 8); 8269 vassert(regLo3 < 8); 8270 if (sz == 8) { 8271 assign( t1, getIReg64(R_RAX) ); 8272 assign( t2, getIRegRexB(8, pfx, regLo3) ); 8273 putIReg64( R_RAX, mkexpr(t2) ); 8274 putIRegRexB(8, pfx, regLo3, mkexpr(t1) ); 8275 } else if (sz == 4) { 8276 assign( t1, getIReg32(R_RAX) ); 8277 assign( t2, getIRegRexB(4, pfx, regLo3) ); 8278 putIReg32( R_RAX, mkexpr(t2) ); 8279 putIRegRexB(4, pfx, regLo3, mkexpr(t1) ); 8280 } else { 8281 assign( t1, getIReg16(R_RAX) ); 8282 assign( t2, getIRegRexB(2, pfx, regLo3) ); 8283 putIReg16( R_RAX, mkexpr(t2) ); 8284 putIRegRexB(2, pfx, regLo3, mkexpr(t1) ); 8285 } 8286 DIP("xchg%c %s, %s\n", 8287 nameISize(sz), nameIRegRAX(sz), 8288 nameIRegRexB(sz,pfx, regLo3)); 8289 } 8290 8291 8292 static 8293 void codegen_SAHF ( void ) 8294 { 8295 /* Set the flags to: 8296 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 8297 -- retain the old O flag 8298 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8299 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) 8300 */ 8301 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8302 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8303 IRTemp oldflags = newTemp(Ity_I64); 8304 assign( oldflags, 
mk_amd64g_calculate_rflags_all() ); 8305 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 8306 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 8307 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 8308 stmt( IRStmt_Put( OFFB_CC_DEP1, 8309 binop(Iop_Or64, 8310 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)), 8311 binop(Iop_And64, 8312 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)), 8313 mkU64(mask_SZACP)) 8314 ) 8315 )); 8316 } 8317 8318 8319 static 8320 void codegen_LAHF ( void ) 8321 { 8322 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */ 8323 IRExpr* rax_with_hole; 8324 IRExpr* new_byte; 8325 IRExpr* new_rax; 8326 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A 8327 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P; 8328 8329 IRTemp flags = newTemp(Ity_I64); 8330 assign( flags, mk_amd64g_calculate_rflags_all() ); 8331 8332 rax_with_hole 8333 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL)); 8334 new_byte 8335 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)), 8336 mkU64(1<<1)); 8337 new_rax 8338 = binop(Iop_Or64, rax_with_hole, 8339 binop(Iop_Shl64, new_byte, mkU8(8))); 8340 putIReg64(R_RAX, new_rax); 8341 } 8342 8343 8344 static 8345 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok, 8346 const VexAbiInfo* vbi, 8347 Prefix pfx, 8348 Int size, 8349 Long delta0 ) 8350 { 8351 HChar dis_buf[50]; 8352 Int len; 8353 8354 IRType ty = szToITy(size); 8355 IRTemp acc = newTemp(ty); 8356 IRTemp src = newTemp(ty); 8357 IRTemp dest = newTemp(ty); 8358 IRTemp dest2 = newTemp(ty); 8359 IRTemp acc2 = newTemp(ty); 8360 IRTemp cond = newTemp(Ity_I1); 8361 IRTemp addr = IRTemp_INVALID; 8362 UChar rm = getUChar(delta0); 8363 8364 /* There are 3 cases to consider: 8365 8366 reg-reg: ignore any lock prefix, generate sequence based 8367 on ITE 8368 8369 reg-mem, not locked: ignore any lock prefix, generate sequence 8370 based on ITE 8371 8372 reg-mem, locked: use IRCAS 8373 */ 8374 8375 /* Decide whether F2 or F3 are acceptable. 
/* Handle CMPXCHG G, E: compare rAX with E; if equal, E := G and ZF=1,
   else rAX := E and ZF=0.  The memory form with LOCK uses IRCAS for
   atomicity.  Sets *ok False on an unacceptable prefix combination,
   True otherwise.  Returns delta0 advanced past the instruction. */
static
ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
                        const VexAbiInfo* vbi,
                        Prefix pfx,
                        Int size,
                        Long delta0 )
{
   HChar dis_buf[50];
   Int len;

   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);      /* accumulator, rAX (expected) */
   IRTemp src   = newTemp(ty);      /* G, the candidate new value */
   IRTemp dest  = newTemp(ty);      /* old E value */
   IRTemp dest2 = newTemp(ty);      /* value written back to E */
   IRTemp acc2  = newTemp(ty);      /* value written back to rAX */
   IRTemp cond  = newTemp(Ity_I1);  /* True iff acc == dest */
   IRTemp addr  = IRTemp_INVALID;
   UChar rm     = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */

   /* Decide whether F2 or F3 are acceptable.  Never for register
      case, but for the memory case, one or the other is OK provided
      LOCK is also present. */
   if (epartIsReg(rm)) {
      if (haveF2orF3(pfx)) {
         *ok = False;
         return delta0;
      }
   } else {
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
            *ok = False;
            return delta0;
         }
      }
   }

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( dest, getIRegE(size, pfx, rm) );
      delta0++;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      /* On success E := G; on failure E unchanged, rAX := old E. */
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      putIRegE(size, pfx, rm, mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm),
                               nameIRegE(size,pfx,rm) );
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      /* case 2 */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      delta0 += len;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* case 3 */
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for RAX accordingly: in case of success, RAX is
         unchanged. */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      delta0 += len;
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
      stmt( IRStmt_CAS(
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      ));
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else vassert(0);

   *ok = True;
   return delta0;
}
8427 Compute success from the output of the IRCAS, and steer the 8428 new value for RAX accordingly: in case of success, RAX is 8429 unchanged. */ 8430 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 ); 8431 delta0 += len; 8432 assign( src, getIRegG(size, pfx, rm) ); 8433 assign( acc, getIRegRAX(size) ); 8434 stmt( IRStmt_CAS( 8435 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr), 8436 NULL, mkexpr(acc), NULL, mkexpr(src) ) 8437 )); 8438 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty); 8439 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) ); 8440 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) ); 8441 putIRegRAX(size, mkexpr(acc2)); 8442 DIP("cmpxchg%c %s,%s\n", nameISize(size), 8443 nameIRegG(size,pfx,rm), dis_buf); 8444 } 8445 else vassert(0); 8446 8447 *ok = True; 8448 return delta0; 8449 } 8450 8451 8452 /* Handle conditional move instructions of the form 8453 cmovcc E(reg-or-mem), G(reg) 8454 8455 E(src) is reg-or-mem 8456 G(dst) is reg. 8457 8458 If E is reg, --> GET %E, tmps 8459 GET %G, tmpd 8460 CMOVcc tmps, tmpd 8461 PUT tmpd, %G 8462 8463 If E is mem --> (getAddr E) -> tmpa 8464 LD (tmpa), tmps 8465 GET %G, tmpd 8466 CMOVcc tmps, tmpd 8467 PUT tmpd, %G 8468 */ 8469 static 8470 ULong dis_cmov_E_G ( const VexAbiInfo* vbi, 8471 Prefix pfx, 8472 Int sz, 8473 AMD64Condcode cond, 8474 Long delta0 ) 8475 { 8476 UChar rm = getUChar(delta0); 8477 HChar dis_buf[50]; 8478 Int len; 8479 8480 IRType ty = szToITy(sz); 8481 IRTemp tmps = newTemp(ty); 8482 IRTemp tmpd = newTemp(ty); 8483 8484 if (epartIsReg(rm)) { 8485 assign( tmps, getIRegE(sz, pfx, rm) ); 8486 assign( tmpd, getIRegG(sz, pfx, rm) ); 8487 8488 putIRegG( sz, pfx, rm, 8489 IRExpr_ITE( mk_amd64g_calculate_condition(cond), 8490 mkexpr(tmps), 8491 mkexpr(tmpd) ) 8492 ); 8493 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond), 8494 nameIRegE(sz,pfx,rm), 8495 nameIRegG(sz,pfx,rm)); 8496 return 1+delta0; 8497 } 8498 8499 /* E refers to memory */ 8500 { 8501 IRTemp addr = disAMode ( 
/* Handle XADD G, E:  tmp := E + G;  G := old E;  E := tmp, writing
   the usual ADD flags thunk.  The memory form honours a LOCK prefix
   via casLE.  *decode_ok is always set True here (the three cases
   below are exhaustive).  Returns delta0 advanced past the insn. */
static
ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
                     const VexAbiInfo* vbi,
                     Prefix pfx, Int sz, Long delta0 )
{
   Int len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);   /* old E (destination) value */
   IRTemp tmpt0 = newTemp(ty);   /* the G (addend) value */
   IRTemp tmpt1 = newTemp(ty);   /* the sum */

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      /* case 1 */
      assign( tmpd, getIRegE(sz, pfx, rm) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      putIRegE(sz, pfx, rm, mkexpr(tmpt1));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
      *decode_ok = True;
      return 1+delta0;
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      /* case 2 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd, loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* case 3 */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd, loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      /* casLE both performs the atomic store and, on interference,
         restarts this instruction. */
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
      *decode_ok = True;
      return len+delta0;
   }
   /*UNREACHED*/
   vassert(0);
}
//..    vassert(sz == 2 || sz == 4);
//.. 
//..    if (epartIsReg(rm)) {
//..       if (sz == 4)
//..          putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
//..       else
//..          putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
//.. 
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
//..       return 1+delta0;
//..    } else {
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
//..       return len+delta0;
//..    }
//.. }

/* Handle move instructions of the form
      mov S, E  meaning
      mov sreg, reg-or-mem
   Is passed the a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   VEX does not currently simulate segment registers on AMD64 which means that
   instead of moving a value of a segment register, zero is moved to the
   destination.  The zero value represents a null (unused) selector.  This is
   not correct (especially for the %cs, %fs and %gs registers) but it seems to
   provide a sufficient simulation for currently seen programs that use this
   instruction.  If some program actually decides to use the obtained segment
   selector for something meaningful then the zero value should be a clear
   indicator that there is some problem.

   S(src) is sreg.
   E(dst) is reg-or-mem

   If E is reg, -->    PUT $0, %E

   If E is mem, -->    (getAddr E) -> tmpa
                       ST $0, (tmpa)
*/
static
ULong dis_mov_S_E ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size,
                    Long        delta0 )
{
   Int   len;
   UChar rm = getUChar(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      /* Register destination: write a zero (null selector) of the
         requested operand size. */
      putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
                         nameIRegE(size, pfx, rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      /* Memory destination: segment selectors are 16 bits wide, so a
         16-bit zero is stored regardless of 'size'. */
      IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
      storeLE(mkexpr(addr), mkU16(0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
                         dis_buf);
      return len+delta0;
   }
}

//.. static 
//.. void dis_push_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//.. 
//..     assign( t1, getSReg(sreg) );
//..     assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
//..     putIReg(4, R_ESP, mkexpr(ta));
//..     storeLE( mkexpr(ta), mkexpr(t1) );
//.. 
//..     DIP("pushw %s\n", nameSReg(sreg));
//.. }
//.. 
//.. static
//.. void dis_pop_segreg ( UInt sreg, Int sz )
//.. {
//..     IRTemp t1 = newTemp(Ity_I16);
//..     IRTemp ta = newTemp(Ity_I32);
//..     vassert(sz == 2 || sz == 4);
//.. 
//..     assign( ta, getIReg(4, R_ESP) );
//..     assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
//.. 
//..     putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
//..     putSReg( sreg, mkexpr(t1) );
//..     DIP("pop %s\n", nameSReg(sreg));
//.. 
} 8725 8726 static 8727 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 ) 8728 { 8729 IRTemp t1 = newTemp(Ity_I64); 8730 IRTemp t2 = newTemp(Ity_I64); 8731 IRTemp t3 = newTemp(Ity_I64); 8732 assign(t1, getIReg64(R_RSP)); 8733 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 8734 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64))); 8735 putIReg64(R_RSP, mkexpr(t3)); 8736 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret"); 8737 jmp_treg(dres, Ijk_Ret, t2); 8738 vassert(dres->whatNext == Dis_StopHere); 8739 } 8740 8741 8742 /*------------------------------------------------------------*/ 8743 /*--- SSE/SSE2/SSE3 helpers ---*/ 8744 /*------------------------------------------------------------*/ 8745 8746 /* Indicates whether the op requires a rounding-mode argument. Note 8747 that this covers only vector floating point arithmetic ops, and 8748 omits the scalar ones that need rounding modes. Note also that 8749 inconsistencies here will get picked up later by the IR sanity 8750 checker, so this isn't correctness-critical. */ 8751 static Bool requiresRMode ( IROp op ) 8752 { 8753 switch (op) { 8754 /* 128 bit ops */ 8755 case Iop_Add32Fx4: case Iop_Sub32Fx4: 8756 case Iop_Mul32Fx4: case Iop_Div32Fx4: 8757 case Iop_Add64Fx2: case Iop_Sub64Fx2: 8758 case Iop_Mul64Fx2: case Iop_Div64Fx2: 8759 /* 256 bit ops */ 8760 case Iop_Add32Fx8: case Iop_Sub32Fx8: 8761 case Iop_Mul32Fx8: case Iop_Div32Fx8: 8762 case Iop_Add64Fx4: case Iop_Sub64Fx4: 8763 case Iop_Mul64Fx4: case Iop_Div64Fx4: 8764 return True; 8765 default: 8766 break; 8767 } 8768 return False; 8769 } 8770 8771 8772 /* Worker function; do not call directly. 8773 Handles full width G = G `op` E and G = (not G) `op` E. 
8774 */ 8775 8776 static ULong dis_SSE_E_to_G_all_wrk ( 8777 const VexAbiInfo* vbi, 8778 Prefix pfx, Long delta, 8779 const HChar* opname, IROp op, 8780 Bool invertG 8781 ) 8782 { 8783 HChar dis_buf[50]; 8784 Int alen; 8785 IRTemp addr; 8786 UChar rm = getUChar(delta); 8787 Bool needsRMode = requiresRMode(op); 8788 IRExpr* gpart 8789 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm))) 8790 : getXMMReg(gregOfRexRM(pfx,rm)); 8791 if (epartIsReg(rm)) { 8792 putXMMReg( 8793 gregOfRexRM(pfx,rm), 8794 needsRMode 8795 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8796 gpart, 8797 getXMMReg(eregOfRexRM(pfx,rm))) 8798 : binop(op, gpart, 8799 getXMMReg(eregOfRexRM(pfx,rm))) 8800 ); 8801 DIP("%s %s,%s\n", opname, 8802 nameXMMReg(eregOfRexRM(pfx,rm)), 8803 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8804 return delta+1; 8805 } else { 8806 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8807 putXMMReg( 8808 gregOfRexRM(pfx,rm), 8809 needsRMode 8810 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ 8811 gpart, 8812 loadLE(Ity_V128, mkexpr(addr))) 8813 : binop(op, gpart, 8814 loadLE(Ity_V128, mkexpr(addr))) 8815 ); 8816 DIP("%s %s,%s\n", opname, 8817 dis_buf, 8818 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8819 return delta+alen; 8820 } 8821 } 8822 8823 8824 /* All lanes SSE binary operation, G = G `op` E. */ 8825 8826 static 8827 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi, 8828 Prefix pfx, Long delta, 8829 const HChar* opname, IROp op ) 8830 { 8831 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False ); 8832 } 8833 8834 /* All lanes SSE binary operation, G = (not G) `op` E. */ 8835 8836 static 8837 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi, 8838 Prefix pfx, Long delta, 8839 const HChar* opname, IROp op ) 8840 { 8841 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True ); 8842 } 8843 8844 8845 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. 
*/ 8846 8847 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi, 8848 Prefix pfx, Long delta, 8849 const HChar* opname, IROp op ) 8850 { 8851 HChar dis_buf[50]; 8852 Int alen; 8853 IRTemp addr; 8854 UChar rm = getUChar(delta); 8855 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8856 if (epartIsReg(rm)) { 8857 putXMMReg( gregOfRexRM(pfx,rm), 8858 binop(op, gpart, 8859 getXMMReg(eregOfRexRM(pfx,rm))) ); 8860 DIP("%s %s,%s\n", opname, 8861 nameXMMReg(eregOfRexRM(pfx,rm)), 8862 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8863 return delta+1; 8864 } else { 8865 /* We can only do a 32-bit memory read, so the upper 3/4 of the 8866 E operand needs to be made simply of zeroes. */ 8867 IRTemp epart = newTemp(Ity_V128); 8868 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8869 assign( epart, unop( Iop_32UtoV128, 8870 loadLE(Ity_I32, mkexpr(addr))) ); 8871 putXMMReg( gregOfRexRM(pfx,rm), 8872 binop(op, gpart, mkexpr(epart)) ); 8873 DIP("%s %s,%s\n", opname, 8874 dis_buf, 8875 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8876 return delta+alen; 8877 } 8878 } 8879 8880 8881 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */ 8882 8883 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi, 8884 Prefix pfx, Long delta, 8885 const HChar* opname, IROp op ) 8886 { 8887 HChar dis_buf[50]; 8888 Int alen; 8889 IRTemp addr; 8890 UChar rm = getUChar(delta); 8891 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 8892 if (epartIsReg(rm)) { 8893 putXMMReg( gregOfRexRM(pfx,rm), 8894 binop(op, gpart, 8895 getXMMReg(eregOfRexRM(pfx,rm))) ); 8896 DIP("%s %s,%s\n", opname, 8897 nameXMMReg(eregOfRexRM(pfx,rm)), 8898 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8899 return delta+1; 8900 } else { 8901 /* We can only do a 64-bit memory read, so the upper half of the 8902 E operand needs to be made simply of zeroes. 
*/ 8903 IRTemp epart = newTemp(Ity_V128); 8904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8905 assign( epart, unop( Iop_64UtoV128, 8906 loadLE(Ity_I64, mkexpr(addr))) ); 8907 putXMMReg( gregOfRexRM(pfx,rm), 8908 binop(op, gpart, mkexpr(epart)) ); 8909 DIP("%s %s,%s\n", opname, 8910 dis_buf, 8911 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8912 return delta+alen; 8913 } 8914 } 8915 8916 8917 /* All lanes unary SSE operation, G = op(E). */ 8918 8919 static ULong dis_SSE_E_to_G_unary_all ( 8920 const VexAbiInfo* vbi, 8921 Prefix pfx, Long delta, 8922 const HChar* opname, IROp op 8923 ) 8924 { 8925 HChar dis_buf[50]; 8926 Int alen; 8927 IRTemp addr; 8928 UChar rm = getUChar(delta); 8929 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked 8930 // up in the usual way. 8931 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2; 8932 if (epartIsReg(rm)) { 8933 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm)); 8934 /* XXXROUNDINGFIXME */ 8935 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 8936 : unop(op, src); 8937 putXMMReg( gregOfRexRM(pfx,rm), res ); 8938 DIP("%s %s,%s\n", opname, 8939 nameXMMReg(eregOfRexRM(pfx,rm)), 8940 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8941 return delta+1; 8942 } else { 8943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8944 IRExpr* src = loadLE(Ity_V128, mkexpr(addr)); 8945 /* XXXROUNDINGFIXME */ 8946 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src) 8947 : unop(op, src); 8948 putXMMReg( gregOfRexRM(pfx,rm), res ); 8949 DIP("%s %s,%s\n", opname, 8950 dis_buf, 8951 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8952 return delta+alen; 8953 } 8954 } 8955 8956 8957 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */ 8958 8959 static ULong dis_SSE_E_to_G_unary_lo32 ( 8960 const VexAbiInfo* vbi, 8961 Prefix pfx, Long delta, 8962 const HChar* opname, IROp op 8963 ) 8964 { 8965 /* First we need to get the old G value and patch the low 32 bits 8966 of the E operand into it. 
Then apply op and write back to G. */ 8967 HChar dis_buf[50]; 8968 Int alen; 8969 IRTemp addr; 8970 UChar rm = getUChar(delta); 8971 IRTemp oldG0 = newTemp(Ity_V128); 8972 IRTemp oldG1 = newTemp(Ity_V128); 8973 8974 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 8975 8976 if (epartIsReg(rm)) { 8977 assign( oldG1, 8978 binop( Iop_SetV128lo32, 8979 mkexpr(oldG0), 8980 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) ); 8981 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8982 DIP("%s %s,%s\n", opname, 8983 nameXMMReg(eregOfRexRM(pfx,rm)), 8984 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8985 return delta+1; 8986 } else { 8987 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 8988 assign( oldG1, 8989 binop( Iop_SetV128lo32, 8990 mkexpr(oldG0), 8991 loadLE(Ity_I32, mkexpr(addr)) )); 8992 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 8993 DIP("%s %s,%s\n", opname, 8994 dis_buf, 8995 nameXMMReg(gregOfRexRM(pfx,rm)) ); 8996 return delta+alen; 8997 } 8998 } 8999 9000 9001 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */ 9002 9003 static ULong dis_SSE_E_to_G_unary_lo64 ( 9004 const VexAbiInfo* vbi, 9005 Prefix pfx, Long delta, 9006 const HChar* opname, IROp op 9007 ) 9008 { 9009 /* First we need to get the old G value and patch the low 64 bits 9010 of the E operand into it. Then apply op and write back to G. 
*/ 9011 HChar dis_buf[50]; 9012 Int alen; 9013 IRTemp addr; 9014 UChar rm = getUChar(delta); 9015 IRTemp oldG0 = newTemp(Ity_V128); 9016 IRTemp oldG1 = newTemp(Ity_V128); 9017 9018 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9019 9020 if (epartIsReg(rm)) { 9021 assign( oldG1, 9022 binop( Iop_SetV128lo64, 9023 mkexpr(oldG0), 9024 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) ); 9025 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9026 DIP("%s %s,%s\n", opname, 9027 nameXMMReg(eregOfRexRM(pfx,rm)), 9028 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9029 return delta+1; 9030 } else { 9031 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9032 assign( oldG1, 9033 binop( Iop_SetV128lo64, 9034 mkexpr(oldG0), 9035 loadLE(Ity_I64, mkexpr(addr)) )); 9036 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) ); 9037 DIP("%s %s,%s\n", opname, 9038 dis_buf, 9039 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9040 return delta+alen; 9041 } 9042 } 9043 9044 9045 /* SSE integer binary operation: 9046 G = G `op` E (eLeft == False) 9047 G = E `op` G (eLeft == True) 9048 */ 9049 static ULong dis_SSEint_E_to_G( 9050 const VexAbiInfo* vbi, 9051 Prefix pfx, Long delta, 9052 const HChar* opname, IROp op, 9053 Bool eLeft 9054 ) 9055 { 9056 HChar dis_buf[50]; 9057 Int alen; 9058 IRTemp addr; 9059 UChar rm = getUChar(delta); 9060 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm)); 9061 IRExpr* epart = NULL; 9062 if (epartIsReg(rm)) { 9063 epart = getXMMReg(eregOfRexRM(pfx,rm)); 9064 DIP("%s %s,%s\n", opname, 9065 nameXMMReg(eregOfRexRM(pfx,rm)), 9066 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9067 delta += 1; 9068 } else { 9069 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9070 epart = loadLE(Ity_V128, mkexpr(addr)); 9071 DIP("%s %s,%s\n", opname, 9072 dis_buf, 9073 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9074 delta += alen; 9075 } 9076 putXMMReg( gregOfRexRM(pfx,rm), 9077 eLeft ? 
                      binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}


/* Helper for doing SSE FP comparisons.  False return ==> unhandled.
   This is all a bit of a kludge in that it ignores the subtleties of
   ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
   spec. */
static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
                           /*OUT*/IROp* opP,
                           /*OUT*/Bool* postNotP,
                           UInt imm8, Bool all_lanes, Int sz )
{
   if (imm8 >= 32) return False;

   /* First, compute a (preSwap, op, postNot) triple from
      the supplied imm8. */
   Bool pre = False;
   IROp op  = Iop_INVALID;
   Bool not = False;

#  define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
   // If you add a case here, add a corresponding test for both VCMPSD_128
   // and VCMPSS_128 in avx-1.c.
   // Cases 0xA and above are
   //    "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
   switch (imm8) {
      // "O" = ordered, "U" = unordered
      // "Q" = non-signalling (quiet), "S" = signalling
      //
      //             swap operands?
      //             |
      //             |      cmp op          invert after?
      //             |      |               |
      //             v      v               v
      case 0x0:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
      case 0x8:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
      case 0x10: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
      case 0x18: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_US
      //
      case 0x1:  XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
      case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
      //
      case 0x2:  XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
      case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
      //
      case 0x3:  XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
      case 0x13: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_S
      //
      // 0xC: this isn't really right because it returns all-1s when
      // either operand is a NaN, and it should return all-0s.
      case 0x4:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
      case 0xC:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
      case 0x14: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
      case 0x1C: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
      //
      case 0x5:  XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
      case 0x15: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
      //
      case 0x6:  XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
      case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
      //
      case 0x7:  XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
      case 0x17: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_S
      //
      case 0x9:  XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_US
      case 0x19: XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_UQ
      //
      case 0xA:  XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_US
      case 0x1A: XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_UQ
      //
      case 0xD:  XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OS
      case 0x1D: XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OQ
      //
      case 0xE:  XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OS
      case 0x1E: XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OQ
      // Unhandled:
      // 0xB  FALSE_OQ
      // 0xF  TRUE_UQ
      // 0x1B  FALSE_OS
      // 0x1F  TRUE_US
      /* Don't forget to add test cases to VCMPSS_128_<imm8> in
         avx-1.c if new cases turn up. */
      default: break;
   }
#  undef XXX
   if (op == Iop_INVALID) return False;

   /* Now convert the op into one with the same arithmetic but that is
      correct for the width and laneage requirements.  The table above
      is written in terms of the 32Fx4 ops purely as canonical
      placeholders. */

   /**/ if (sz == 4 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
         default: vassert(0);
      }
   }
   else if (sz == 4 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
         default: vassert(0);
      }
   }
   else {
      vpanic("findSSECmpOp(amd64,guest)");
   }

   *preSwapP = pre; *opP = op; *postNotP = not;
   return True;
}


/* Handles SSE 32F/64F comparisons.
It can fail, in which case it 9216 returns the original delta to indicate failure. */ 9217 9218 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi, 9219 Prefix pfx, Long delta, 9220 const HChar* opname, Bool all_lanes, Int sz ) 9221 { 9222 Long delta0 = delta; 9223 HChar dis_buf[50]; 9224 Int alen; 9225 UInt imm8; 9226 IRTemp addr; 9227 Bool preSwap = False; 9228 IROp op = Iop_INVALID; 9229 Bool postNot = False; 9230 IRTemp plain = newTemp(Ity_V128); 9231 UChar rm = getUChar(delta); 9232 UShort mask = 0; 9233 vassert(sz == 4 || sz == 8); 9234 if (epartIsReg(rm)) { 9235 imm8 = getUChar(delta+1); 9236 if (imm8 >= 8) return delta0; /* FAIL */ 9237 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9238 if (!ok) return delta0; /* FAIL */ 9239 vassert(!preSwap); /* never needed for imm8 < 8 */ 9240 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)), 9241 getXMMReg(eregOfRexRM(pfx,rm))) ); 9242 delta += 2; 9243 DIP("%s $%u,%s,%s\n", opname, 9244 imm8, 9245 nameXMMReg(eregOfRexRM(pfx,rm)), 9246 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9247 } else { 9248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 9249 imm8 = getUChar(delta+alen); 9250 if (imm8 >= 8) return delta0; /* FAIL */ 9251 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz); 9252 if (!ok) return delta0; /* FAIL */ 9253 vassert(!preSwap); /* never needed for imm8 < 8 */ 9254 assign( plain, 9255 binop( 9256 op, 9257 getXMMReg(gregOfRexRM(pfx,rm)), 9258 all_lanes 9259 ? loadLE(Ity_V128, mkexpr(addr)) 9260 : sz == 8 9261 ? 
unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr))) 9262 : /*sz==4*/ 9263 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))) 9264 ) 9265 ); 9266 delta += alen+1; 9267 DIP("%s $%u,%s,%s\n", opname, 9268 imm8, 9269 dis_buf, 9270 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9271 } 9272 9273 if (postNot && all_lanes) { 9274 putXMMReg( gregOfRexRM(pfx,rm), 9275 unop(Iop_NotV128, mkexpr(plain)) ); 9276 } 9277 else 9278 if (postNot && !all_lanes) { 9279 mask = toUShort(sz==4 ? 0x000F : 0x00FF); 9280 putXMMReg( gregOfRexRM(pfx,rm), 9281 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) ); 9282 } 9283 else { 9284 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) ); 9285 } 9286 9287 return delta; 9288 } 9289 9290 9291 /* Vector by scalar shift of G by the amount specified at the bottom 9292 of E. */ 9293 9294 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi, 9295 Prefix pfx, Long delta, 9296 const HChar* opname, IROp op ) 9297 { 9298 HChar dis_buf[50]; 9299 Int alen, size; 9300 IRTemp addr; 9301 Bool shl, shr, sar; 9302 UChar rm = getUChar(delta); 9303 IRTemp g0 = newTemp(Ity_V128); 9304 IRTemp g1 = newTemp(Ity_V128); 9305 IRTemp amt = newTemp(Ity_I64); 9306 IRTemp amt8 = newTemp(Ity_I8); 9307 if (epartIsReg(rm)) { 9308 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) ); 9309 DIP("%s %s,%s\n", opname, 9310 nameXMMReg(eregOfRexRM(pfx,rm)), 9311 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9312 delta++; 9313 } else { 9314 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 9315 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 9316 DIP("%s %s,%s\n", opname, 9317 dis_buf, 9318 nameXMMReg(gregOfRexRM(pfx,rm)) ); 9319 delta += alen; 9320 } 9321 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) ); 9322 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 9323 9324 shl = shr = sar = False; 9325 size = 0; 9326 switch (op) { 9327 case Iop_ShlN16x8: shl = True; size = 32; break; 9328 case Iop_ShlN32x4: shl = True; size = 32; break; 9329 case Iop_ShlN64x2: shl = True; size = 64; break; 9330 case Iop_SarN16x8: sar 
= True; size = 16; break; 9331 case Iop_SarN32x4: sar = True; size = 32; break; 9332 case Iop_ShrN16x8: shr = True; size = 16; break; 9333 case Iop_ShrN32x4: shr = True; size = 32; break; 9334 case Iop_ShrN64x2: shr = True; size = 64; break; 9335 default: vassert(0); 9336 } 9337 9338 if (shl || shr) { 9339 assign( 9340 g1, 9341 IRExpr_ITE( 9342 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9343 binop(op, mkexpr(g0), mkexpr(amt8)), 9344 mkV128(0x0000) 9345 ) 9346 ); 9347 } else 9348 if (sar) { 9349 assign( 9350 g1, 9351 IRExpr_ITE( 9352 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 9353 binop(op, mkexpr(g0), mkexpr(amt8)), 9354 binop(op, mkexpr(g0), mkU8(size-1)) 9355 ) 9356 ); 9357 } else { 9358 vassert(0); 9359 } 9360 9361 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) ); 9362 return delta; 9363 } 9364 9365 9366 /* Vector by scalar shift of E by an immediate byte. */ 9367 9368 static 9369 ULong dis_SSE_shiftE_imm ( Prefix pfx, 9370 Long delta, const HChar* opname, IROp op ) 9371 { 9372 Bool shl, shr, sar; 9373 UChar rm = getUChar(delta); 9374 IRTemp e0 = newTemp(Ity_V128); 9375 IRTemp e1 = newTemp(Ity_V128); 9376 UChar amt, size; 9377 vassert(epartIsReg(rm)); 9378 vassert(gregLO3ofRM(rm) == 2 9379 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6); 9380 amt = getUChar(delta+1); 9381 delta += 2; 9382 DIP("%s $%d,%s\n", opname, 9383 (Int)amt, 9384 nameXMMReg(eregOfRexRM(pfx,rm)) ); 9385 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) ); 9386 9387 shl = shr = sar = False; 9388 size = 0; 9389 switch (op) { 9390 case Iop_ShlN16x8: shl = True; size = 16; break; 9391 case Iop_ShlN32x4: shl = True; size = 32; break; 9392 case Iop_ShlN64x2: shl = True; size = 64; break; 9393 case Iop_SarN16x8: sar = True; size = 16; break; 9394 case Iop_SarN32x4: sar = True; size = 32; break; 9395 case Iop_ShrN16x8: shr = True; size = 16; break; 9396 case Iop_ShrN32x4: shr = True; size = 32; break; 9397 case Iop_ShrN64x2: shr = True; size = 64; break; 9398 default: vassert(0); 9399 } 9400 9401 if 
(shl || shr) { 9402 assign( e1, amt >= size 9403 ? mkV128(0x0000) 9404 : binop(op, mkexpr(e0), mkU8(amt)) 9405 ); 9406 } else 9407 if (sar) { 9408 assign( e1, amt >= size 9409 ? binop(op, mkexpr(e0), mkU8(size-1)) 9410 : binop(op, mkexpr(e0), mkU8(amt)) 9411 ); 9412 } else { 9413 vassert(0); 9414 } 9415 9416 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) ); 9417 return delta; 9418 } 9419 9420 9421 /* Get the current SSE rounding mode. */ 9422 9423 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void ) 9424 { 9425 return 9426 unop( Iop_64to32, 9427 binop( Iop_And64, 9428 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ), 9429 mkU64(3) )); 9430 } 9431 9432 static void put_sse_roundingmode ( IRExpr* sseround ) 9433 { 9434 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32); 9435 stmt( IRStmt_Put( OFFB_SSEROUND, 9436 unop(Iop_32Uto64,sseround) ) ); 9437 } 9438 9439 /* Break a V128-bit value up into four 32-bit ints. */ 9440 9441 static void breakupV128to32s ( IRTemp t128, 9442 /*OUTs*/ 9443 IRTemp* t3, IRTemp* t2, 9444 IRTemp* t1, IRTemp* t0 ) 9445 { 9446 IRTemp hi64 = newTemp(Ity_I64); 9447 IRTemp lo64 = newTemp(Ity_I64); 9448 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) ); 9449 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) ); 9450 9451 vassert(t0 && *t0 == IRTemp_INVALID); 9452 vassert(t1 && *t1 == IRTemp_INVALID); 9453 vassert(t2 && *t2 == IRTemp_INVALID); 9454 vassert(t3 && *t3 == IRTemp_INVALID); 9455 9456 *t0 = newTemp(Ity_I32); 9457 *t1 = newTemp(Ity_I32); 9458 *t2 = newTemp(Ity_I32); 9459 *t3 = newTemp(Ity_I32); 9460 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) ); 9461 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) ); 9462 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) ); 9463 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) ); 9464 } 9465 9466 /* Construct a V128-bit value from four 32-bit ints. 
*/ 9467 9468 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2, 9469 IRTemp t1, IRTemp t0 ) 9470 { 9471 return 9472 binop( Iop_64HLtoV128, 9473 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9474 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) 9475 ); 9476 } 9477 9478 /* Break a 64-bit value up into four 16-bit ints. */ 9479 9480 static void breakup64to16s ( IRTemp t64, 9481 /*OUTs*/ 9482 IRTemp* t3, IRTemp* t2, 9483 IRTemp* t1, IRTemp* t0 ) 9484 { 9485 IRTemp hi32 = newTemp(Ity_I32); 9486 IRTemp lo32 = newTemp(Ity_I32); 9487 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) ); 9488 assign( lo32, unop(Iop_64to32, mkexpr(t64)) ); 9489 9490 vassert(t0 && *t0 == IRTemp_INVALID); 9491 vassert(t1 && *t1 == IRTemp_INVALID); 9492 vassert(t2 && *t2 == IRTemp_INVALID); 9493 vassert(t3 && *t3 == IRTemp_INVALID); 9494 9495 *t0 = newTemp(Ity_I16); 9496 *t1 = newTemp(Ity_I16); 9497 *t2 = newTemp(Ity_I16); 9498 *t3 = newTemp(Ity_I16); 9499 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) ); 9500 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) ); 9501 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) ); 9502 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) ); 9503 } 9504 9505 /* Construct a 64-bit value from four 16-bit ints. */ 9506 9507 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2, 9508 IRTemp t1, IRTemp t0 ) 9509 { 9510 return 9511 binop( Iop_32HLto64, 9512 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)), 9513 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0)) 9514 ); 9515 } 9516 9517 /* Break a V256-bit value up into four 64-bit ints. 
*/ 9518 9519 static void breakupV256to64s ( IRTemp t256, 9520 /*OUTs*/ 9521 IRTemp* t3, IRTemp* t2, 9522 IRTemp* t1, IRTemp* t0 ) 9523 { 9524 vassert(t0 && *t0 == IRTemp_INVALID); 9525 vassert(t1 && *t1 == IRTemp_INVALID); 9526 vassert(t2 && *t2 == IRTemp_INVALID); 9527 vassert(t3 && *t3 == IRTemp_INVALID); 9528 *t0 = newTemp(Ity_I64); 9529 *t1 = newTemp(Ity_I64); 9530 *t2 = newTemp(Ity_I64); 9531 *t3 = newTemp(Ity_I64); 9532 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) ); 9533 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) ); 9534 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) ); 9535 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) ); 9536 } 9537 9538 /* Break a V256-bit value up into two V128s. */ 9539 9540 static void breakupV256toV128s ( IRTemp t256, 9541 /*OUTs*/ 9542 IRTemp* t1, IRTemp* t0 ) 9543 { 9544 vassert(t0 && *t0 == IRTemp_INVALID); 9545 vassert(t1 && *t1 == IRTemp_INVALID); 9546 *t0 = newTemp(Ity_V128); 9547 *t1 = newTemp(Ity_V128); 9548 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256))); 9549 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256))); 9550 } 9551 9552 /* Break a V256-bit value up into eight 32-bit ints. */ 9553 9554 static void breakupV256to32s ( IRTemp t256, 9555 /*OUTs*/ 9556 IRTemp* t7, IRTemp* t6, 9557 IRTemp* t5, IRTemp* t4, 9558 IRTemp* t3, IRTemp* t2, 9559 IRTemp* t1, IRTemp* t0 ) 9560 { 9561 IRTemp t128_1 = IRTemp_INVALID; 9562 IRTemp t128_0 = IRTemp_INVALID; 9563 breakupV256toV128s( t256, &t128_1, &t128_0 ); 9564 breakupV128to32s( t128_1, t7, t6, t5, t4 ); 9565 breakupV128to32s( t128_0, t3, t2, t1, t0 ); 9566 } 9567 9568 /* Break a V128-bit value up into two 64-bit ints. 
*/ 9569 9570 static void breakupV128to64s ( IRTemp t128, 9571 /*OUTs*/ 9572 IRTemp* t1, IRTemp* t0 ) 9573 { 9574 vassert(t0 && *t0 == IRTemp_INVALID); 9575 vassert(t1 && *t1 == IRTemp_INVALID); 9576 *t0 = newTemp(Ity_I64); 9577 *t1 = newTemp(Ity_I64); 9578 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) ); 9579 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) ); 9580 } 9581 9582 /* Construct a V256-bit value from eight 32-bit ints. */ 9583 9584 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6, 9585 IRTemp t5, IRTemp t4, 9586 IRTemp t3, IRTemp t2, 9587 IRTemp t1, IRTemp t0 ) 9588 { 9589 return 9590 binop( Iop_V128HLtoV256, 9591 binop( Iop_64HLtoV128, 9592 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)), 9593 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ), 9594 binop( Iop_64HLtoV128, 9595 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)), 9596 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) ) 9597 ); 9598 } 9599 9600 /* Construct a V256-bit value from four 64-bit ints. */ 9601 9602 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2, 9603 IRTemp t1, IRTemp t0 ) 9604 { 9605 return 9606 binop( Iop_V128HLtoV256, 9607 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)), 9608 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0)) 9609 ); 9610 } 9611 9612 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. 
Given two 64-bit 9613 values (aa,bb), computes, for each of the 4 16-bit lanes: 9614 9615 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 9616 */ 9617 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) 9618 { 9619 IRTemp aa = newTemp(Ity_I64); 9620 IRTemp bb = newTemp(Ity_I64); 9621 IRTemp aahi32s = newTemp(Ity_I64); 9622 IRTemp aalo32s = newTemp(Ity_I64); 9623 IRTemp bbhi32s = newTemp(Ity_I64); 9624 IRTemp bblo32s = newTemp(Ity_I64); 9625 IRTemp rHi = newTemp(Ity_I64); 9626 IRTemp rLo = newTemp(Ity_I64); 9627 IRTemp one32x2 = newTemp(Ity_I64); 9628 assign(aa, aax); 9629 assign(bb, bbx); 9630 assign( aahi32s, 9631 binop(Iop_SarN32x2, 9632 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), 9633 mkU8(16) )); 9634 assign( aalo32s, 9635 binop(Iop_SarN32x2, 9636 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), 9637 mkU8(16) )); 9638 assign( bbhi32s, 9639 binop(Iop_SarN32x2, 9640 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), 9641 mkU8(16) )); 9642 assign( bblo32s, 9643 binop(Iop_SarN32x2, 9644 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), 9645 mkU8(16) )); 9646 assign(one32x2, mkU64( (1ULL << 32) + 1 )); 9647 assign( 9648 rHi, 9649 binop( 9650 Iop_ShrN32x2, 9651 binop( 9652 Iop_Add32x2, 9653 binop( 9654 Iop_ShrN32x2, 9655 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), 9656 mkU8(14) 9657 ), 9658 mkexpr(one32x2) 9659 ), 9660 mkU8(1) 9661 ) 9662 ); 9663 assign( 9664 rLo, 9665 binop( 9666 Iop_ShrN32x2, 9667 binop( 9668 Iop_Add32x2, 9669 binop( 9670 Iop_ShrN32x2, 9671 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), 9672 mkU8(14) 9673 ), 9674 mkexpr(one32x2) 9675 ), 9676 mkU8(1) 9677 ) 9678 ); 9679 return 9680 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); 9681 } 9682 9683 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. 
   Given two 64-bit
   values (aa,bb), computes, for each lane:

   if aa_lane < 0 then - bb_lane
   else if aa_lane > 0 then bb_lane
   else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   /* Select 8-, 16- or 32-bit lane operations by lane size in
      bytes. */
   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,    aax );
   assign( bb,    bbx );
   assign( zero,  mkU64(0) );
   assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
   /* All-ones lane masks selecting the strictly negative / strictly
      positive lanes of aa.  A lane of aa that is zero appears in
      neither mask, so the corresponding result lane is 0. */
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );

}


/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

   if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
*/
static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
{
   IRTemp res     = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   /* Select 8-, 16- or 32-bit lane operations by lane size in
      bytes. */
   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   /* Arithmetic shift by (lane width - 1) replicates each lane's sign
      bit across the lane, giving an all-ones mask for negative lanes
      and all-zeroes for non-negative ones. */
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
                 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
   return res;
}

/* XMM version of math_PABS_MMX: do the two 64-bit halves separately
   and reassemble the V128 result. */
static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V128);
   IRTemp aaHi = newTemp(Ity_I64);
   IRTemp aaLo = newTemp(Ity_I64);
   assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
   assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
   assign(res, binop(Iop_64HLtoV128,
                     mkexpr(math_PABS_MMX(aaHi, laneszB)),
                     mkexpr(math_PABS_MMX(aaLo, laneszB))));
   return res;
}

/* Specialisations of math_PABS_XMM, since there's no easy way to do
   partial applications in C :-( */
static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 4);
}

static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 2);
}

static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 1);
}

/* YMM version of math_PABS_XMM.
*/
static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V256);
   IRTemp aaHi = IRTemp_INVALID;
   IRTemp aaLo = IRTemp_INVALID;
   breakupV256toV128s(aa, &aaHi, &aaLo);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PABS_XMM(aaHi, laneszB)),
                     mkexpr(math_PABS_XMM(aaLo, laneszB))));
   return res;
}

/* Specialisations of math_PABS_YMM, as for math_PABS_XMM above. */
static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 4);
}

static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 2);
}

static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 1);
}

/* Shift the 128-bit pair (hi64:lo64) right by byteShift bytes
   (1 .. 7 only) and return the resulting low 64 bits. */
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Long byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

/* PALIGNR: logically concatenate dV (high) and sV (low) into a
   256-bit value, shift it right by imm8 bytes, and return the low
   128 bits.  Each imm8 range requires a different combination of the
   four 64-bit pieces dHi:dLo:sHi:sLo. */
static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp res = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   IRTemp rHi = newTemp(Ity_I64);
   IRTemp rLo = newTemp(Ity_I64);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   if (imm8 == 0) {
      assign( rHi, mkexpr(sHi) );
      assign( rLo, mkexpr(sLo) );
   }
   else if (imm8 >= 1 && imm8 <= 7) {
      assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
      assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
   }
   else if (imm8 == 8) {
      assign( rHi, mkexpr(dLo) );
      assign( rLo, mkexpr(sHi) );
   }
   else if (imm8 >= 9 && imm8 <= 15) {
      assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
      assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
   }
   else if (imm8 == 16) {
      assign( rHi, mkexpr(dHi) );
      assign( rLo, mkexpr(dLo) );
   }
   else if (imm8 >= 17 && imm8 <= 23) {
      assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
      assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      /* Shift distance >= total operand width: result is zero. */
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly!
   The mask is (alignment - 1); a nonzero (addr & mask) means the
   address is misaligned. */
static
void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
               mkU64(0)),
         Ijk_SigSEGV,
         IRConst_U64(guest_RIP_curr_instr),
         OFFB_RIP
      )
   );
}

static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
}

static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
}

static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
   gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
}

/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.
   The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0  =  addb $imm8,  rm8
   81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
   82 /0  =  addb $imm8,  rm8
   83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

   00     =  addb r8,  rm8
   01     =  addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1  =  dec rm8
   FF /1  =  dec rm32  and  dec rm16

   FE /0  =  inc rm8
   FF /0  =  inc rm32  and  inc rm16

   F6 /3  =  neg rm8
   F7 /3  =  neg rm32  and  neg rm16

   F6 /2  =  not rm8
   F7 /2  =  not rm32  and  not rm16

   0F BB     =  btcw r16, rm16    and  btcl r32, rm32
   0F BA /7  =  btcw $imm8, rm16  and  btcw $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   /* opc points at the opcode byte; opc[1]/opc[2] are the following
      bytes (the modrm byte for 1- and 2-byte opcodes respectively).
      In every case LOCK is only allowed when the E-part is memory
      (!epartIsReg), and for group opcodes only for the /reg
      extensions listed in the table above. */
   switch (opc[0]) {
      /* ADD/OR/ADC/SBB/AND/SUB/XOR, register-to-memory forms. */
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* Group 1 immediate forms: /0 .. /6 (i.e. not CMP, which
         is /7). */
      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* INC (/0) and DEC (/1). */
      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* NOT (/2) and NEG (/3). */
      case 0xF6: case 0xF7:
         if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      /* XCHG. */
      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      /* Two-byte (0F-escaped) opcodes. */
      case 0x0F: {
         switch (opc[1]) {
            /* BTC, BTR, BTS (register bit-index forms). */
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* Group 8: BTS (/5), BTR (/6), BTC (/7) with imm8. */
            case 0xBA:
               if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG. */
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            /* CMPXCHG8B (/1). */
            case 0xC7:
               if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            /* XADD. */
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2                 ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Handles (U)COMISD: compare the low F64 lanes of G and E and copy
   the comparison result into the Z/P/C bits of the flags thunk.
   opc selects UCOMISD (0x2E) vs COMISD (0x2F); isAvx only affects
   the disassembly printout. */
static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F64);
   IRTemp argR  = newTemp(Ity_F64);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ?
                      "u" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   /* Set the flags thunk to "copy": CC_DEP1 holds the flag values
      directly.  0x45 masks off everything except bits 0, 2 and 6
      (C, P and Z in the rflags encoding). */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
                   mkU64(0x45)
       )));
   return delta;
}


/* Handles (U)COMISS, exactly as dis_COMISD above but for the low F32
   lanes, which are widened to F64 before the compare. */
static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F32);
   IRTemp argR  = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
          opc==0x2E ? "u" : "",
          dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   /* As in dis_COMISD: copy C/P/Z (mask 0x45) into the thunk. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64,
                               unop(Iop_F32toF64,mkexpr(argL)),
                               unop(Iop_F32toF64,mkexpr(argR)))),
                   mkU64(0x45)
       )));
   return delta;
}


/* PSHUFD: permute the four 32-bit lanes of the source according to
   the four 2-bit selectors in the immediate byte.  If writesYmm,
   the upper YMM lane of the destination is zeroed (AVX form). */
static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool writesYmm )
{
   Int order;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V128);
   UChar modrm = getUChar(delta);
   const HChar* strV = writesYmm ? "v" : "";
   IRTemp addr = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
      order = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order,
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameXMMReg(gregOfRexRM(pfx,modrm)));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order,
          dis_buf,
          nameXMMReg(gregOfRexRM(pfx,modrm)));
   }

   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

   /* SEL(n) picks source lane n. */
#  define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp dV = newTemp(Ity_V128);
   assign(dV,
          mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                         SEL((order>>2)&3), SEL((order>>0)&3) )
   );
#  undef SEL

   (writesYmm ?
      putYMMRegLoAndZU : putXMMReg)
      (gregOfRexRM(pfx,modrm), mkexpr(dV));
   return delta;
}


/* 256-bit VPSHUFD: permute the 32-bit lanes within each 128-bit
   half independently, using the same 2-bit selectors from the
   immediate byte for both halves. */
static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int order;
   Int alen = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V256);
   UChar modrm = getUChar(delta);
   IRTemp addr = IRTemp_INVALID;
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      order = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
   }

   IRTemp s[8];
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
                         &s[3], &s[2], &s[1], &s[0] );

   /* s[4+..] selects from the upper half, s[0+..] from the lower. */
   putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
                                 s[4 + ((order>>4)&3)],
                                 s[4 + ((order>>2)&3)],
                                 s[4 + ((order>>0)&3)],
                                 s[0 + ((order>>6)&3)],
                                 s[0 + ((order>>4)&3)],
                                 s[0 + ((order>>2)&3)],
                                 s[0 + ((order>>0)&3)] ) );
   return delta;
}


/* PSRLDQ: shift the whole 128-bit value sV right by imm bytes,
   shifting in zeroes.  imm >= 16 gives a zero result. */
static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, mkexpr(hi64) );
   }
   else
   if (imm > 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1 .. 7: the low half receives bits from both halves. */
      assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
      assign( lo64r,
              binop( Iop_Or64,
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * imm)),
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * (8 - imm)) )
              )
      );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}


/* PSLLDQ: shift the whole 128-bit value sV left by imm bytes,
   shifting in zeroes.  Mirror image of math_PSRLDQ above. */
static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, mkexpr(lo64) );
   }
   else
   if (imm > 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
   } else {
      /* 1 .. 7: the high half receives bits from both halves. */
      assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
      assign( hi64r,
              binop( Iop_Or64,
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * imm)),
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * (8 - imm)) )
              )
      );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}


/* Handles CVTSD2SI (0x2D) and the truncating CVTTSD2SI (0x2C):
   convert the low F64 lane of E to a signed 32- or 64-bit integer
   in G.  The truncating form uses round-to-zero, otherwise the
   current SSE rounding mode applies. */
static Long dis_CVTxSD2SI
                            ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f64lo  = newTemp(Ity_F64);
   Bool   r2zero = toBool(opc == 0x2C);

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   /* Truncating ("with T") variant forces round-to-zero. */
   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
   }

   return delta;
}


/* As dis_CVTxSD2SI, but for the F32 variants CVTSS2SI (0x2D) and
   CVTTSS2SI (0x2C); the F32 value is widened to F64 before the
   integer conversion. */
static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc, Int sz )
{
   vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
   HChar  dis_buf[50];
   Int    alen   = 0;
   UChar  modrm  = getUChar(delta);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp rmode  = newTemp(Ity_I32);
   IRTemp f32lo  = newTemp(Ity_F32);
   Bool   r2zero = toBool(opc == 0x2C);

   if (epartIsReg(modrm)) {
      delta += 1;
      assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          nameXMMReg(eregOfRexRM(pfx,modrm)),
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
      delta += alen;
      DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
          dis_buf,
          nameIReg(sz, gregOfRexRM(pfx,modrm),
                   False));
   }

   if (r2zero) {
      assign( rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   if (sz == 4) {
      putIReg32( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI32S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   } else {
      vassert(sz == 8);
      putIReg64( gregOfRexRM(pfx,modrm),
                 binop( Iop_F64toI64S,
                        mkexpr(rmode),
                        unop(Iop_F32toF64, mkexpr(f32lo))) );
   }

   return delta;
}


/* CVTPS2PD (128-bit): convert the two low F32 lanes of E to two F64
   lanes in G.  In the AVX form the upper YMM lane is zeroed. */
static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp f32lo = newTemp(Ity_F32);
   IRTemp f32hi = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( f32lo, getXMMRegLane32F(rE, 0) );
      assign( f32hi, getXMMRegLane32F(rE, 1) );
      delta += 1;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
      assign( f32hi, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
      delta += alen;
      DIP("%scvtps2pd %s,%s\n",
          isAvx ?
              "v" : "", dis_buf, nameXMMReg(rG));
   }

   putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
   putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0));
   return delta;
}


/* VCVTPS2PD (256-bit): convert the four F32 lanes of the 128-bit
   source to four F64 lanes in the YMM destination. */
static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp f32_0 = newTemp(Ity_F32);
   IRTemp f32_1 = newTemp(Ity_F32);
   IRTemp f32_2 = newTemp(Ity_F32);
   IRTemp f32_3 = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( f32_0, getXMMRegLane32F(rE, 0) );
      assign( f32_1, getXMMRegLane32F(rE, 1) );
      assign( f32_2, getXMMRegLane32F(rE, 2) );
      assign( f32_3, getXMMRegLane32F(rE, 3) );
      delta += 1;
      DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
      assign( f32_1, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
      assign( f32_2, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
      assign( f32_3, loadLE(Ity_F32,
                            binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
      delta += alen;
      DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
   }

   putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
   putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
   putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
   putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
   return delta;
}


/* CVTPD2PS (128-bit): convert the two F64 lanes of E to two F32
   lanes in the low half of G, zeroing G's upper half.  Uses the
   current SSE rounding mode. */
static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
          nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
          dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   IRTemp t0 = newTemp(Ity_F64);
   IRTemp t1 = newTemp(Ity_F64);
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
   putXMMRegLane32(  rG, 3, mkU32(0) );
   putXMMRegLane32(  rG, 2, mkU32(0) );
   putXMMRegLane32F( rG, 1, CVT(t1) );
   putXMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* CVT(T)PS2DQ (128-bit): convert four F32 lanes to four signed I32
   lanes.  r2zero selects the truncating (round-to-zero) variant. */
static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%sps2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ?
          "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvt%sps2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   t0 = t1 = t2 = t3 = IRTemp_INVALID;
   breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
   /* This is less than ideal.  If it turns out to be a performance
      bottleneck it can be improved. */
#  define CVT(_t)                            \
      binop( Iop_F64toI32S,                  \
             mkexpr(rmode),                  \
             unop( Iop_F32toF64,             \
                   unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

   putXMMRegLane32( rG, 3, CVT(t3) );
   putXMMRegLane32( rG, 2, CVT(t2) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* VCVT(T)PS2DQ (256-bit): as the 128-bit version above, but for all
   eight F32 lanes of a YMM source. */
static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3, t4, t5, t6, t7;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvt%sps2dq %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
   }

   assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
                         : get_sse_roundingmode() );
   t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
   breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
   /* This is less than ideal.  If it turns out to be a performance
      bottleneck it can be improved. */
#  define CVT(_t)                            \
      binop( Iop_F64toI32S,                  \
             mkexpr(rmode),                  \
             unop( Iop_F32toF64,             \
                   unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

   putYMMRegLane32( rG, 7, CVT(t7) );
   putYMMRegLane32( rG, 6, CVT(t6) );
   putYMMRegLane32( rG, 5, CVT(t5) );
   putYMMRegLane32( rG, 4, CVT(t4) );
   putYMMRegLane32( rG, 3, CVT(t3) );
   putYMMRegLane32( rG, 2, CVT(t2) );
   putYMMRegLane32( rG, 1, CVT(t1) );
   putYMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT

   return delta;
}


/* CVT(T)PD2DQ (128-bit): convert two F64 lanes to two signed I32
   lanes in the low half of G; G's upper half is zeroed. */
static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool isAvx, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvt%spd2dq %s,%s\n",
          isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvt%spd2dqx %s,%s\n",
          isAvx ? "v" : "", r2zero ?
          "t" : "", dis_buf, nameXMMReg(rG) );
   }

   /* Truncating ("with T") variant forces round-to-zero. */
   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = newTemp(Ity_F64);
   t1 = newTemp(Ity_F64);
   assign( t0, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128to64, mkexpr(argV))) );
   assign( t1, unop(Iop_ReinterpI64asF64,
                    unop(Iop_V128HIto64, mkexpr(argV))) );

#  define CVT(_t)  binop( Iop_F64toI32S, \
                          mkexpr(rmode), \
                          mkexpr(_t) )

   putXMMRegLane32( rG, 3, mkU32(0) );
   putXMMRegLane32( rG, 2, mkU32(0) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* VCVT(T)PD2DQ (256-bit): convert four F64 lanes of a YMM source to
   four signed I32 lanes in the XMM destination; the destination's
   upper 128 bits are zeroed. */
static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                                Long delta, Bool r2zero )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      delta += 1;
      DIP("vcvt%spd2dq %s,%s\n",
          r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      delta += alen;
      DIP("vcvt%spd2dqy %s,%s\n",
          r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
   }

   if (r2zero) {
      assign(rmode, mkU32((UInt)Irrm_ZERO) );
   } else {
      assign( rmode, get_sse_roundingmode() );
   }

   t0 = IRTemp_INVALID;
   t1 = IRTemp_INVALID;
   t2 = IRTemp_INVALID;
   t3 = IRTemp_INVALID;
   breakupV256to64s( argV, &t3, &t2, &t1, &t0 );

#  define CVT(_t)  binop( Iop_F64toI32S, \
                          mkexpr(rmode), \
                          unop( Iop_ReinterpI64asF64, \
                                mkexpr(_t) ) )

   putXMMRegLane32( rG, 3, CVT(t3) );
   putXMMRegLane32( rG, 2, CVT(t2) );
   putXMMRegLane32( rG, 1, CVT(t1) );
   putXMMRegLane32( rG, 0, CVT(t0) );
#  undef CVT
   putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}


/* CVTDQ2PS (128-bit): convert four signed I32 lanes to four F32
   lanes, using the current SSE rounding mode.  In the AVX form the
   upper YMM lane is zeroed. */
static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp argV  = newTemp(Ity_V128);
   IRTemp rmode = newTemp(Ity_I32);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp t0, t1, t2, t3;

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getXMMReg(rE) );
      delta += 1;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2ps %s,%s\n",
          isAvx ?
"v" : "", dis_buf, nameXMMReg(rG) ); 10775 } 10776 10777 assign( rmode, get_sse_roundingmode() ); 10778 t0 = IRTemp_INVALID; 10779 t1 = IRTemp_INVALID; 10780 t2 = IRTemp_INVALID; 10781 t3 = IRTemp_INVALID; 10782 breakupV128to32s( argV, &t3, &t2, &t1, &t0 ); 10783 10784 # define CVT(_t) binop( Iop_F64toF32, \ 10785 mkexpr(rmode), \ 10786 unop(Iop_I32StoF64,mkexpr(_t))) 10787 10788 putXMMRegLane32F( rG, 3, CVT(t3) ); 10789 putXMMRegLane32F( rG, 2, CVT(t2) ); 10790 putXMMRegLane32F( rG, 1, CVT(t1) ); 10791 putXMMRegLane32F( rG, 0, CVT(t0) ); 10792 # undef CVT 10793 if (isAvx) 10794 putYMMRegLane128( rG, 1, mkV128(0) ); 10795 10796 return delta; 10797 } 10798 10799 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx, 10800 Long delta ) 10801 { 10802 IRTemp addr = IRTemp_INVALID; 10803 Int alen = 0; 10804 HChar dis_buf[50]; 10805 UChar modrm = getUChar(delta); 10806 IRTemp argV = newTemp(Ity_V256); 10807 IRTemp rmode = newTemp(Ity_I32); 10808 UInt rG = gregOfRexRM(pfx,modrm); 10809 IRTemp t0, t1, t2, t3, t4, t5, t6, t7; 10810 10811 if (epartIsReg(modrm)) { 10812 UInt rE = eregOfRexRM(pfx,modrm); 10813 assign( argV, getYMMReg(rE) ); 10814 delta += 1; 10815 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 10816 } else { 10817 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 10818 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 10819 delta += alen; 10820 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) ); 10821 } 10822 10823 assign( rmode, get_sse_roundingmode() ); 10824 t0 = IRTemp_INVALID; 10825 t1 = IRTemp_INVALID; 10826 t2 = IRTemp_INVALID; 10827 t3 = IRTemp_INVALID; 10828 t4 = IRTemp_INVALID; 10829 t5 = IRTemp_INVALID; 10830 t6 = IRTemp_INVALID; 10831 t7 = IRTemp_INVALID; 10832 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 ); 10833 10834 # define CVT(_t) binop( Iop_F64toF32, \ 10835 mkexpr(rmode), \ 10836 unop(Iop_I32StoF64,mkexpr(_t))) 10837 10838 putYMMRegLane32F( rG, 7, CVT(t7) ); 10839 putYMMRegLane32F( rG, 6, CVT(t6) 
); 10840 putYMMRegLane32F( rG, 5, CVT(t5) ); 10841 putYMMRegLane32F( rG, 4, CVT(t4) ); 10842 putYMMRegLane32F( rG, 3, CVT(t3) ); 10843 putYMMRegLane32F( rG, 2, CVT(t2) ); 10844 putYMMRegLane32F( rG, 1, CVT(t1) ); 10845 putYMMRegLane32F( rG, 0, CVT(t0) ); 10846 # undef CVT 10847 10848 return delta; 10849 } 10850 10851 10852 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx, 10853 Long delta, Bool isAvx ) 10854 { 10855 UChar modrm = getUChar(delta); 10856 vassert(epartIsReg(modrm)); /* ensured by caller */ 10857 UInt rE = eregOfRexRM(pfx,modrm); 10858 UInt rG = gregOfRexRM(pfx,modrm); 10859 IRTemp t0 = newTemp(Ity_V128); 10860 IRTemp t1 = newTemp(Ity_I32); 10861 assign(t0, getXMMReg(rE)); 10862 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0)))); 10863 putIReg32(rG, mkexpr(t1)); 10864 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE), 10865 nameIReg32(rG)); 10866 delta += 1; 10867 return delta; 10868 } 10869 10870 10871 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx, 10872 Long delta ) 10873 { 10874 UChar modrm = getUChar(delta); 10875 vassert(epartIsReg(modrm)); /* ensured by caller */ 10876 UInt rE = eregOfRexRM(pfx,modrm); 10877 UInt rG = gregOfRexRM(pfx,modrm); 10878 IRTemp t0 = newTemp(Ity_V128); 10879 IRTemp t1 = newTemp(Ity_V128); 10880 IRTemp t2 = newTemp(Ity_I16); 10881 IRTemp t3 = newTemp(Ity_I16); 10882 assign(t0, getYMMRegLane128(rE, 0)); 10883 assign(t1, getYMMRegLane128(rE, 1)); 10884 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0))); 10885 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1))); 10886 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2))); 10887 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 10888 delta += 1; 10889 return delta; 10890 } 10891 10892 10893 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the 10894 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". 
*/
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   /* Interleave the high (UNPCKH) or low (UNPCKL) pairs of src and
      dst 32-bit lanes. */
   assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
                    : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}


/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}


/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   /* Same in-lane steering as the 128-bit form, applied to each
      128-bit half independently. */
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                                            mkexpr(s0), mkexpr(d0)));
   return res;
}


/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPS: the two high result lanes are
   selected from the source, the two low ones from the destination,
   under control of imm8 (two bits per lane). */
static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}


/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   twice. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit SHUFPD: imm8 bit 0 selects the low
   result lane from the destination, bit 1 the high result lane from
   the source. */
static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}


/* Does the maths for 256 bit (V)SHUFPD: each 128-bit half is
   steered independently by its own two imm8 bits. */
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPD: imm8 bits 1:0 select, per
   64-bit lane, source (1) or destination (0). */
static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   /* res = (sV & mask) | (dV & ~mask) */
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* Does the maths for 256 bit (V)BLENDPD, by applying the 128 bit
   version to each half with the relevant imm8 bits. */
static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV,
          binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit BLENDPS: imm8 bits 3:0 select, per
   32-bit lane, source (1) or destination (0). */
static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   /* res = (sV & mask) | (dV & ~mask) */
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}


/* Does the maths for 256 bit (V)BLENDPS, by applying the 128 bit
   version to each half with the relevant imm8 bits. */
static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}


/* Does the maths for 128 bit PBLENDW: imm8 bits 7:0 select, per
   16-bit lane, source (1) or destination (0). */
static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make w be a 16-bit version of imm8, formed by duplicating each
      bit in imm8. */
   Int i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   /* res = (sV & mask) | (dV & ~mask) */
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}


/* Does the maths for 128 bit PMULUDQ: unsigned widening multiply of
   the even-numbered (0 and 2) 32-bit lanes, giving two 64-bit
   products. */
static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* 256 bit (V)PMULUDQ: apply the 128 bit version per half. */
static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULUDQ_128(sHi, dHi)),
                     mkexpr(math_PMULUDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMULDQ: signed widening multiply of
   the even-numbered (0 and 2) 32-bit lanes, giving two 64-bit
   products. */
static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}


/* 256 bit (V)PMULDQ: apply the 128 bit version per half. */
static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULDQ_128(sHi, dHi)),
                     mkexpr(math_PMULDQ_128(sLo, dLo))));
   return res;
}


/* Does the maths for 128 bit PMADDWD, by handing each 64-bit half
   to the MMX pmaddwd clean helper. */
static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
   return res;
}


/* 256 bit (V)PMADDWD: apply the 128 bit version per half. */
static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDWD_128(dHi, sHi)),
                     mkexpr(math_PMADDWD_128(dLo, sLo))));
   return res;
}


/* Does the maths for 128 bit ADDSUBPD: lane 0 is dV-sV, lane 1 is
   dV+sV. */
static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1   = newTemp(Ity_I64);
   IRTemp s0   = newTemp(Ity_I64);
   IRTemp rm   = newTemp(Ity_I32);

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}


/* Does the maths for 256 bit (V)ADDSUBPD: even lanes subtract, odd
   lanes add. */
static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}


/* Does the maths for 128 bit ADDSUBPS: even lanes subtract, odd
   lanes add. */
static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx4, mkexpr(rm),
                       mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}


/* Does the maths for 256 bit (V)ADDSUBPS: even lanes subtract, odd
   lanes add. */
static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}


/* Handle 128 bit PSHUFLW and PSHUFHW.  |xIsH| selects the high
   (PSHUFHW) or low (PSHUFLW) 64-bit half for permutation; the other
   half is copied through unchanged. */
static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV    = newTemp(Ity_V128);
   dV    = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64,   mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64   : Iop_V128HIto64, mkexpr(sV)) );

   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
             ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}


/* Handle 256 bit PSHUFLW and PSHUFHW: the same imm8-controlled
   permutation is applied within each 128-bit half. */
static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sV, s[8], sV64[4], dVhi, dVlo;
   sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   sV   = newTemp(Ity_V256);
   dVhi = newTemp(Ity_I64);
   dVlo = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameYMMReg(rG));
   }

   breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
   /* s[7..4] come from the mutated 64-bit chunk of the upper half,
      s[3..0] from that of the lower half. */
   breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
   breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );

   assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
                              s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
   assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
                              s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
   putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
                                 xIsH ? sV64[2] : dVhi,
                                 xIsH ? dVlo : sV64[1],
                                 xIsH ? sV64[0] : dVlo ) );
   return delta;
}


/* Handle PEXTRW (register-source form only): extract the
   imm8-selected 16-bit lane of an XMM register, zero-extended into a
   32-bit GPR.  Returns the unadvanced delta to signal failure if the
   E part turns out to be a memory operand. */
static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long   deltaIN = delta;
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   IRTemp sV      = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   UInt   imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
          imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}


/* Handle 128 bit CVTDQ2PD (and the AVX rendering thereof): widen
   two I32 lanes to two F64 lanes.  The widening is exact, so no
   rounding mode is consulted. */
static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp arg64 = newTemp(Ity_I64);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   const HChar* mbV = isAvx ?
"v" : ""; 11477 if (epartIsReg(modrm)) { 11478 UInt rE = eregOfRexRM(pfx,modrm); 11479 assign( arg64, getXMMRegLane64(rE, 0) ); 11480 delta += 1; 11481 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG)); 11482 } else { 11483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11484 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 11485 delta += alen; 11486 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 11487 } 11488 putXMMRegLane64F( 11489 rG, 0, 11490 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64))) 11491 ); 11492 putXMMRegLane64F( 11493 rG, 1, 11494 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64))) 11495 ); 11496 if (isAvx) 11497 putYMMRegLane128(rG, 1, mkV128(0)); 11498 return delta; 11499 } 11500 11501 11502 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11503 Long delta, Bool isAvx ) 11504 { 11505 IRTemp addr = IRTemp_INVALID; 11506 Int alen = 0; 11507 HChar dis_buf[50]; 11508 UChar modrm = getUChar(delta); 11509 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11510 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */ 11511 11512 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11513 delta += alen; 11514 11515 /* Fake up a native SSE mxcsr word. The only thing it depends on 11516 is SSEROUND[1:0], so call a clean helper to cook it up. 11517 */ 11518 /* ULong amd64h_create_mxcsr ( ULong sseround ) */ 11519 DIP("%sstmxcsr %s\n", isAvx ? 
"v" : "", dis_buf); 11520 storeLE( 11521 mkexpr(addr), 11522 unop(Iop_64to32, 11523 mkIRExprCCall( 11524 Ity_I64, 0/*regp*/, 11525 "amd64g_create_mxcsr", &amd64g_create_mxcsr, 11526 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) ) 11527 ) 11528 ) 11529 ); 11530 return delta; 11531 } 11532 11533 11534 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx, 11535 Long delta, Bool isAvx ) 11536 { 11537 IRTemp addr = IRTemp_INVALID; 11538 Int alen = 0; 11539 HChar dis_buf[50]; 11540 UChar modrm = getUChar(delta); 11541 vassert(!epartIsReg(modrm)); /* ensured by caller */ 11542 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */ 11543 11544 IRTemp t64 = newTemp(Ity_I64); 11545 IRTemp ew = newTemp(Ity_I32); 11546 11547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 11548 delta += alen; 11549 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf); 11550 11551 /* The only thing we observe in %mxcsr is the rounding mode. 11552 Therefore, pass the 32-bit value (SSE native-format control 11553 word) to a clean helper, getting back a 64-bit value, the 11554 lower half of which is the SSEROUND value to store, and the 11555 upper half of which is the emulation-warning token which may 11556 be generated. 11557 */ 11558 /* ULong amd64h_check_ldmxcsr ( ULong ); */ 11559 assign( t64, mkIRExprCCall( 11560 Ity_I64, 0/*regparms*/, 11561 "amd64g_check_ldmxcsr", 11562 &amd64g_check_ldmxcsr, 11563 mkIRExprVec_1( 11564 unop(Iop_32Uto64, 11565 loadLE(Ity_I32, mkexpr(addr)) 11566 ) 11567 ) 11568 ) 11569 ); 11570 11571 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) ); 11572 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) ); 11573 put_emwarn( mkexpr(ew) ); 11574 /* Finally, if an emulation warning was reported, side-exit to 11575 the next insn, reporting the warning, so that Valgrind's 11576 dispatcher sees the warning. 
*/
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
         Ijk_EmWarn,
         IRConst_U64(guest_RIP_bbstart+delta),
         OFFB_RIP
      )
   );
   return delta;
}


/* Generate IR to store the XSAVE image components selected by |rfbm|
   (requested-feature bitmap: bit 0 = x87, bit 1 = SSE, bit 2 = AVX)
   to guest memory at |addr|.  Shared by the XSAVE and FXSAVE
   decoders. */
static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                 );
   /* Only run the helper if rfbm bit 0 is set. */
   d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
                     mkU64(1));

   /* Declare we're writing memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't written, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      written. */
   d0->mFx   = Ifx_Write;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're reading guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Read;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Read;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Read;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Read;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Read;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   IRTemp rfbm_1    = newTemp(Ity_I64);
   IRTemp rfbm_1or2 = newTemp(Ity_I64);
   assign(rfbm_1,    binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));

   IRExpr* guard_1    = binop(Iop_CmpEQ64, mkexpr(rfbm_1),    mkU64(2));
   IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This creates only MXCSR and MXCSR_MASK.  We need to do this if
      either components 1 (SSE) or 2 (AVX) are requested.  Hence the
      guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                 );
   d1->guard = guard_1or2;

   /* Declare we're writing memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a write of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Write;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're reading guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Read;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates MXCSR and MXCSR_MASK but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and does not "infect" the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves.  XMM[i] lives at image offset
      160 + 16*i.  Stores are guarded on rfbm bit 1 only. */
   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
               getXMMReg(reg),
               guard_1
      ));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register saves, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   IRTemp rfbm_2 = newTemp(Ity_I64);
   assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));

   IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));

   /* Upper 128 bits of YMM[i] live at image offset 576 + 16*i. */
   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
               getYMMRegLane128(reg,1),
               guard_2
      ));
   }
}


/* Decode XSAVE m<size>: write the x87/SSE/AVX components selected by
   EDX:EAX (masked by the assumed XCR0 value) to the image at the
   effective address. */
static Long dis_XSAVE ( const VexAbiInfo* vbi,
                        Prefix pfx, Long delta, Int sz )
{
   /* Note that the presence or absence of REX.W (indicated here by
      |sz|) slightly affects the written format: whether the saved FPU
      IP and DP pointers are 64 or 32 bits.  But the helper function
      we call simply writes zero bits in the relevant fields, which
      are 64 bits regardless of what REX.W is, and so it's good enough
      (iow, equally broken) in both cases. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_64_aligned(addr);

   DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   /* RFBM = (RDX:RAX lower 32-bit halves combined) & XCR0. */
   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   gen_XSAVE_SEQUENCE(addr, rfbm);

   /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
      OR-ing the RFBM value into it.
*/
   /* XSTATE_BV is the byte at image offset 512; only the low byte
      matters since rfbm is at most 7 here. */
   IRTemp addr_plus_512 = newTemp(Ity_I64);
   assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
   storeLE( mkexpr(addr_plus_512),
            binop(Iop_Or8,
                  unop(Iop_64to8, mkexpr(rfbm)),
                  loadLE(Ity_I8, mkexpr(addr_plus_512))) );

   return delta;
}


/* Decode FXSAVE m512: implemented as XSAVE with just the x87 and SSE
   components (rfbm = 0b011) selected. */
static Long dis_FXSAVE ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* See comment in dis_XSAVE about the significance of REX.W. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_16_aligned(addr);

   DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXSAVE is just XSAVE with components 0 and 1 selected.  Set rfbm
      to 0b011, generate the XSAVE sequence accordingly, and let iropt
      fold out the unused (AVX) parts accordingly. */
   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm, mkU64(3));
   gen_XSAVE_SEQUENCE(addr, rfbm);

   return delta;
}


/* Generate IR to restore the components selected by |rfbm| from the
   XSAVE image at |addr|.  |xstate_bv| is the XSTATE_BV word from the
   image header: a component is loaded from memory only when both its
   rfbm bit and its xstate_bv bit are set; when only the rfbm bit is
   set, the component is reset to its initial value.  Shared by the
   XRSTOR and FXRSTOR decoders. */
static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* If rfbm[0] == 1, we have to write the x87 state.  If
      xstate_bv[0] == 1, we will read it from the memory image, else
      we'll set it to initial values.  Doing this with a helper
      function and getting the definedness flow annotations correct is
      too difficult, so generate stupid but simple code: first set the
      registers to initial values, regardless of xstate_bv[0].  Then,
      conditionally restore from the memory image.
*/

   IRTemp rfbm_0      = newTemp(Ity_I64);
   IRTemp xstate_bv_0 = newTemp(Ity_I64);
   IRTemp restore_0   = newTemp(Ity_I64);
   assign(rfbm_0,      binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
   assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
   /* restore_0 is nonzero iff both gate bits are set. */
   assign(restore_0,   binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));

   /* Reset the x87 state whenever component 0 is selected at all. */
   gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );

   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));

   /* Declare we're reading memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't read, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      read. */
   d0->mFx   = Ifx_Read;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're writing guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Write;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Write;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Write;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Write;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Write;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area.
*/
   IRTemp rfbm_1      = newTemp(Ity_I64);
   IRTemp xstate_bv_1 = newTemp(Ity_I64);
   IRTemp restore_1   = newTemp(Ity_I64);
   assign(rfbm_1,      binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
   assign(restore_1,   binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
   IRExpr* rfbm_1e    = binop(Iop_CmpNE64, mkexpr(rfbm_1),    mkU64(0));
   IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));

   /* "1 or 2" variants: needed because MXCSR belongs to both the SSE
      and AVX components. */
   IRTemp rfbm_1or2      = newTemp(Ity_I64);
   IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
   IRTemp restore_1or2   = newTemp(Ity_I64);
   assign(rfbm_1or2,      binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
   assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
   assign(restore_1or2,   binop(Iop_And64, mkexpr(rfbm_1or2),
                                           mkexpr(xstate_bv_1or2)));
   IRExpr* rfbm_1or2e    = binop(Iop_CmpNE64, mkexpr(rfbm_1or2),    mkU64(0));
   IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));

   /* The areas in question are: SSEROUND, and the XMM register array. */
   putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));

   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
   }

   /* And now possibly restore from MXCSR/MXCSR_MASK */
   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This restores from only MXCSR and MXCSR_MASK.  We need to do
      this if either components 1 (SSE) or 2 (AVX) are requested.
      Hence the guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
                 ) ;
   d1->guard = restore_1or2e;

   /* Declare we're reading memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a read of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Read;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're writing guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Write;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates SSEROUND but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and is not "infected" by the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves.  For each register, we PUT either
      its old value, or the value loaded from memory.  One convenient
      way to do that is with a conditional load that has as its
      default value, the old value of the register.
*/
   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
      IRExpr* alt = getXMMReg(reg);
      IRTemp  loadedValue = newTemp(Ity_V128);
      /* Guarded load: reads memory only when restore_1e holds, else
         yields |alt| (the register's current value). */
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_1e) );
      putXMMReg(reg, mkexpr(loadedValue));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register loads, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area. */
   IRTemp rfbm_2      = newTemp(Ity_I64);
   IRTemp xstate_bv_2 = newTemp(Ity_I64);
   IRTemp restore_2   = newTemp(Ity_I64);
   assign(rfbm_2,      binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
   assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
   assign(restore_2,   binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));

   IRExpr* rfbm_2e    = binop(Iop_CmpNE64, mkexpr(rfbm_2),    mkU64(0));
   IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));

   /* First zero the upper (lane 1) halves of all 16 YMM registers,
      gated on rfbm bit 2 alone ... */
   for (reg = 0; reg < 16; reg++) {
      putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
   }

   /* ... then conditionally reload them from the memory image. */
   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
      IRExpr* alt = getYMMRegLane128(reg, 1);
      IRTemp  loadedValue = newTemp(Ity_V128);
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_2e) );
      putYMMRegLane128(reg, 1, mkexpr(loadedValue));
   }
}


/* Decode XRSTOR m<size>: restore the x87/SSE/AVX components selected
   by EDX:EAX (masked by the assumed XCR0 value) from the XSAVE image
   at the effective address. */
static Long dis_XRSTOR ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* As with XSAVE above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields.
*/
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_64_aligned(addr);

   DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   /* RFBM = (RDX:RAX lower 32-bit halves combined) & XCR0. */
   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   /* XSTATE_BV: header bytes 7 .. 0 of the image. */
   IRTemp xstate_bv = newTemp(Ity_I64);
   assign(xstate_bv, loadLE(Ity_I64,
                            binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));

   /* XCOMP_BV: header bytes 15 .. 8 of the image. */
   IRTemp xcomp_bv = newTemp(Ity_I64);
   assign(xcomp_bv, loadLE(Ity_I64,
                           binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));

   /* Header bytes 23 .. 16, which must be zero. */
   IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
   assign( xsavehdr_23_16,
           loadLE(Ity_I64,
                  binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));

   /* We must fault if
      * xcomp_bv[63] == 1, since this simulated CPU does not support
        the compaction extension.
      * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
      * any of the xsave header bytes 23 .. 8 are nonzero.  This seems to
        imply that xcomp_bv must be zero.
      xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
   */
   IRTemp fault_if_nonzero = newTemp(Ity_I64);
   assign(fault_if_nonzero,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
                binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
   stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
                     Ijk_SigSEGV,
                     IRConst_U64(guest_RIP_curr_instr),
                     OFFB_RIP
   ));

   /* We are guaranteed now that both xstate_bv and rfbm are in the
      range 0 .. 7.  Generate the restore sequence proper. */
   gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);

   return delta;
}


/* Decode FXRSTOR m512: implemented as XRSTOR with just the x87 and
   SSE components selected and assumed present in XSTATE_BV. */
static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
                          Prefix pfx, Long delta, Int sz )
{
   /* As with FXSAVE above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_16_aligned(addr);

   DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
      as if components 0 and 1 are set as present in XSTATE_BV in the
      XSAVE header.  Set both rfbm and xstate_bv to 0b011 therefore,
      generate the XRSTOR sequence accordingly, and let iropt fold out
      the unused (AVX) parts accordingly.
*/
   IRTemp three = newTemp(Ity_I64);
   assign(three, mkU64(3));
   gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);

   return delta;
}


/* Make a V128 equal to |v128| except that 16-bit lane |imm8| (0 .. 7)
   is replaced by |u16|. */
static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 7);

   // Create a V128 value which has the selected word in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   /* Position the 16-bit value within its 64-bit half. */
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_16Uto64, mkexpr(u16)),
                           mkU8(16 * (imm8 & 3))));
   /* Place that half in the low (lanes 0..3) or high (lanes 4..7)
      64 bits of the V128. */
   if (imm8 < 4) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   /* mkV128's 16-bit immediate has one bit per byte of the V128, so
      clear the two byte positions of the target lane, then OR in the
      new value. */
   UShort mask = ~(3 << (imm8 * 2));
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}


/* PSADBW (sums of absolute byte differences) on a pair of V128s,
   computed one 64-bit half at a time via the MMX helper. */
static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp s1, s0, d1, d0;
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res,
           binop(Iop_64HLtoV128,
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
   return res;
}


/* PSADBW on a pair of V256s: apply the 128-bit version to each half. */
static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res =
newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSADBW_128(dHi, sHi)),
                     mkexpr(math_PSADBW_128(dLo, sLo))));
   return res;
}


/* (V)MASKMOVDQU: store the bytes of xmm|G| whose corresponding mask
   byte (the sign bit of each byte of xmm|E|) is set, to [RDI],
   leaving other bytes in memory unchanged.  Implemented as a
   read-modify-write of the whole 16-byte area. */
static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx )
{
   IRTemp regD    = newTemp(Ity_V128);
   IRTemp mask    = newTemp(Ity_V128);
   IRTemp olddata = newTemp(Ity_V128);
   IRTemp newdata = newTemp(Ity_V128);
   IRTemp addr    = newTemp(Ity_I64);
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   UInt   rE      = eregOfRexRM(pfx,modrm);

   /* The implicit memory operand is RDI, subject to address-size and
      segment overrides. */
   assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
   assign( regD, getXMMReg( rG ));

   /* Unfortunately can't do the obvious thing with SarN8x16
      here since that can't be re-emitted as SSE2 code - no such
      insn. */
   /* Arithmetic-shift each byte right by 7, replicating its sign bit
      to form an all-0s/all-1s per-byte mask. */
   assign( mask,
           binop(Iop_64HLtoV128,
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
                       mkU8(7) ),
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
                       mkU8(7) ) ));
   assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
   /* Merge: selected bytes from regD, the rest from memory. */
   assign( newdata, binop(Iop_OrV128,
                          binop(Iop_AndV128,
                                mkexpr(regD),
                                mkexpr(mask) ),
                          binop(Iop_AndV128,
                                mkexpr(olddata),
                                unop(Iop_NotV128, mkexpr(mask)))) );
   storeLE( mkexpr(addr), mkexpr(newdata) );

   delta += 1;
   DIP("%smaskmovdqu %s,%s\n", isAvx ?
"v" : "",
       nameXMMReg(rE), nameXMMReg(rG) );
   return delta;
}


/* (V)MOVMSKPS r32, xmm: collect the sign bits of the four 32-bit
   lanes of xmm|E| into the low 4 bits of r32|G|, zeroing the rest. */
static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rE    = eregOfRexRM(pfx,modrm);
   IRTemp t0    = newTemp(Ity_I32);
   IRTemp t1    = newTemp(Ity_I32);
   IRTemp t2    = newTemp(Ity_I32);
   IRTemp t3    = newTemp(Ity_I32);
   delta += 1;
   /* For each lane n, move its sign bit (bit 31) down to bit n. */
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("%smovmskps %s,%s\n", isAvx ?
"v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}


/* VMOVMSKPS r32, ymm: collect the sign bits of the eight 32-bit
   lanes of ymm|E| into the low 8 bits of r32|G|, zeroing the rest. */
static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rE    = eregOfRexRM(pfx,modrm);
   IRTemp t0    = newTemp(Ity_I32);
   IRTemp t1    = newTemp(Ity_I32);
   IRTemp t2    = newTemp(Ity_I32);
   IRTemp t3    = newTemp(Ity_I32);
   IRTemp t4    = newTemp(Ity_I32);
   IRTemp t5    = newTemp(Ity_I32);
   IRTemp t6    = newTemp(Ity_I32);
   IRTemp t7    = newTemp(Ity_I32);
   delta += 1;
   /* For each lane n, move its sign bit (bit 31) down to bit n. */
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   assign( t4, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
                      mkU32(16) ));
   assign( t5, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
                      mkU32(32) ));
   assign( t6, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
                      mkU32(64) ));
   assign( t7, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
                      mkU32(128) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                              binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
                              binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
   DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}


/* (V)MOVMSKPD r32, xmm: collect the sign bits of the two 64-bit
   lanes of xmm|E| into the low 2 bits of r32|G|, zeroing the rest. */
static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
12269 UChar modrm = getUChar(delta); 12270 UInt rG = gregOfRexRM(pfx,modrm); 12271 UInt rE = eregOfRexRM(pfx,modrm); 12272 IRTemp t0 = newTemp(Ity_I32); 12273 IRTemp t1 = newTemp(Ity_I32); 12274 delta += 1; 12275 assign( t0, binop( Iop_And32, 12276 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)), 12277 mkU32(1) )); 12278 assign( t1, binop( Iop_And32, 12279 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)), 12280 mkU32(2) )); 12281 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) ); 12282 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "", 12283 nameXMMReg(rE), nameIReg32(rG)); 12284 return delta; 12285 } 12286 12287 12288 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta ) 12289 { 12290 UChar modrm = getUChar(delta); 12291 UInt rG = gregOfRexRM(pfx,modrm); 12292 UInt rE = eregOfRexRM(pfx,modrm); 12293 IRTemp t0 = newTemp(Ity_I32); 12294 IRTemp t1 = newTemp(Ity_I32); 12295 IRTemp t2 = newTemp(Ity_I32); 12296 IRTemp t3 = newTemp(Ity_I32); 12297 delta += 1; 12298 assign( t0, binop( Iop_And32, 12299 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)), 12300 mkU32(1) )); 12301 assign( t1, binop( Iop_And32, 12302 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)), 12303 mkU32(2) )); 12304 assign( t2, binop( Iop_And32, 12305 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)), 12306 mkU32(4) )); 12307 assign( t3, binop( Iop_And32, 12308 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)), 12309 mkU32(8) )); 12310 putIReg32( rG, binop(Iop_Or32, 12311 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), 12312 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) ); 12313 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG)); 12314 return delta; 12315 } 12316 12317 12318 /* Note, this also handles SSE(1) insns. 
*/ 12319 __attribute__((noinline)) 12320 static 12321 Long dis_ESC_0F__SSE2 ( Bool* decode_OK, 12322 const VexArchInfo* archinfo, 12323 const VexAbiInfo* vbi, 12324 Prefix pfx, Int sz, Long deltaIN, 12325 DisResult* dres ) 12326 { 12327 IRTemp addr = IRTemp_INVALID; 12328 IRTemp t0 = IRTemp_INVALID; 12329 IRTemp t1 = IRTemp_INVALID; 12330 IRTemp t2 = IRTemp_INVALID; 12331 IRTemp t3 = IRTemp_INVALID; 12332 IRTemp t4 = IRTemp_INVALID; 12333 IRTemp t5 = IRTemp_INVALID; 12334 IRTemp t6 = IRTemp_INVALID; 12335 UChar modrm = 0; 12336 Int alen = 0; 12337 HChar dis_buf[50]; 12338 12339 *decode_OK = False; 12340 12341 Long delta = deltaIN; 12342 UChar opc = getUChar(delta); 12343 delta++; 12344 switch (opc) { 12345 12346 case 0x10: 12347 if (have66noF2noF3(pfx) 12348 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12349 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */ 12350 modrm = getUChar(delta); 12351 if (epartIsReg(modrm)) { 12352 putXMMReg( gregOfRexRM(pfx,modrm), 12353 getXMMReg( eregOfRexRM(pfx,modrm) )); 12354 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12355 nameXMMReg(gregOfRexRM(pfx,modrm))); 12356 delta += 1; 12357 } else { 12358 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12359 putXMMReg( gregOfRexRM(pfx,modrm), 12360 loadLE(Ity_V128, mkexpr(addr)) ); 12361 DIP("movupd %s,%s\n", dis_buf, 12362 nameXMMReg(gregOfRexRM(pfx,modrm))); 12363 delta += alen; 12364 } 12365 goto decode_success; 12366 } 12367 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to 12368 G (lo half xmm). If E is mem, upper half of G is zeroed out. 12369 If E is reg, upper half of G is unchanged. 
*/ 12370 if (haveF2no66noF3(pfx) 12371 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) { 12372 modrm = getUChar(delta); 12373 if (epartIsReg(modrm)) { 12374 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12375 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 12376 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12377 nameXMMReg(gregOfRexRM(pfx,modrm))); 12378 delta += 1; 12379 } else { 12380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12381 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12382 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 12383 loadLE(Ity_I64, mkexpr(addr)) ); 12384 DIP("movsd %s,%s\n", dis_buf, 12385 nameXMMReg(gregOfRexRM(pfx,modrm))); 12386 delta += alen; 12387 } 12388 goto decode_success; 12389 } 12390 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G 12391 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ 12392 if (haveF3no66noF2(pfx) 12393 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12394 modrm = getUChar(delta); 12395 if (epartIsReg(modrm)) { 12396 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12397 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 )); 12398 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12399 nameXMMReg(gregOfRexRM(pfx,modrm))); 12400 delta += 1; 12401 } else { 12402 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12403 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 12404 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, 12405 loadLE(Ity_I32, mkexpr(addr)) ); 12406 DIP("movss %s,%s\n", dis_buf, 12407 nameXMMReg(gregOfRexRM(pfx,modrm))); 12408 delta += alen; 12409 } 12410 goto decode_success; 12411 } 12412 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). 
*/ 12413 if (haveNo66noF2noF3(pfx) 12414 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12415 modrm = getUChar(delta); 12416 if (epartIsReg(modrm)) { 12417 putXMMReg( gregOfRexRM(pfx,modrm), 12418 getXMMReg( eregOfRexRM(pfx,modrm) )); 12419 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12420 nameXMMReg(gregOfRexRM(pfx,modrm))); 12421 delta += 1; 12422 } else { 12423 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12424 putXMMReg( gregOfRexRM(pfx,modrm), 12425 loadLE(Ity_V128, mkexpr(addr)) ); 12426 DIP("movups %s,%s\n", dis_buf, 12427 nameXMMReg(gregOfRexRM(pfx,modrm))); 12428 delta += alen; 12429 } 12430 goto decode_success; 12431 } 12432 break; 12433 12434 case 0x11: 12435 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem 12436 or lo half xmm). */ 12437 if (haveF2no66noF3(pfx) 12438 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12439 modrm = getUChar(delta); 12440 if (epartIsReg(modrm)) { 12441 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0, 12442 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 12443 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12444 nameXMMReg(eregOfRexRM(pfx,modrm))); 12445 delta += 1; 12446 } else { 12447 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12448 storeLE( mkexpr(addr), 12449 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 12450 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12451 dis_buf); 12452 delta += alen; 12453 } 12454 goto decode_success; 12455 } 12456 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem 12457 or lo 1/4 xmm). 
*/ 12458 if (haveF3no66noF2(pfx) && sz == 4) { 12459 modrm = getUChar(delta); 12460 if (epartIsReg(modrm)) { 12461 /* fall through, we don't yet have a test case */ 12462 } else { 12463 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12464 storeLE( mkexpr(addr), 12465 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 12466 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12467 dis_buf); 12468 delta += alen; 12469 goto decode_success; 12470 } 12471 } 12472 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */ 12473 if (have66noF2noF3(pfx) 12474 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12475 modrm = getUChar(delta); 12476 if (epartIsReg(modrm)) { 12477 putXMMReg( eregOfRexRM(pfx,modrm), 12478 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12479 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12480 nameXMMReg(eregOfRexRM(pfx,modrm))); 12481 delta += 1; 12482 } else { 12483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12484 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12485 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12486 dis_buf ); 12487 delta += alen; 12488 } 12489 goto decode_success; 12490 } 12491 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ 12492 if (haveNo66noF2noF3(pfx) 12493 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12494 modrm = getUChar(delta); 12495 if (epartIsReg(modrm)) { 12496 /* fall through; awaiting test case */ 12497 } else { 12498 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12499 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12500 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12501 dis_buf ); 12502 delta += alen; 12503 goto decode_success; 12504 } 12505 } 12506 break; 12507 12508 case 0x12: 12509 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */ 12510 /* Identical to MOVLPS ? 
*/ 12511 if (have66noF2noF3(pfx) 12512 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12513 modrm = getUChar(delta); 12514 if (epartIsReg(modrm)) { 12515 /* fall through; apparently reg-reg is not possible */ 12516 } else { 12517 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12518 delta += alen; 12519 putXMMRegLane64( gregOfRexRM(pfx,modrm), 12520 0/*lower lane*/, 12521 loadLE(Ity_I64, mkexpr(addr)) ); 12522 DIP("movlpd %s, %s\n", 12523 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 12524 goto decode_success; 12525 } 12526 } 12527 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ 12528 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ 12529 if (haveNo66noF2noF3(pfx) 12530 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12531 modrm = getUChar(delta); 12532 if (epartIsReg(modrm)) { 12533 delta += 1; 12534 putXMMRegLane64( gregOfRexRM(pfx,modrm), 12535 0/*lower lane*/, 12536 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 )); 12537 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12538 nameXMMReg(gregOfRexRM(pfx,modrm))); 12539 } else { 12540 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12541 delta += alen; 12542 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/, 12543 loadLE(Ity_I64, mkexpr(addr)) ); 12544 DIP("movlps %s, %s\n", 12545 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) )); 12546 } 12547 goto decode_success; 12548 } 12549 break; 12550 12551 case 0x13: 12552 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. 
*/ 12553 if (haveNo66noF2noF3(pfx) 12554 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12555 modrm = getUChar(delta); 12556 if (!epartIsReg(modrm)) { 12557 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12558 delta += alen; 12559 storeLE( mkexpr(addr), 12560 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12561 0/*lower lane*/ ) ); 12562 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12563 dis_buf); 12564 goto decode_success; 12565 } 12566 /* else fall through */ 12567 } 12568 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */ 12569 /* Identical to MOVLPS ? */ 12570 if (have66noF2noF3(pfx) 12571 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12572 modrm = getUChar(delta); 12573 if (!epartIsReg(modrm)) { 12574 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12575 delta += alen; 12576 storeLE( mkexpr(addr), 12577 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12578 0/*lower lane*/ ) ); 12579 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12580 dis_buf); 12581 goto decode_success; 12582 } 12583 /* else fall through */ 12584 } 12585 break; 12586 12587 case 0x14: 12588 case 0x15: 12589 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */ 12590 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */ 12591 /* These just appear to be special cases of SHUFPS */ 12592 if (haveNo66noF2noF3(pfx) && sz == 4) { 12593 Bool hi = toBool(opc == 0x15); 12594 IRTemp sV = newTemp(Ity_V128); 12595 IRTemp dV = newTemp(Ity_V128); 12596 modrm = getUChar(delta); 12597 UInt rG = gregOfRexRM(pfx,modrm); 12598 assign( dV, getXMMReg(rG) ); 12599 if (epartIsReg(modrm)) { 12600 UInt rE = eregOfRexRM(pfx,modrm); 12601 assign( sV, getXMMReg(rE) ); 12602 delta += 1; 12603 DIP("unpck%sps %s,%s\n", hi ? 
"h" : "l", 12604 nameXMMReg(rE), nameXMMReg(rG)); 12605 } else { 12606 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12607 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12608 delta += alen; 12609 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12610 dis_buf, nameXMMReg(rG)); 12611 } 12612 IRTemp res = math_UNPCKxPS_128( sV, dV, hi ); 12613 putXMMReg( rG, mkexpr(res) ); 12614 goto decode_success; 12615 } 12616 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */ 12617 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */ 12618 /* These just appear to be special cases of SHUFPS */ 12619 if (have66noF2noF3(pfx) 12620 && sz == 2 /* could be 8 if rex also present */) { 12621 Bool hi = toBool(opc == 0x15); 12622 IRTemp sV = newTemp(Ity_V128); 12623 IRTemp dV = newTemp(Ity_V128); 12624 modrm = getUChar(delta); 12625 UInt rG = gregOfRexRM(pfx,modrm); 12626 assign( dV, getXMMReg(rG) ); 12627 if (epartIsReg(modrm)) { 12628 UInt rE = eregOfRexRM(pfx,modrm); 12629 assign( sV, getXMMReg(rE) ); 12630 delta += 1; 12631 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12632 nameXMMReg(rE), nameXMMReg(rG)); 12633 } else { 12634 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12635 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 12636 delta += alen; 12637 DIP("unpck%sps %s,%s\n", hi ? "h" : "l", 12638 dis_buf, nameXMMReg(rG)); 12639 } 12640 IRTemp res = math_UNPCKxPD_128( sV, dV, hi ); 12641 putXMMReg( rG, mkexpr(res) ); 12642 goto decode_success; 12643 } 12644 break; 12645 12646 case 0x16: 12647 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */ 12648 /* These seems identical to MOVHPS. This instruction encoding is 12649 completely crazy. 
*/ 12650 if (have66noF2noF3(pfx) 12651 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12652 modrm = getUChar(delta); 12653 if (epartIsReg(modrm)) { 12654 /* fall through; apparently reg-reg is not possible */ 12655 } else { 12656 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12657 delta += alen; 12658 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12659 loadLE(Ity_I64, mkexpr(addr)) ); 12660 DIP("movhpd %s,%s\n", dis_buf, 12661 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12662 goto decode_success; 12663 } 12664 } 12665 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ 12666 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ 12667 if (haveNo66noF2noF3(pfx) 12668 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12669 modrm = getUChar(delta); 12670 if (epartIsReg(modrm)) { 12671 delta += 1; 12672 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12673 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) ); 12674 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12675 nameXMMReg(gregOfRexRM(pfx,modrm))); 12676 } else { 12677 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12678 delta += alen; 12679 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/, 12680 loadLE(Ity_I64, mkexpr(addr)) ); 12681 DIP("movhps %s,%s\n", dis_buf, 12682 nameXMMReg( gregOfRexRM(pfx,modrm) )); 12683 } 12684 goto decode_success; 12685 } 12686 break; 12687 12688 case 0x17: 12689 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. 
*/ 12690 if (haveNo66noF2noF3(pfx) 12691 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12692 modrm = getUChar(delta); 12693 if (!epartIsReg(modrm)) { 12694 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12695 delta += alen; 12696 storeLE( mkexpr(addr), 12697 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12698 1/*upper lane*/ ) ); 12699 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12700 dis_buf); 12701 goto decode_success; 12702 } 12703 /* else fall through */ 12704 } 12705 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */ 12706 /* Again, this seems identical to MOVHPS. */ 12707 if (have66noF2noF3(pfx) 12708 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12709 modrm = getUChar(delta); 12710 if (!epartIsReg(modrm)) { 12711 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12712 delta += alen; 12713 storeLE( mkexpr(addr), 12714 getXMMRegLane64( gregOfRexRM(pfx,modrm), 12715 1/*upper lane*/ ) ); 12716 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ), 12717 dis_buf); 12718 goto decode_success; 12719 } 12720 /* else fall through */ 12721 } 12722 break; 12723 12724 case 0x18: 12725 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */ 12726 /* 0F 18 /1 = PREFETCH0 -- with various different hints */ 12727 /* 0F 18 /2 = PREFETCH1 */ 12728 /* 0F 18 /3 = PREFETCH2 */ 12729 if (haveNo66noF2noF3(pfx) 12730 && !epartIsReg(getUChar(delta)) 12731 && gregLO3ofRM(getUChar(delta)) >= 0 12732 && gregLO3ofRM(getUChar(delta)) <= 3) { 12733 const HChar* hintstr = "??"; 12734 12735 modrm = getUChar(delta); 12736 vassert(!epartIsReg(modrm)); 12737 12738 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12739 delta += alen; 12740 12741 switch (gregLO3ofRM(modrm)) { 12742 case 0: hintstr = "nta"; break; 12743 case 1: hintstr = "t0"; break; 12744 case 2: hintstr = "t1"; break; 12745 case 3: hintstr = "t2"; break; 12746 default: vassert(0); 12747 } 12748 12749 DIP("prefetch%s %s\n", hintstr, dis_buf); 12750 goto 
decode_success; 12751 } 12752 break; 12753 12754 case 0x28: 12755 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */ 12756 if (have66noF2noF3(pfx) 12757 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12758 modrm = getUChar(delta); 12759 if (epartIsReg(modrm)) { 12760 putXMMReg( gregOfRexRM(pfx,modrm), 12761 getXMMReg( eregOfRexRM(pfx,modrm) )); 12762 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12763 nameXMMReg(gregOfRexRM(pfx,modrm))); 12764 delta += 1; 12765 } else { 12766 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12767 gen_SEGV_if_not_16_aligned( addr ); 12768 putXMMReg( gregOfRexRM(pfx,modrm), 12769 loadLE(Ity_V128, mkexpr(addr)) ); 12770 DIP("movapd %s,%s\n", dis_buf, 12771 nameXMMReg(gregOfRexRM(pfx,modrm))); 12772 delta += alen; 12773 } 12774 goto decode_success; 12775 } 12776 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ 12777 if (haveNo66noF2noF3(pfx) 12778 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12779 modrm = getUChar(delta); 12780 if (epartIsReg(modrm)) { 12781 putXMMReg( gregOfRexRM(pfx,modrm), 12782 getXMMReg( eregOfRexRM(pfx,modrm) )); 12783 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 12784 nameXMMReg(gregOfRexRM(pfx,modrm))); 12785 delta += 1; 12786 } else { 12787 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12788 gen_SEGV_if_not_16_aligned( addr ); 12789 putXMMReg( gregOfRexRM(pfx,modrm), 12790 loadLE(Ity_V128, mkexpr(addr)) ); 12791 DIP("movaps %s,%s\n", dis_buf, 12792 nameXMMReg(gregOfRexRM(pfx,modrm))); 12793 delta += alen; 12794 } 12795 goto decode_success; 12796 } 12797 break; 12798 12799 case 0x29: 12800 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). 
*/ 12801 if (haveNo66noF2noF3(pfx) 12802 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 12803 modrm = getUChar(delta); 12804 if (epartIsReg(modrm)) { 12805 putXMMReg( eregOfRexRM(pfx,modrm), 12806 getXMMReg( gregOfRexRM(pfx,modrm) )); 12807 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12808 nameXMMReg(eregOfRexRM(pfx,modrm))); 12809 delta += 1; 12810 } else { 12811 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12812 gen_SEGV_if_not_16_aligned( addr ); 12813 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12814 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12815 dis_buf ); 12816 delta += alen; 12817 } 12818 goto decode_success; 12819 } 12820 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */ 12821 if (have66noF2noF3(pfx) 12822 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 12823 modrm = getUChar(delta); 12824 if (epartIsReg(modrm)) { 12825 putXMMReg( eregOfRexRM(pfx,modrm), 12826 getXMMReg( gregOfRexRM(pfx,modrm) ) ); 12827 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12828 nameXMMReg(eregOfRexRM(pfx,modrm))); 12829 delta += 1; 12830 } else { 12831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12832 gen_SEGV_if_not_16_aligned( addr ); 12833 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 12834 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 12835 dis_buf ); 12836 delta += alen; 12837 } 12838 goto decode_success; 12839 } 12840 break; 12841 12842 case 0x2A: 12843 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low 12844 half xmm */ 12845 if (haveNo66noF2noF3(pfx) && sz == 4) { 12846 IRTemp arg64 = newTemp(Ity_I64); 12847 IRTemp rmode = newTemp(Ity_I32); 12848 12849 modrm = getUChar(delta); 12850 do_MMX_preamble(); 12851 if (epartIsReg(modrm)) { 12852 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12853 delta += 1; 12854 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12855 nameXMMReg(gregOfRexRM(pfx,modrm))); 12856 } else 
{ 12857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12858 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12859 delta += alen; 12860 DIP("cvtpi2ps %s,%s\n", dis_buf, 12861 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12862 } 12863 12864 assign( rmode, get_sse_roundingmode() ); 12865 12866 putXMMRegLane32F( 12867 gregOfRexRM(pfx,modrm), 0, 12868 binop(Iop_F64toF32, 12869 mkexpr(rmode), 12870 unop(Iop_I32StoF64, 12871 unop(Iop_64to32, mkexpr(arg64)) )) ); 12872 12873 putXMMRegLane32F( 12874 gregOfRexRM(pfx,modrm), 1, 12875 binop(Iop_F64toF32, 12876 mkexpr(rmode), 12877 unop(Iop_I32StoF64, 12878 unop(Iop_64HIto32, mkexpr(arg64)) )) ); 12879 12880 goto decode_success; 12881 } 12882 /* F3 0F 2A = CVTSI2SS 12883 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm 12884 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */ 12885 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 12886 IRTemp rmode = newTemp(Ity_I32); 12887 assign( rmode, get_sse_roundingmode() ); 12888 modrm = getUChar(delta); 12889 if (sz == 4) { 12890 IRTemp arg32 = newTemp(Ity_I32); 12891 if (epartIsReg(modrm)) { 12892 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12893 delta += 1; 12894 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12895 nameXMMReg(gregOfRexRM(pfx,modrm))); 12896 } else { 12897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12898 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12899 delta += alen; 12900 DIP("cvtsi2ss %s,%s\n", dis_buf, 12901 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12902 } 12903 putXMMRegLane32F( 12904 gregOfRexRM(pfx,modrm), 0, 12905 binop(Iop_F64toF32, 12906 mkexpr(rmode), 12907 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 12908 } else { 12909 /* sz == 8 */ 12910 IRTemp arg64 = newTemp(Ity_I64); 12911 if (epartIsReg(modrm)) { 12912 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12913 delta += 1; 12914 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12915 nameXMMReg(gregOfRexRM(pfx,modrm))); 12916 } else { 
12917 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12918 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12919 delta += alen; 12920 DIP("cvtsi2ssq %s,%s\n", dis_buf, 12921 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12922 } 12923 putXMMRegLane32F( 12924 gregOfRexRM(pfx,modrm), 0, 12925 binop(Iop_F64toF32, 12926 mkexpr(rmode), 12927 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) ); 12928 } 12929 goto decode_success; 12930 } 12931 /* F2 0F 2A = CVTSI2SD 12932 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm 12933 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm 12934 */ 12935 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 12936 modrm = getUChar(delta); 12937 if (sz == 4) { 12938 IRTemp arg32 = newTemp(Ity_I32); 12939 if (epartIsReg(modrm)) { 12940 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) ); 12941 delta += 1; 12942 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 12943 nameXMMReg(gregOfRexRM(pfx,modrm))); 12944 } else { 12945 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12946 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 12947 delta += alen; 12948 DIP("cvtsi2sdl %s,%s\n", dis_buf, 12949 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12950 } 12951 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 12952 unop(Iop_I32StoF64, mkexpr(arg32)) 12953 ); 12954 } else { 12955 /* sz == 8 */ 12956 IRTemp arg64 = newTemp(Ity_I64); 12957 if (epartIsReg(modrm)) { 12958 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) ); 12959 delta += 1; 12960 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 12961 nameXMMReg(gregOfRexRM(pfx,modrm))); 12962 } else { 12963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 12964 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 12965 delta += alen; 12966 DIP("cvtsi2sdq %s,%s\n", dis_buf, 12967 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 12968 } 12969 putXMMRegLane64F( 12970 gregOfRexRM(pfx,modrm), 12971 0, 12972 binop( Iop_I64StoF64, 12973 get_sse_roundingmode(), 12974 mkexpr(arg64) 
12975 ) 12976 ); 12977 } 12978 goto decode_success; 12979 } 12980 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in 12981 xmm(G) */ 12982 if (have66noF2noF3(pfx) && sz == 2) { 12983 IRTemp arg64 = newTemp(Ity_I64); 12984 12985 modrm = getUChar(delta); 12986 if (epartIsReg(modrm)) { 12987 /* Only switch to MMX mode if the source is a MMX register. 12988 This is inconsistent with all other instructions which 12989 convert between XMM and (M64 or MMX), which always switch 12990 to MMX mode even if 64-bit operand is M64 and not MMX. At 12991 least, that's what the Intel docs seem to me to say. 12992 Fixes #210264. */ 12993 do_MMX_preamble(); 12994 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) ); 12995 delta += 1; 12996 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 12997 nameXMMReg(gregOfRexRM(pfx,modrm))); 12998 } else { 12999 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13000 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 13001 delta += alen; 13002 DIP("cvtpi2pd %s,%s\n", dis_buf, 13003 nameXMMReg(gregOfRexRM(pfx,modrm)) ); 13004 } 13005 13006 putXMMRegLane64F( 13007 gregOfRexRM(pfx,modrm), 0, 13008 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) 13009 ); 13010 13011 putXMMRegLane64F( 13012 gregOfRexRM(pfx,modrm), 1, 13013 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) 13014 ); 13015 13016 goto decode_success; 13017 } 13018 break; 13019 13020 case 0x2B: 13021 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ 13022 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ 13023 if ( (haveNo66noF2noF3(pfx) && sz == 4) 13024 || (have66noF2noF3(pfx) && sz == 2) ) { 13025 modrm = getUChar(delta); 13026 if (!epartIsReg(modrm)) { 13027 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13028 gen_SEGV_if_not_16_aligned( addr ); 13029 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 13030 DIP("movntp%s %s,%s\n", sz==2 ? 
"d" : "s", 13031 dis_buf, 13032 nameXMMReg(gregOfRexRM(pfx,modrm))); 13033 delta += alen; 13034 goto decode_success; 13035 } 13036 /* else fall through */ 13037 } 13038 break; 13039 13040 case 0x2C: 13041 case 0x2D: 13042 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13043 I32 in mmx, according to prevailing SSE rounding mode */ 13044 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x 13045 I32 in mmx, rounding towards zero */ 13046 if (haveNo66noF2noF3(pfx) && sz == 4) { 13047 IRTemp dst64 = newTemp(Ity_I64); 13048 IRTemp rmode = newTemp(Ity_I32); 13049 IRTemp f32lo = newTemp(Ity_F32); 13050 IRTemp f32hi = newTemp(Ity_F32); 13051 Bool r2zero = toBool(opc == 0x2C); 13052 13053 do_MMX_preamble(); 13054 modrm = getUChar(delta); 13055 13056 if (epartIsReg(modrm)) { 13057 delta += 1; 13058 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13059 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1)); 13060 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "", 13061 nameXMMReg(eregOfRexRM(pfx,modrm)), 13062 nameMMXReg(gregLO3ofRM(modrm))); 13063 } else { 13064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13065 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13066 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64, 13067 mkexpr(addr), 13068 mkU64(4) ))); 13069 delta += alen; 13070 DIP("cvt%sps2pi %s,%s\n", r2zero ? 
"t" : "", 13071 dis_buf, 13072 nameMMXReg(gregLO3ofRM(modrm))); 13073 } 13074 13075 if (r2zero) { 13076 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13077 } else { 13078 assign( rmode, get_sse_roundingmode() ); 13079 } 13080 13081 assign( 13082 dst64, 13083 binop( Iop_32HLto64, 13084 binop( Iop_F64toI32S, 13085 mkexpr(rmode), 13086 unop( Iop_F32toF64, mkexpr(f32hi) ) ), 13087 binop( Iop_F64toI32S, 13088 mkexpr(rmode), 13089 unop( Iop_F32toF64, mkexpr(f32lo) ) ) 13090 ) 13091 ); 13092 13093 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13094 goto decode_success; 13095 } 13096 /* F3 0F 2D = CVTSS2SI 13097 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13098 according to prevailing SSE rounding mode 13099 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13100 according to prevailing SSE rounding mode 13101 */ 13102 /* F3 0F 2C = CVTTSS2SI 13103 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg, 13104 truncating towards zero 13105 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg, 13106 truncating towards zero 13107 */ 13108 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) { 13109 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13110 goto decode_success; 13111 } 13112 /* F2 0F 2D = CVTSD2SI 13113 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13114 according to prevailing SSE rounding mode 13115 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13116 according to prevailing SSE rounding mode 13117 */ 13118 /* F2 0F 2C = CVTTSD2SI 13119 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg, 13120 truncating towards zero 13121 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg, 13122 truncating towards zero 13123 */ 13124 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) { 13125 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz); 13126 goto decode_success; 13127 } 13128 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13129 I32 
in mmx, according to prevailing SSE rounding mode */ 13130 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x 13131 I32 in mmx, rounding towards zero */ 13132 if (have66noF2noF3(pfx) && sz == 2) { 13133 IRTemp dst64 = newTemp(Ity_I64); 13134 IRTemp rmode = newTemp(Ity_I32); 13135 IRTemp f64lo = newTemp(Ity_F64); 13136 IRTemp f64hi = newTemp(Ity_F64); 13137 Bool r2zero = toBool(opc == 0x2C); 13138 13139 do_MMX_preamble(); 13140 modrm = getUChar(delta); 13141 13142 if (epartIsReg(modrm)) { 13143 delta += 1; 13144 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13145 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1)); 13146 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "", 13147 nameXMMReg(eregOfRexRM(pfx,modrm)), 13148 nameMMXReg(gregLO3ofRM(modrm))); 13149 } else { 13150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13151 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13152 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64, 13153 mkexpr(addr), 13154 mkU64(8) ))); 13155 delta += alen; 13156 DIP("cvt%spf2pi %s,%s\n", r2zero ? 
"t" : "", 13157 dis_buf, 13158 nameMMXReg(gregLO3ofRM(modrm))); 13159 } 13160 13161 if (r2zero) { 13162 assign(rmode, mkU32((UInt)Irrm_ZERO) ); 13163 } else { 13164 assign( rmode, get_sse_roundingmode() ); 13165 } 13166 13167 assign( 13168 dst64, 13169 binop( Iop_32HLto64, 13170 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ), 13171 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) ) 13172 ) 13173 ); 13174 13175 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64)); 13176 goto decode_success; 13177 } 13178 break; 13179 13180 case 0x2E: 13181 case 0x2F: 13182 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */ 13183 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */ 13184 if (have66noF2noF3(pfx) && sz == 2) { 13185 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc ); 13186 goto decode_success; 13187 } 13188 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */ 13189 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */ 13190 if (haveNo66noF2noF3(pfx) && sz == 4) { 13191 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc ); 13192 goto decode_success; 13193 } 13194 break; 13195 13196 case 0x50: 13197 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) 13198 to 4 lowest bits of ireg(G) */ 13199 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 13200 && epartIsReg(getUChar(delta))) { 13201 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13202 set to 1, which has been known to happen: 13203 13204 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d 13205 13206 20071106: Intel docs say that REX.W isn't redundant: when 13207 present, a 64-bit register is written; when not present, only 13208 the 32-bit half is written. However, testing on a Core2 13209 machine suggests the entire 64 bit register is written 13210 irrespective of the status of REX.W. That could be because 13211 of the default rule that says "if the lower half of a 32-bit 13212 register is written, the upper half is zeroed". 
By using 13213 putIReg32 here we inadvertantly produce the same behaviour as 13214 the Core2, for the same reason -- putIReg32 implements said 13215 rule. 13216 13217 AMD docs give no indication that REX.W is even valid for this 13218 insn. */ 13219 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13220 goto decode_success; 13221 } 13222 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to 13223 2 lowest bits of ireg(G) */ 13224 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 13225 /* sz == 8 is a kludge to handle insns with REX.W redundantly 13226 set to 1, which has been known to happen: 13227 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d 13228 20071106: see further comments on MOVMSKPS implementation above. 13229 */ 13230 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13231 goto decode_success; 13232 } 13233 break; 13234 13235 case 0x51: 13236 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */ 13237 if (haveF3no66noF2(pfx) && sz == 4) { 13238 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13239 "sqrtss", Iop_Sqrt32F0x4 ); 13240 goto decode_success; 13241 } 13242 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */ 13243 if (haveNo66noF2noF3(pfx) && sz == 4) { 13244 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13245 "sqrtps", Iop_Sqrt32Fx4 ); 13246 goto decode_success; 13247 } 13248 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */ 13249 if (haveF2no66noF3(pfx) && sz == 4) { 13250 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta, 13251 "sqrtsd", Iop_Sqrt64F0x2 ); 13252 goto decode_success; 13253 } 13254 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */ 13255 if (have66noF2noF3(pfx) && sz == 2) { 13256 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13257 "sqrtpd", Iop_Sqrt64Fx2 ); 13258 goto decode_success; 13259 } 13260 break; 13261 13262 case 0x52: 13263 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */ 13264 if (haveF3no66noF2(pfx) && sz == 4) { 
13265 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13266 "rsqrtss", Iop_RSqrtEst32F0x4 ); 13267 goto decode_success; 13268 } 13269 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */ 13270 if (haveNo66noF2noF3(pfx) && sz == 4) { 13271 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13272 "rsqrtps", Iop_RSqrtEst32Fx4 ); 13273 goto decode_success; 13274 } 13275 break; 13276 13277 case 0x53: 13278 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */ 13279 if (haveF3no66noF2(pfx) && sz == 4) { 13280 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta, 13281 "rcpss", Iop_RecipEst32F0x4 ); 13282 goto decode_success; 13283 } 13284 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ 13285 if (haveNo66noF2noF3(pfx) && sz == 4) { 13286 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta, 13287 "rcpps", Iop_RecipEst32Fx4 ); 13288 goto decode_success; 13289 } 13290 break; 13291 13292 case 0x54: 13293 /* 0F 54 = ANDPS -- G = G and E */ 13294 if (haveNo66noF2noF3(pfx) && sz == 4) { 13295 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 ); 13296 goto decode_success; 13297 } 13298 /* 66 0F 54 = ANDPD -- G = G and E */ 13299 if (have66noF2noF3(pfx) && sz == 2) { 13300 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 ); 13301 goto decode_success; 13302 } 13303 break; 13304 13305 case 0x55: 13306 /* 0F 55 = ANDNPS -- G = (not G) and E */ 13307 if (haveNo66noF2noF3(pfx) && sz == 4) { 13308 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps", 13309 Iop_AndV128 ); 13310 goto decode_success; 13311 } 13312 /* 66 0F 55 = ANDNPD -- G = (not G) and E */ 13313 if (have66noF2noF3(pfx) && sz == 2) { 13314 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd", 13315 Iop_AndV128 ); 13316 goto decode_success; 13317 } 13318 break; 13319 13320 case 0x56: 13321 /* 0F 56 = ORPS -- G = G and E */ 13322 if (haveNo66noF2noF3(pfx) && sz == 4) { 13323 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 ); 
13324 goto decode_success; 13325 } 13326 /* 66 0F 56 = ORPD -- G = G and E */ 13327 if (have66noF2noF3(pfx) && sz == 2) { 13328 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 ); 13329 goto decode_success; 13330 } 13331 break; 13332 13333 case 0x57: 13334 /* 66 0F 57 = XORPD -- G = G xor E */ 13335 if (have66noF2noF3(pfx) && sz == 2) { 13336 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 ); 13337 goto decode_success; 13338 } 13339 /* 0F 57 = XORPS -- G = G xor E */ 13340 if (haveNo66noF2noF3(pfx) && sz == 4) { 13341 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 ); 13342 goto decode_success; 13343 } 13344 break; 13345 13346 case 0x58: 13347 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ 13348 if (haveNo66noF2noF3(pfx) && sz == 4) { 13349 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 ); 13350 goto decode_success; 13351 } 13352 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */ 13353 if (haveF3no66noF2(pfx) && sz == 4) { 13354 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 ); 13355 goto decode_success; 13356 } 13357 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */ 13358 if (haveF2no66noF3(pfx) 13359 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13360 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 ); 13361 goto decode_success; 13362 } 13363 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */ 13364 if (have66noF2noF3(pfx) 13365 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13366 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 ); 13367 goto decode_success; 13368 } 13369 break; 13370 13371 case 0x59: 13372 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */ 13373 if (haveF2no66noF3(pfx) 13374 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13375 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 ); 13376 goto decode_success; 13377 } 13378 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ 
13379 if (haveF3no66noF2(pfx) && sz == 4) { 13380 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 ); 13381 goto decode_success; 13382 } 13383 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ 13384 if (haveNo66noF2noF3(pfx) && sz == 4) { 13385 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 ); 13386 goto decode_success; 13387 } 13388 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */ 13389 if (have66noF2noF3(pfx) 13390 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13391 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 ); 13392 goto decode_success; 13393 } 13394 break; 13395 13396 case 0x5A: 13397 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x 13398 F64 in xmm(G). */ 13399 if (haveNo66noF2noF3(pfx) 13400 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13401 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ ); 13402 goto decode_success; 13403 } 13404 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in 13405 low half xmm(G) */ 13406 if (haveF3no66noF2(pfx) && sz == 4) { 13407 IRTemp f32lo = newTemp(Ity_F32); 13408 13409 modrm = getUChar(delta); 13410 if (epartIsReg(modrm)) { 13411 delta += 1; 13412 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0)); 13413 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13414 nameXMMReg(gregOfRexRM(pfx,modrm))); 13415 } else { 13416 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13417 assign(f32lo, loadLE(Ity_F32, mkexpr(addr))); 13418 delta += alen; 13419 DIP("cvtss2sd %s,%s\n", dis_buf, 13420 nameXMMReg(gregOfRexRM(pfx,modrm))); 13421 } 13422 13423 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0, 13424 unop( Iop_F32toF64, mkexpr(f32lo) ) ); 13425 13426 goto decode_success; 13427 } 13428 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in 13429 low 1/4 xmm(G), according to prevailing SSE rounding mode */ 13430 if (haveF2no66noF3(pfx) && sz == 4) { 13431 IRTemp rmode = 
newTemp(Ity_I32); 13432 IRTemp f64lo = newTemp(Ity_F64); 13433 13434 modrm = getUChar(delta); 13435 if (epartIsReg(modrm)) { 13436 delta += 1; 13437 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0)); 13438 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13439 nameXMMReg(gregOfRexRM(pfx,modrm))); 13440 } else { 13441 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13442 assign(f64lo, loadLE(Ity_F64, mkexpr(addr))); 13443 delta += alen; 13444 DIP("cvtsd2ss %s,%s\n", dis_buf, 13445 nameXMMReg(gregOfRexRM(pfx,modrm))); 13446 } 13447 13448 assign( rmode, get_sse_roundingmode() ); 13449 putXMMRegLane32F( 13450 gregOfRexRM(pfx,modrm), 0, 13451 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) 13452 ); 13453 13454 goto decode_success; 13455 } 13456 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in 13457 lo half xmm(G), rounding according to prevailing SSE rounding 13458 mode, and zero upper half */ 13459 /* Note, this is practically identical to CVTPD2DQ. It would have 13460 be nice to merge them together. */ 13461 if (have66noF2noF3(pfx) && sz == 2) { 13462 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13463 goto decode_success; 13464 } 13465 break; 13466 13467 case 0x5B: 13468 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13469 xmm(G), rounding towards zero */ 13470 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in 13471 xmm(G), as per the prevailing rounding mode */ 13472 if ( (have66noF2noF3(pfx) && sz == 2) 13473 || (haveF3no66noF2(pfx) && sz == 4) ) { 13474 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???) 
13475 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero ); 13476 goto decode_success; 13477 } 13478 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in 13479 xmm(G) */ 13480 if (haveNo66noF2noF3(pfx) && sz == 4) { 13481 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ ); 13482 goto decode_success; 13483 } 13484 break; 13485 13486 case 0x5C: 13487 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */ 13488 if (haveF3no66noF2(pfx) && sz == 4) { 13489 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 ); 13490 goto decode_success; 13491 } 13492 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */ 13493 if (haveF2no66noF3(pfx) 13494 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13495 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 ); 13496 goto decode_success; 13497 } 13498 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */ 13499 if (haveNo66noF2noF3(pfx) && sz == 4) { 13500 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 ); 13501 goto decode_success; 13502 } 13503 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */ 13504 if (have66noF2noF3(pfx) && sz == 2) { 13505 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 ); 13506 goto decode_success; 13507 } 13508 break; 13509 13510 case 0x5D: 13511 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ 13512 if (haveNo66noF2noF3(pfx) && sz == 4) { 13513 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 ); 13514 goto decode_success; 13515 } 13516 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ 13517 if (haveF3no66noF2(pfx) && sz == 4) { 13518 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 ); 13519 goto decode_success; 13520 } 13521 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */ 13522 if (haveF2no66noF3(pfx) && sz == 4) { 13523 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 ); 13524 goto decode_success; 13525 } 13526 /* 66 0F 5D = MINPD -- min 64Fx2 from 
R/M to R */ 13527 if (have66noF2noF3(pfx) && sz == 2) { 13528 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 ); 13529 goto decode_success; 13530 } 13531 break; 13532 13533 case 0x5E: 13534 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */ 13535 if (haveF2no66noF3(pfx) && sz == 4) { 13536 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 ); 13537 goto decode_success; 13538 } 13539 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */ 13540 if (haveNo66noF2noF3(pfx) && sz == 4) { 13541 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 ); 13542 goto decode_success; 13543 } 13544 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */ 13545 if (haveF3no66noF2(pfx) && sz == 4) { 13546 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 ); 13547 goto decode_success; 13548 } 13549 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */ 13550 if (have66noF2noF3(pfx) && sz == 2) { 13551 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 ); 13552 goto decode_success; 13553 } 13554 break; 13555 13556 case 0x5F: 13557 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ 13558 if (haveNo66noF2noF3(pfx) && sz == 4) { 13559 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 ); 13560 goto decode_success; 13561 } 13562 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ 13563 if (haveF3no66noF2(pfx) && sz == 4) { 13564 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 ); 13565 goto decode_success; 13566 } 13567 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */ 13568 if (haveF2no66noF3(pfx) && sz == 4) { 13569 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 ); 13570 goto decode_success; 13571 } 13572 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */ 13573 if (have66noF2noF3(pfx) && sz == 2) { 13574 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 ); 13575 goto decode_success; 13576 } 13577 break; 13578 13579 case 0x60: 13580 /* 66 0F 60 = 
PUNPCKLBW */ 13581 if (have66noF2noF3(pfx) && sz == 2) { 13582 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13583 "punpcklbw", 13584 Iop_InterleaveLO8x16, True ); 13585 goto decode_success; 13586 } 13587 break; 13588 13589 case 0x61: 13590 /* 66 0F 61 = PUNPCKLWD */ 13591 if (have66noF2noF3(pfx) && sz == 2) { 13592 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13593 "punpcklwd", 13594 Iop_InterleaveLO16x8, True ); 13595 goto decode_success; 13596 } 13597 break; 13598 13599 case 0x62: 13600 /* 66 0F 62 = PUNPCKLDQ */ 13601 if (have66noF2noF3(pfx) && sz == 2) { 13602 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13603 "punpckldq", 13604 Iop_InterleaveLO32x4, True ); 13605 goto decode_success; 13606 } 13607 break; 13608 13609 case 0x63: 13610 /* 66 0F 63 = PACKSSWB */ 13611 if (have66noF2noF3(pfx) && sz == 2) { 13612 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13613 "packsswb", 13614 Iop_QNarrowBin16Sto8Sx16, True ); 13615 goto decode_success; 13616 } 13617 break; 13618 13619 case 0x64: 13620 /* 66 0F 64 = PCMPGTB */ 13621 if (have66noF2noF3(pfx) && sz == 2) { 13622 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13623 "pcmpgtb", Iop_CmpGT8Sx16, False ); 13624 goto decode_success; 13625 } 13626 break; 13627 13628 case 0x65: 13629 /* 66 0F 65 = PCMPGTW */ 13630 if (have66noF2noF3(pfx) && sz == 2) { 13631 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13632 "pcmpgtw", Iop_CmpGT16Sx8, False ); 13633 goto decode_success; 13634 } 13635 break; 13636 13637 case 0x66: 13638 /* 66 0F 66 = PCMPGTD */ 13639 if (have66noF2noF3(pfx) && sz == 2) { 13640 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13641 "pcmpgtd", Iop_CmpGT32Sx4, False ); 13642 goto decode_success; 13643 } 13644 break; 13645 13646 case 0x67: 13647 /* 66 0F 67 = PACKUSWB */ 13648 if (have66noF2noF3(pfx) && sz == 2) { 13649 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13650 "packuswb", 13651 Iop_QNarrowBin16Sto8Ux16, True ); 13652 goto decode_success; 13653 } 13654 break; 13655 13656 case 0x68: 13657 /* 66 0F 68 = PUNPCKHBW */ 
13658 if (have66noF2noF3(pfx) && sz == 2) { 13659 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13660 "punpckhbw", 13661 Iop_InterleaveHI8x16, True ); 13662 goto decode_success; 13663 } 13664 break; 13665 13666 case 0x69: 13667 /* 66 0F 69 = PUNPCKHWD */ 13668 if (have66noF2noF3(pfx) && sz == 2) { 13669 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13670 "punpckhwd", 13671 Iop_InterleaveHI16x8, True ); 13672 goto decode_success; 13673 } 13674 break; 13675 13676 case 0x6A: 13677 /* 66 0F 6A = PUNPCKHDQ */ 13678 if (have66noF2noF3(pfx) && sz == 2) { 13679 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13680 "punpckhdq", 13681 Iop_InterleaveHI32x4, True ); 13682 goto decode_success; 13683 } 13684 break; 13685 13686 case 0x6B: 13687 /* 66 0F 6B = PACKSSDW */ 13688 if (have66noF2noF3(pfx) && sz == 2) { 13689 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13690 "packssdw", 13691 Iop_QNarrowBin32Sto16Sx8, True ); 13692 goto decode_success; 13693 } 13694 break; 13695 13696 case 0x6C: 13697 /* 66 0F 6C = PUNPCKLQDQ */ 13698 if (have66noF2noF3(pfx) && sz == 2) { 13699 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13700 "punpcklqdq", 13701 Iop_InterleaveLO64x2, True ); 13702 goto decode_success; 13703 } 13704 break; 13705 13706 case 0x6D: 13707 /* 66 0F 6D = PUNPCKHQDQ */ 13708 if (have66noF2noF3(pfx) && sz == 2) { 13709 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13710 "punpckhqdq", 13711 Iop_InterleaveHI64x2, True ); 13712 goto decode_success; 13713 } 13714 break; 13715 13716 case 0x6E: 13717 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, 13718 zeroing high 3/4 of xmm. */ 13719 /* or from ireg64/m64 to xmm lo 1/2, 13720 zeroing high 1/2 of xmm. 
*/ 13721 if (have66noF2noF3(pfx)) { 13722 vassert(sz == 2 || sz == 8); 13723 if (sz == 2) sz = 4; 13724 modrm = getUChar(delta); 13725 if (epartIsReg(modrm)) { 13726 delta += 1; 13727 if (sz == 4) { 13728 putXMMReg( 13729 gregOfRexRM(pfx,modrm), 13730 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 13731 ); 13732 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 13733 nameXMMReg(gregOfRexRM(pfx,modrm))); 13734 } else { 13735 putXMMReg( 13736 gregOfRexRM(pfx,modrm), 13737 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 13738 ); 13739 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 13740 nameXMMReg(gregOfRexRM(pfx,modrm))); 13741 } 13742 } else { 13743 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 13744 delta += alen; 13745 putXMMReg( 13746 gregOfRexRM(pfx,modrm), 13747 sz == 4 13748 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 13749 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) 13750 ); 13751 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf, 13752 nameXMMReg(gregOfRexRM(pfx,modrm))); 13753 } 13754 goto decode_success; 13755 } 13756 break; 13757 13758 case 0x6F: 13759 if (have66noF2noF3(pfx) 13760 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 13761 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). 
*/ 13762 modrm = getUChar(delta); 13763 if (epartIsReg(modrm)) { 13764 putXMMReg( gregOfRexRM(pfx,modrm), 13765 getXMMReg( eregOfRexRM(pfx,modrm) )); 13766 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13767 nameXMMReg(gregOfRexRM(pfx,modrm))); 13768 delta += 1; 13769 } else { 13770 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13771 gen_SEGV_if_not_16_aligned( addr ); 13772 putXMMReg( gregOfRexRM(pfx,modrm), 13773 loadLE(Ity_V128, mkexpr(addr)) ); 13774 DIP("movdqa %s,%s\n", dis_buf, 13775 nameXMMReg(gregOfRexRM(pfx,modrm))); 13776 delta += alen; 13777 } 13778 goto decode_success; 13779 } 13780 if (haveF3no66noF2(pfx) && sz == 4) { 13781 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */ 13782 modrm = getUChar(delta); 13783 if (epartIsReg(modrm)) { 13784 putXMMReg( gregOfRexRM(pfx,modrm), 13785 getXMMReg( eregOfRexRM(pfx,modrm) )); 13786 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13787 nameXMMReg(gregOfRexRM(pfx,modrm))); 13788 delta += 1; 13789 } else { 13790 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13791 putXMMReg( gregOfRexRM(pfx,modrm), 13792 loadLE(Ity_V128, mkexpr(addr)) ); 13793 DIP("movdqu %s,%s\n", dis_buf, 13794 nameXMMReg(gregOfRexRM(pfx,modrm))); 13795 delta += alen; 13796 } 13797 goto decode_success; 13798 } 13799 break; 13800 13801 case 0x70: 13802 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */ 13803 if (have66noF2noF3(pfx) && sz == 2) { 13804 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/); 13805 goto decode_success; 13806 } 13807 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 13808 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */ 13809 if (haveNo66noF2noF3(pfx) && sz == 4) { 13810 Int order; 13811 IRTemp sV, dV, s3, s2, s1, s0; 13812 s3 = s2 = s1 = s0 = IRTemp_INVALID; 13813 sV = newTemp(Ity_I64); 13814 dV = newTemp(Ity_I64); 13815 do_MMX_preamble(); 13816 modrm = getUChar(delta); 13817 if (epartIsReg(modrm)) 
{ 13818 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 13819 order = (Int)getUChar(delta+1); 13820 delta += 1+1; 13821 DIP("pshufw $%d,%s,%s\n", order, 13822 nameMMXReg(eregLO3ofRM(modrm)), 13823 nameMMXReg(gregLO3ofRM(modrm))); 13824 } else { 13825 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 13826 1/*extra byte after amode*/ ); 13827 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 13828 order = (Int)getUChar(delta+alen); 13829 delta += 1+alen; 13830 DIP("pshufw $%d,%s,%s\n", order, 13831 dis_buf, 13832 nameMMXReg(gregLO3ofRM(modrm))); 13833 } 13834 breakup64to16s( sV, &s3, &s2, &s1, &s0 ); 13835 # define SEL(n) \ 13836 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3))) 13837 assign(dV, 13838 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3), 13839 SEL((order>>2)&3), SEL((order>>0)&3) ) 13840 ); 13841 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV)); 13842 # undef SEL 13843 goto decode_success; 13844 } 13845 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or 13846 mem) to G(xmm), and copy upper half */ 13847 if (haveF2no66noF3(pfx) && sz == 4) { 13848 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13849 False/*!isAvx*/, False/*!xIsH*/ ); 13850 goto decode_success; 13851 } 13852 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or 13853 mem) to G(xmm), and copy lower half */ 13854 if (haveF3no66noF2(pfx) && sz == 4) { 13855 delta = dis_PSHUFxW_128( vbi, pfx, delta, 13856 False/*!isAvx*/, True/*xIsH*/ ); 13857 goto decode_success; 13858 } 13859 break; 13860 13861 case 0x71: 13862 /* 66 0F 71 /2 ib = PSRLW by immediate */ 13863 if (have66noF2noF3(pfx) && sz == 2 13864 && epartIsReg(getUChar(delta)) 13865 && gregLO3ofRM(getUChar(delta)) == 2) { 13866 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 ); 13867 goto decode_success; 13868 } 13869 /* 66 0F 71 /4 ib = PSRAW by immediate */ 13870 if (have66noF2noF3(pfx) && sz == 2 13871 && epartIsReg(getUChar(delta)) 13872 && gregLO3ofRM(getUChar(delta)) == 4) { 13873 delta = dis_SSE_shiftE_imm( 
pfx, delta, "psraw", Iop_SarN16x8 ); 13874 goto decode_success; 13875 } 13876 /* 66 0F 71 /6 ib = PSLLW by immediate */ 13877 if (have66noF2noF3(pfx) && sz == 2 13878 && epartIsReg(getUChar(delta)) 13879 && gregLO3ofRM(getUChar(delta)) == 6) { 13880 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 ); 13881 goto decode_success; 13882 } 13883 break; 13884 13885 case 0x72: 13886 /* 66 0F 72 /2 ib = PSRLD by immediate */ 13887 if (have66noF2noF3(pfx) && sz == 2 13888 && epartIsReg(getUChar(delta)) 13889 && gregLO3ofRM(getUChar(delta)) == 2) { 13890 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 ); 13891 goto decode_success; 13892 } 13893 /* 66 0F 72 /4 ib = PSRAD by immediate */ 13894 if (have66noF2noF3(pfx) && sz == 2 13895 && epartIsReg(getUChar(delta)) 13896 && gregLO3ofRM(getUChar(delta)) == 4) { 13897 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 ); 13898 goto decode_success; 13899 } 13900 /* 66 0F 72 /6 ib = PSLLD by immediate */ 13901 if (have66noF2noF3(pfx) && sz == 2 13902 && epartIsReg(getUChar(delta)) 13903 && gregLO3ofRM(getUChar(delta)) == 6) { 13904 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 ); 13905 goto decode_success; 13906 } 13907 break; 13908 13909 case 0x73: 13910 /* 66 0F 73 /3 ib = PSRLDQ by immediate */ 13911 /* note, if mem case ever filled in, 1 byte after amode */ 13912 if (have66noF2noF3(pfx) && sz == 2 13913 && epartIsReg(getUChar(delta)) 13914 && gregLO3ofRM(getUChar(delta)) == 3) { 13915 Int imm = (Int)getUChar(delta+1); 13916 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13917 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg)); 13918 delta += 2; 13919 IRTemp sV = newTemp(Ity_V128); 13920 assign( sV, getXMMReg(reg) ); 13921 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm ))); 13922 goto decode_success; 13923 } 13924 /* 66 0F 73 /7 ib = PSLLDQ by immediate */ 13925 /* note, if mem case ever filled in, 1 byte after amode */ 13926 if (have66noF2noF3(pfx) && sz == 2 13927 && 
epartIsReg(getUChar(delta)) 13928 && gregLO3ofRM(getUChar(delta)) == 7) { 13929 Int imm = (Int)getUChar(delta+1); 13930 Int reg = eregOfRexRM(pfx,getUChar(delta)); 13931 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg)); 13932 vassert(imm >= 0 && imm <= 255); 13933 delta += 2; 13934 IRTemp sV = newTemp(Ity_V128); 13935 assign( sV, getXMMReg(reg) ); 13936 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm ))); 13937 goto decode_success; 13938 } 13939 /* 66 0F 73 /2 ib = PSRLQ by immediate */ 13940 if (have66noF2noF3(pfx) && sz == 2 13941 && epartIsReg(getUChar(delta)) 13942 && gregLO3ofRM(getUChar(delta)) == 2) { 13943 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 ); 13944 goto decode_success; 13945 } 13946 /* 66 0F 73 /6 ib = PSLLQ by immediate */ 13947 if (have66noF2noF3(pfx) && sz == 2 13948 && epartIsReg(getUChar(delta)) 13949 && gregLO3ofRM(getUChar(delta)) == 6) { 13950 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 ); 13951 goto decode_success; 13952 } 13953 break; 13954 13955 case 0x74: 13956 /* 66 0F 74 = PCMPEQB */ 13957 if (have66noF2noF3(pfx) && sz == 2) { 13958 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13959 "pcmpeqb", Iop_CmpEQ8x16, False ); 13960 goto decode_success; 13961 } 13962 break; 13963 13964 case 0x75: 13965 /* 66 0F 75 = PCMPEQW */ 13966 if (have66noF2noF3(pfx) && sz == 2) { 13967 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13968 "pcmpeqw", Iop_CmpEQ16x8, False ); 13969 goto decode_success; 13970 } 13971 break; 13972 13973 case 0x76: 13974 /* 66 0F 76 = PCMPEQD */ 13975 if (have66noF2noF3(pfx) && sz == 2) { 13976 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 13977 "pcmpeqd", Iop_CmpEQ32x4, False ); 13978 goto decode_success; 13979 } 13980 break; 13981 13982 case 0x7E: 13983 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to 13984 G (lo half xmm). Upper half of G is zeroed out. 
*/ 13985 if (haveF3no66noF2(pfx) 13986 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 13987 modrm = getUChar(delta); 13988 if (epartIsReg(modrm)) { 13989 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 13990 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 13991 /* zero bits 127:64 */ 13992 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) ); 13993 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 13994 nameXMMReg(gregOfRexRM(pfx,modrm))); 13995 delta += 1; 13996 } else { 13997 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 13998 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) ); 13999 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, 14000 loadLE(Ity_I64, mkexpr(addr)) ); 14001 DIP("movsd %s,%s\n", dis_buf, 14002 nameXMMReg(gregOfRexRM(pfx,modrm))); 14003 delta += alen; 14004 } 14005 goto decode_success; 14006 } 14007 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */ 14008 /* or from xmm low 1/2 to ireg64 or m64. */ 14009 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) { 14010 if (sz == 2) sz = 4; 14011 modrm = getUChar(delta); 14012 if (epartIsReg(modrm)) { 14013 delta += 1; 14014 if (sz == 4) { 14015 putIReg32( eregOfRexRM(pfx,modrm), 14016 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) ); 14017 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14018 nameIReg32(eregOfRexRM(pfx,modrm))); 14019 } else { 14020 putIReg64( eregOfRexRM(pfx,modrm), 14021 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) ); 14022 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14023 nameIReg64(eregOfRexRM(pfx,modrm))); 14024 } 14025 } else { 14026 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14027 delta += alen; 14028 storeLE( mkexpr(addr), 14029 sz == 4 14030 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0) 14031 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) ); 14032 DIP("mov%c %s, %s\n", sz == 4 ? 
'd' : 'q', 14033 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14034 } 14035 goto decode_success; 14036 } 14037 break; 14038 14039 case 0x7F: 14040 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */ 14041 if (haveF3no66noF2(pfx) && sz == 4) { 14042 modrm = getUChar(delta); 14043 if (epartIsReg(modrm)) { 14044 goto decode_failure; /* awaiting test case */ 14045 delta += 1; 14046 putXMMReg( eregOfRexRM(pfx,modrm), 14047 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14048 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14049 nameXMMReg(eregOfRexRM(pfx,modrm))); 14050 } else { 14051 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14052 delta += alen; 14053 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14054 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14055 } 14056 goto decode_success; 14057 } 14058 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */ 14059 if (have66noF2noF3(pfx) && sz == 2) { 14060 modrm = getUChar(delta); 14061 if (epartIsReg(modrm)) { 14062 delta += 1; 14063 putXMMReg( eregOfRexRM(pfx,modrm), 14064 getXMMReg(gregOfRexRM(pfx,modrm)) ); 14065 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), 14066 nameXMMReg(eregOfRexRM(pfx,modrm))); 14067 } else { 14068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 14069 gen_SEGV_if_not_16_aligned( addr ); 14070 delta += alen; 14071 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14072 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf); 14073 } 14074 goto decode_success; 14075 } 14076 break; 14077 14078 case 0xAE: 14079 /* 0F AE /7 = SFENCE -- flush pending operations to memory */ 14080 if (haveNo66noF2noF3(pfx) 14081 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14082 && sz == 4) { 14083 delta += 1; 14084 /* Insert a memory fence. It's sometimes important that these 14085 are carried through to the generated code. 
*/ 14086 stmt( IRStmt_MBE(Imbe_Fence) ); 14087 DIP("sfence\n"); 14088 goto decode_success; 14089 } 14090 /* mindless duplication follows .. */ 14091 /* 0F AE /5 = LFENCE -- flush pending operations to memory */ 14092 /* 0F AE /6 = MFENCE -- flush pending operations to memory */ 14093 if (haveNo66noF2noF3(pfx) 14094 && epartIsReg(getUChar(delta)) 14095 && (gregLO3ofRM(getUChar(delta)) == 5 14096 || gregLO3ofRM(getUChar(delta)) == 6) 14097 && sz == 4) { 14098 delta += 1; 14099 /* Insert a memory fence. It's sometimes important that these 14100 are carried through to the generated code. */ 14101 stmt( IRStmt_MBE(Imbe_Fence) ); 14102 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m"); 14103 goto decode_success; 14104 } 14105 14106 /* 0F AE /7 = CLFLUSH -- flush cache line */ 14107 if (haveNo66noF2noF3(pfx) 14108 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7 14109 && sz == 4) { 14110 14111 /* This is something of a hack. We need to know the size of 14112 the cache line containing addr. Since we don't (easily), 14113 assume 256 on the basis that no real cache would have a 14114 line that big. It's safe to invalidate more stuff than we 14115 need, just inefficient. */ 14116 ULong lineszB = 256ULL; 14117 14118 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14119 delta += alen; 14120 14121 /* Round addr down to the start of the containing block. 
*/ 14122 stmt( IRStmt_Put( 14123 OFFB_CMSTART, 14124 binop( Iop_And64, 14125 mkexpr(addr), 14126 mkU64( ~(lineszB-1) ))) ); 14127 14128 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) ); 14129 14130 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta)); 14131 14132 DIP("clflush %s\n", dis_buf); 14133 goto decode_success; 14134 } 14135 14136 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */ 14137 if (haveNo66noF2noF3(pfx) 14138 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 14139 && sz == 4) { 14140 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14141 goto decode_success; 14142 } 14143 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */ 14144 if (haveNo66noF2noF3(pfx) 14145 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 14146 && sz == 4) { 14147 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/); 14148 goto decode_success; 14149 } 14150 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ 14151 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14152 && !epartIsReg(getUChar(delta)) 14153 && gregOfRexRM(pfx,getUChar(delta)) == 0) { 14154 delta = dis_FXSAVE(vbi, pfx, delta, sz); 14155 goto decode_success; 14156 } 14157 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */ 14158 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14159 && !epartIsReg(getUChar(delta)) 14160 && gregOfRexRM(pfx,getUChar(delta)) == 1) { 14161 delta = dis_FXRSTOR(vbi, pfx, delta, sz); 14162 goto decode_success; 14163 } 14164 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */ 14165 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14166 && !epartIsReg(getUChar(delta)) 14167 && gregOfRexRM(pfx,getUChar(delta)) == 4 14168 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14169 delta = dis_XSAVE(vbi, pfx, delta, sz); 14170 goto decode_success; 14171 } 14172 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */ 14173 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) 14174 && 
!epartIsReg(getUChar(delta)) 14175 && gregOfRexRM(pfx,getUChar(delta)) == 5 14176 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 14177 delta = dis_XRSTOR(vbi, pfx, delta, sz); 14178 goto decode_success; 14179 } 14180 break; 14181 14182 case 0xC2: 14183 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */ 14184 if (haveNo66noF2noF3(pfx) && sz == 4) { 14185 Long delta0 = delta; 14186 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 ); 14187 if (delta > delta0) goto decode_success; 14188 } 14189 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */ 14190 if (haveF3no66noF2(pfx) && sz == 4) { 14191 Long delta0 = delta; 14192 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 ); 14193 if (delta > delta0) goto decode_success; 14194 } 14195 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */ 14196 if (haveF2no66noF3(pfx) && sz == 4) { 14197 Long delta0 = delta; 14198 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 ); 14199 if (delta > delta0) goto decode_success; 14200 } 14201 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */ 14202 if (have66noF2noF3(pfx) && sz == 2) { 14203 Long delta0 = delta; 14204 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 ); 14205 if (delta > delta0) goto decode_success; 14206 } 14207 break; 14208 14209 case 0xC3: 14210 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. 
*/ 14211 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14212 modrm = getUChar(delta); 14213 if (!epartIsReg(modrm)) { 14214 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14215 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) ); 14216 DIP("movnti %s,%s\n", dis_buf, 14217 nameIRegG(sz, pfx, modrm)); 14218 delta += alen; 14219 goto decode_success; 14220 } 14221 /* else fall through */ 14222 } 14223 break; 14224 14225 case 0xC4: 14226 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14227 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14228 put it into the specified lane of mmx(G). */ 14229 if (haveNo66noF2noF3(pfx) 14230 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14231 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the 14232 mmx reg. t4 is the new lane value. t5 is the original 14233 mmx value. t6 is the new mmx value. */ 14234 Int lane; 14235 t4 = newTemp(Ity_I16); 14236 t5 = newTemp(Ity_I64); 14237 t6 = newTemp(Ity_I64); 14238 modrm = getUChar(delta); 14239 do_MMX_preamble(); 14240 14241 assign(t5, getMMXReg(gregLO3ofRM(modrm))); 14242 breakup64to16s( t5, &t3, &t2, &t1, &t0 ); 14243 14244 if (epartIsReg(modrm)) { 14245 assign(t4, getIReg16(eregOfRexRM(pfx,modrm))); 14246 delta += 1+1; 14247 lane = getUChar(delta-1); 14248 DIP("pinsrw $%d,%s,%s\n", lane, 14249 nameIReg16(eregOfRexRM(pfx,modrm)), 14250 nameMMXReg(gregLO3ofRM(modrm))); 14251 } else { 14252 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14253 delta += 1+alen; 14254 lane = getUChar(delta-1); 14255 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14256 DIP("pinsrw $%d,%s,%s\n", lane, 14257 dis_buf, 14258 nameMMXReg(gregLO3ofRM(modrm))); 14259 } 14260 14261 switch (lane & 3) { 14262 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break; 14263 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break; 14264 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break; 14265 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break; 14266 default: 
vassert(0); 14267 } 14268 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6)); 14269 goto decode_success; 14270 } 14271 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and 14272 put it into the specified lane of xmm(G). */ 14273 if (have66noF2noF3(pfx) 14274 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14275 Int lane; 14276 t4 = newTemp(Ity_I16); 14277 modrm = getUChar(delta); 14278 UInt rG = gregOfRexRM(pfx,modrm); 14279 if (epartIsReg(modrm)) { 14280 UInt rE = eregOfRexRM(pfx,modrm); 14281 assign(t4, getIReg16(rE)); 14282 delta += 1+1; 14283 lane = getUChar(delta-1); 14284 DIP("pinsrw $%d,%s,%s\n", 14285 lane, nameIReg16(rE), nameXMMReg(rG)); 14286 } else { 14287 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 14288 1/*byte after the amode*/ ); 14289 delta += 1+alen; 14290 lane = getUChar(delta-1); 14291 assign(t4, loadLE(Ity_I16, mkexpr(addr))); 14292 DIP("pinsrw $%d,%s,%s\n", 14293 lane, dis_buf, nameXMMReg(rG)); 14294 } 14295 IRTemp src_vec = newTemp(Ity_V128); 14296 assign(src_vec, getXMMReg(rG)); 14297 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7); 14298 putXMMReg(rG, mkexpr(res_vec)); 14299 goto decode_success; 14300 } 14301 break; 14302 14303 case 0xC5: 14304 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14305 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put 14306 zero-extend of it in ireg(G). 
*/ 14307 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) { 14308 modrm = getUChar(delta); 14309 if (epartIsReg(modrm)) { 14310 IRTemp sV = newTemp(Ity_I64); 14311 t5 = newTemp(Ity_I16); 14312 do_MMX_preamble(); 14313 assign(sV, getMMXReg(eregLO3ofRM(modrm))); 14314 breakup64to16s( sV, &t3, &t2, &t1, &t0 ); 14315 switch (getUChar(delta+1) & 3) { 14316 case 0: assign(t5, mkexpr(t0)); break; 14317 case 1: assign(t5, mkexpr(t1)); break; 14318 case 2: assign(t5, mkexpr(t2)); break; 14319 case 3: assign(t5, mkexpr(t3)); break; 14320 default: vassert(0); 14321 } 14322 if (sz == 8) 14323 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5))); 14324 else 14325 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))); 14326 DIP("pextrw $%d,%s,%s\n", 14327 (Int)getUChar(delta+1), 14328 nameMMXReg(eregLO3ofRM(modrm)), 14329 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm)) 14330 : nameIReg32(gregOfRexRM(pfx,modrm)) 14331 ); 14332 delta += 2; 14333 goto decode_success; 14334 } 14335 /* else fall through */ 14336 /* note, for anyone filling in the mem case: this insn has one 14337 byte after the amode and therefore you must pass 1 as the 14338 last arg to disAMode */ 14339 } 14340 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 14341 zero-extend of it in ireg(G). 
*/ 14342 if (have66noF2noF3(pfx) 14343 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14344 Long delta0 = delta; 14345 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 14346 False/*!isAvx*/ ); 14347 if (delta > delta0) goto decode_success; 14348 /* else fall through -- decoding has failed */ 14349 } 14350 break; 14351 14352 case 0xC6: 14353 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ 14354 if (haveNo66noF2noF3(pfx) && sz == 4) { 14355 Int imm8 = 0; 14356 IRTemp sV = newTemp(Ity_V128); 14357 IRTemp dV = newTemp(Ity_V128); 14358 modrm = getUChar(delta); 14359 UInt rG = gregOfRexRM(pfx,modrm); 14360 assign( dV, getXMMReg(rG) ); 14361 if (epartIsReg(modrm)) { 14362 UInt rE = eregOfRexRM(pfx,modrm); 14363 assign( sV, getXMMReg(rE) ); 14364 imm8 = (Int)getUChar(delta+1); 14365 delta += 1+1; 14366 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG)); 14367 } else { 14368 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14369 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14370 imm8 = (Int)getUChar(delta+alen); 14371 delta += 1+alen; 14372 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG)); 14373 } 14374 IRTemp res = math_SHUFPS_128( sV, dV, imm8 ); 14375 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14376 goto decode_success; 14377 } 14378 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */ 14379 if (have66noF2noF3(pfx) && sz == 2) { 14380 Int select; 14381 IRTemp sV = newTemp(Ity_V128); 14382 IRTemp dV = newTemp(Ity_V128); 14383 14384 modrm = getUChar(delta); 14385 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 14386 14387 if (epartIsReg(modrm)) { 14388 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 14389 select = (Int)getUChar(delta+1); 14390 delta += 1+1; 14391 DIP("shufpd $%d,%s,%s\n", select, 14392 nameXMMReg(eregOfRexRM(pfx,modrm)), 14393 nameXMMReg(gregOfRexRM(pfx,modrm))); 14394 } else { 14395 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 14396 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14397 select 
= getUChar(delta+alen); 14398 delta += 1+alen; 14399 DIP("shufpd $%d,%s,%s\n", select, 14400 dis_buf, 14401 nameXMMReg(gregOfRexRM(pfx,modrm))); 14402 } 14403 14404 IRTemp res = math_SHUFPD_128( sV, dV, select ); 14405 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 14406 goto decode_success; 14407 } 14408 break; 14409 14410 case 0xD1: 14411 /* 66 0F D1 = PSRLW by E */ 14412 if (have66noF2noF3(pfx) && sz == 2) { 14413 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 ); 14414 goto decode_success; 14415 } 14416 break; 14417 14418 case 0xD2: 14419 /* 66 0F D2 = PSRLD by E */ 14420 if (have66noF2noF3(pfx) && sz == 2) { 14421 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 ); 14422 goto decode_success; 14423 } 14424 break; 14425 14426 case 0xD3: 14427 /* 66 0F D3 = PSRLQ by E */ 14428 if (have66noF2noF3(pfx) && sz == 2) { 14429 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 ); 14430 goto decode_success; 14431 } 14432 break; 14433 14434 case 0xD4: 14435 /* 66 0F D4 = PADDQ */ 14436 if (have66noF2noF3(pfx) && sz == 2) { 14437 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14438 "paddq", Iop_Add64x2, False ); 14439 goto decode_success; 14440 } 14441 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14442 /* 0F D4 = PADDQ -- add 64x1 */ 14443 if (haveNo66noF2noF3(pfx) && sz == 4) { 14444 do_MMX_preamble(); 14445 delta = dis_MMXop_regmem_to_reg ( 14446 vbi, pfx, delta, opc, "paddq", False ); 14447 goto decode_success; 14448 } 14449 break; 14450 14451 case 0xD5: 14452 /* 66 0F D5 = PMULLW -- 16x8 multiply */ 14453 if (have66noF2noF3(pfx) && sz == 2) { 14454 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14455 "pmullw", Iop_Mul16x8, False ); 14456 goto decode_success; 14457 } 14458 break; 14459 14460 case 0xD6: 14461 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero 14462 hi half). 
*/ 14463 if (haveF3no66noF2(pfx) && sz == 4) { 14464 modrm = getUChar(delta); 14465 if (epartIsReg(modrm)) { 14466 do_MMX_preamble(); 14467 putXMMReg( gregOfRexRM(pfx,modrm), 14468 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) ); 14469 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14470 nameXMMReg(gregOfRexRM(pfx,modrm))); 14471 delta += 1; 14472 goto decode_success; 14473 } 14474 /* apparently no mem case for this insn */ 14475 } 14476 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem 14477 or lo half xmm). */ 14478 if (have66noF2noF3(pfx) 14479 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 14480 modrm = getUChar(delta); 14481 if (epartIsReg(modrm)) { 14482 /* fall through, awaiting test case */ 14483 /* dst: lo half copied, hi half zeroed */ 14484 } else { 14485 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14486 storeLE( mkexpr(addr), 14487 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 )); 14488 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf ); 14489 delta += alen; 14490 goto decode_success; 14491 } 14492 } 14493 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */ 14494 if (haveF2no66noF3(pfx) && sz == 4) { 14495 modrm = getUChar(delta); 14496 if (epartIsReg(modrm)) { 14497 do_MMX_preamble(); 14498 putMMXReg( gregLO3ofRM(modrm), 14499 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 )); 14500 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), 14501 nameMMXReg(gregLO3ofRM(modrm))); 14502 delta += 1; 14503 goto decode_success; 14504 } 14505 /* apparently no mem case for this insn */ 14506 } 14507 break; 14508 14509 case 0xD7: 14510 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 14511 lanes in xmm(E), turn them into a byte, and put 14512 zero-extend of it in ireg(G). Doing this directly is just 14513 too cumbersome; give up therefore and call a helper. 
*/ 14514 if (have66noF2noF3(pfx) 14515 && (sz == 2 || /* ignore redundant REX.W */ sz == 8) 14516 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */ 14517 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ ); 14518 goto decode_success; 14519 } 14520 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14521 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in 14522 mmx(E), turn them into a byte, and put zero-extend of it in 14523 ireg(G). */ 14524 if (haveNo66noF2noF3(pfx) 14525 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 14526 modrm = getUChar(delta); 14527 if (epartIsReg(modrm)) { 14528 do_MMX_preamble(); 14529 t0 = newTemp(Ity_I64); 14530 t1 = newTemp(Ity_I32); 14531 assign(t0, getMMXReg(eregLO3ofRM(modrm))); 14532 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0)))); 14533 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1)); 14534 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14535 nameIReg32(gregOfRexRM(pfx,modrm))); 14536 delta += 1; 14537 goto decode_success; 14538 } 14539 /* else fall through */ 14540 } 14541 break; 14542 14543 case 0xD8: 14544 /* 66 0F D8 = PSUBUSB */ 14545 if (have66noF2noF3(pfx) && sz == 2) { 14546 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14547 "psubusb", Iop_QSub8Ux16, False ); 14548 goto decode_success; 14549 } 14550 break; 14551 14552 case 0xD9: 14553 /* 66 0F D9 = PSUBUSW */ 14554 if (have66noF2noF3(pfx) && sz == 2) { 14555 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14556 "psubusw", Iop_QSub16Ux8, False ); 14557 goto decode_success; 14558 } 14559 break; 14560 14561 case 0xDA: 14562 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14563 /* 0F DA = PMINUB -- 8x8 unsigned min */ 14564 if (haveNo66noF2noF3(pfx) && sz == 4) { 14565 do_MMX_preamble(); 14566 delta = dis_MMXop_regmem_to_reg ( 14567 vbi, pfx, delta, opc, "pminub", False ); 14568 goto decode_success; 14569 } 14570 /* 66 0F DA = PMINUB -- 8x16 unsigned min */ 14571 if 
(have66noF2noF3(pfx) && sz == 2) { 14572 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14573 "pminub", Iop_Min8Ux16, False ); 14574 goto decode_success; 14575 } 14576 break; 14577 14578 case 0xDB: 14579 /* 66 0F DB = PAND */ 14580 if (have66noF2noF3(pfx) && sz == 2) { 14581 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 ); 14582 goto decode_success; 14583 } 14584 break; 14585 14586 case 0xDC: 14587 /* 66 0F DC = PADDUSB */ 14588 if (have66noF2noF3(pfx) && sz == 2) { 14589 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14590 "paddusb", Iop_QAdd8Ux16, False ); 14591 goto decode_success; 14592 } 14593 break; 14594 14595 case 0xDD: 14596 /* 66 0F DD = PADDUSW */ 14597 if (have66noF2noF3(pfx) && sz == 2) { 14598 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14599 "paddusw", Iop_QAdd16Ux8, False ); 14600 goto decode_success; 14601 } 14602 break; 14603 14604 case 0xDE: 14605 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14606 /* 0F DE = PMAXUB -- 8x8 unsigned max */ 14607 if (haveNo66noF2noF3(pfx) && sz == 4) { 14608 do_MMX_preamble(); 14609 delta = dis_MMXop_regmem_to_reg ( 14610 vbi, pfx, delta, opc, "pmaxub", False ); 14611 goto decode_success; 14612 } 14613 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */ 14614 if (have66noF2noF3(pfx) && sz == 2) { 14615 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14616 "pmaxub", Iop_Max8Ux16, False ); 14617 goto decode_success; 14618 } 14619 break; 14620 14621 case 0xDF: 14622 /* 66 0F DF = PANDN */ 14623 if (have66noF2noF3(pfx) && sz == 2) { 14624 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 ); 14625 goto decode_success; 14626 } 14627 break; 14628 14629 case 0xE0: 14630 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14631 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ 14632 if (haveNo66noF2noF3(pfx) && sz == 4) { 14633 do_MMX_preamble(); 14634 delta = dis_MMXop_regmem_to_reg ( 14635 vbi, pfx, delta, opc, "pavgb", False ); 14636 goto 
decode_success; 14637 } 14638 /* 66 0F E0 = PAVGB */ 14639 if (have66noF2noF3(pfx) && sz == 2) { 14640 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14641 "pavgb", Iop_Avg8Ux16, False ); 14642 goto decode_success; 14643 } 14644 break; 14645 14646 case 0xE1: 14647 /* 66 0F E1 = PSRAW by E */ 14648 if (have66noF2noF3(pfx) && sz == 2) { 14649 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 ); 14650 goto decode_success; 14651 } 14652 break; 14653 14654 case 0xE2: 14655 /* 66 0F E2 = PSRAD by E */ 14656 if (have66noF2noF3(pfx) && sz == 2) { 14657 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 ); 14658 goto decode_success; 14659 } 14660 break; 14661 14662 case 0xE3: 14663 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14664 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */ 14665 if (haveNo66noF2noF3(pfx) && sz == 4) { 14666 do_MMX_preamble(); 14667 delta = dis_MMXop_regmem_to_reg ( 14668 vbi, pfx, delta, opc, "pavgw", False ); 14669 goto decode_success; 14670 } 14671 /* 66 0F E3 = PAVGW */ 14672 if (have66noF2noF3(pfx) && sz == 2) { 14673 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14674 "pavgw", Iop_Avg16Ux8, False ); 14675 goto decode_success; 14676 } 14677 break; 14678 14679 case 0xE4: 14680 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14681 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */ 14682 if (haveNo66noF2noF3(pfx) && sz == 4) { 14683 do_MMX_preamble(); 14684 delta = dis_MMXop_regmem_to_reg ( 14685 vbi, pfx, delta, opc, "pmuluh", False ); 14686 goto decode_success; 14687 } 14688 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */ 14689 if (have66noF2noF3(pfx) && sz == 2) { 14690 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14691 "pmulhuw", Iop_MulHi16Ux8, False ); 14692 goto decode_success; 14693 } 14694 break; 14695 14696 case 0xE5: 14697 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */ 14698 if (have66noF2noF3(pfx) 
&& sz == 2) { 14699 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14700 "pmulhw", Iop_MulHi16Sx8, False ); 14701 goto decode_success; 14702 } 14703 break; 14704 14705 case 0xE6: 14706 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14707 lo half xmm(G), and zero upper half, rounding towards zero */ 14708 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in 14709 lo half xmm(G), according to prevailing rounding mode, and zero 14710 upper half */ 14711 if ( (haveF2no66noF3(pfx) && sz == 4) 14712 || (have66noF2noF3(pfx) && sz == 2) ) { 14713 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/, 14714 toBool(sz == 2)/*r2zero*/); 14715 goto decode_success; 14716 } 14717 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x 14718 F64 in xmm(G) */ 14719 if (haveF3no66noF2(pfx) && sz == 4) { 14720 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/); 14721 goto decode_success; 14722 } 14723 break; 14724 14725 case 0xE7: 14726 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14727 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the 14728 Intel manual does not say anything about the usual business of 14729 the FP reg tags getting trashed whenever an MMX insn happens. 14730 So we just leave them alone. 14731 */ 14732 if (haveNo66noF2noF3(pfx) && sz == 4) { 14733 modrm = getUChar(delta); 14734 if (!epartIsReg(modrm)) { 14735 /* do_MMX_preamble(); Intel docs don't specify this */ 14736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14737 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) ); 14738 DIP("movntq %s,%s\n", dis_buf, 14739 nameMMXReg(gregLO3ofRM(modrm))); 14740 delta += alen; 14741 goto decode_success; 14742 } 14743 /* else fall through */ 14744 } 14745 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. 
*/ 14746 if (have66noF2noF3(pfx) && sz == 2) { 14747 modrm = getUChar(delta); 14748 if (!epartIsReg(modrm)) { 14749 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14750 gen_SEGV_if_not_16_aligned( addr ); 14751 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) ); 14752 DIP("movntdq %s,%s\n", dis_buf, 14753 nameXMMReg(gregOfRexRM(pfx,modrm))); 14754 delta += alen; 14755 goto decode_success; 14756 } 14757 /* else fall through */ 14758 } 14759 break; 14760 14761 case 0xE8: 14762 /* 66 0F E8 = PSUBSB */ 14763 if (have66noF2noF3(pfx) && sz == 2) { 14764 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14765 "psubsb", Iop_QSub8Sx16, False ); 14766 goto decode_success; 14767 } 14768 break; 14769 14770 case 0xE9: 14771 /* 66 0F E9 = PSUBSW */ 14772 if (have66noF2noF3(pfx) && sz == 2) { 14773 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14774 "psubsw", Iop_QSub16Sx8, False ); 14775 goto decode_success; 14776 } 14777 break; 14778 14779 case 0xEA: 14780 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14781 /* 0F EA = PMINSW -- 16x4 signed min */ 14782 if (haveNo66noF2noF3(pfx) && sz == 4) { 14783 do_MMX_preamble(); 14784 delta = dis_MMXop_regmem_to_reg ( 14785 vbi, pfx, delta, opc, "pminsw", False ); 14786 goto decode_success; 14787 } 14788 /* 66 0F EA = PMINSW -- 16x8 signed min */ 14789 if (have66noF2noF3(pfx) && sz == 2) { 14790 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14791 "pminsw", Iop_Min16Sx8, False ); 14792 goto decode_success; 14793 } 14794 break; 14795 14796 case 0xEB: 14797 /* 66 0F EB = POR */ 14798 if (have66noF2noF3(pfx) && sz == 2) { 14799 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 ); 14800 goto decode_success; 14801 } 14802 break; 14803 14804 case 0xEC: 14805 /* 66 0F EC = PADDSB */ 14806 if (have66noF2noF3(pfx) && sz == 2) { 14807 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14808 "paddsb", Iop_QAdd8Sx16, False ); 14809 goto decode_success; 14810 } 14811 break; 14812 14813 case 0xED: 14814 /* 66 0F ED = PADDSW 
*/ 14815 if (have66noF2noF3(pfx) && sz == 2) { 14816 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14817 "paddsw", Iop_QAdd16Sx8, False ); 14818 goto decode_success; 14819 } 14820 break; 14821 14822 case 0xEE: 14823 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14824 /* 0F EE = PMAXSW -- 16x4 signed max */ 14825 if (haveNo66noF2noF3(pfx) && sz == 4) { 14826 do_MMX_preamble(); 14827 delta = dis_MMXop_regmem_to_reg ( 14828 vbi, pfx, delta, opc, "pmaxsw", False ); 14829 goto decode_success; 14830 } 14831 /* 66 0F EE = PMAXSW -- 16x8 signed max */ 14832 if (have66noF2noF3(pfx) && sz == 2) { 14833 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 14834 "pmaxsw", Iop_Max16Sx8, False ); 14835 goto decode_success; 14836 } 14837 break; 14838 14839 case 0xEF: 14840 /* 66 0F EF = PXOR */ 14841 if (have66noF2noF3(pfx) && sz == 2) { 14842 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 ); 14843 goto decode_success; 14844 } 14845 break; 14846 14847 case 0xF1: 14848 /* 66 0F F1 = PSLLW by E */ 14849 if (have66noF2noF3(pfx) && sz == 2) { 14850 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 ); 14851 goto decode_success; 14852 } 14853 break; 14854 14855 case 0xF2: 14856 /* 66 0F F2 = PSLLD by E */ 14857 if (have66noF2noF3(pfx) && sz == 2) { 14858 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 ); 14859 goto decode_success; 14860 } 14861 break; 14862 14863 case 0xF3: 14864 /* 66 0F F3 = PSLLQ by E */ 14865 if (have66noF2noF3(pfx) && sz == 2) { 14866 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 ); 14867 goto decode_success; 14868 } 14869 break; 14870 14871 case 0xF4: 14872 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14873 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit 14874 half */ 14875 if (have66noF2noF3(pfx) && sz == 2) { 14876 IRTemp sV = newTemp(Ity_V128); 14877 IRTemp dV = newTemp(Ity_V128); 14878 modrm = getUChar(delta); 14879 UInt rG = 
gregOfRexRM(pfx,modrm); 14880 assign( dV, getXMMReg(rG) ); 14881 if (epartIsReg(modrm)) { 14882 UInt rE = eregOfRexRM(pfx,modrm); 14883 assign( sV, getXMMReg(rE) ); 14884 delta += 1; 14885 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14886 } else { 14887 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14888 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14889 delta += alen; 14890 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG)); 14891 } 14892 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) ); 14893 goto decode_success; 14894 } 14895 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 14896 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x 14897 0 to form 64-bit result */ 14898 if (haveNo66noF2noF3(pfx) && sz == 4) { 14899 IRTemp sV = newTemp(Ity_I64); 14900 IRTemp dV = newTemp(Ity_I64); 14901 t1 = newTemp(Ity_I32); 14902 t0 = newTemp(Ity_I32); 14903 modrm = getUChar(delta); 14904 14905 do_MMX_preamble(); 14906 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 14907 14908 if (epartIsReg(modrm)) { 14909 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 14910 delta += 1; 14911 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)), 14912 nameMMXReg(gregLO3ofRM(modrm))); 14913 } else { 14914 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14915 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 14916 delta += alen; 14917 DIP("pmuludq %s,%s\n", dis_buf, 14918 nameMMXReg(gregLO3ofRM(modrm))); 14919 } 14920 14921 assign( t0, unop(Iop_64to32, mkexpr(dV)) ); 14922 assign( t1, unop(Iop_64to32, mkexpr(sV)) ); 14923 putMMXReg( gregLO3ofRM(modrm), 14924 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) ); 14925 goto decode_success; 14926 } 14927 break; 14928 14929 case 0xF5: 14930 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from 14931 E(xmm or mem) to G(xmm) */ 14932 if (have66noF2noF3(pfx) && sz == 2) { 14933 IRTemp sV = newTemp(Ity_V128); 14934 IRTemp dV = newTemp(Ity_V128); 14935 modrm = getUChar(delta); 14936 UInt rG = 
gregOfRexRM(pfx,modrm); 14937 if (epartIsReg(modrm)) { 14938 UInt rE = eregOfRexRM(pfx,modrm); 14939 assign( sV, getXMMReg(rE) ); 14940 delta += 1; 14941 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14942 } else { 14943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14944 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14945 delta += alen; 14946 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG)); 14947 } 14948 assign( dV, getXMMReg(rG) ); 14949 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) ); 14950 goto decode_success; 14951 } 14952 break; 14953 14954 case 0xF6: 14955 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14956 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */ 14957 if (haveNo66noF2noF3(pfx) && sz == 4) { 14958 do_MMX_preamble(); 14959 delta = dis_MMXop_regmem_to_reg ( 14960 vbi, pfx, delta, opc, "psadbw", False ); 14961 goto decode_success; 14962 } 14963 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs 14964 from E(xmm or mem) to G(xmm) */ 14965 if (have66noF2noF3(pfx) && sz == 2) { 14966 IRTemp sV = newTemp(Ity_V128); 14967 IRTemp dV = newTemp(Ity_V128); 14968 modrm = getUChar(delta); 14969 UInt rG = gregOfRexRM(pfx,modrm); 14970 if (epartIsReg(modrm)) { 14971 UInt rE = eregOfRexRM(pfx,modrm); 14972 assign( sV, getXMMReg(rE) ); 14973 delta += 1; 14974 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 14975 } else { 14976 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 14977 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 14978 delta += alen; 14979 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG)); 14980 } 14981 assign( dV, getXMMReg(rG) ); 14982 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) ); 14983 14984 goto decode_success; 14985 } 14986 break; 14987 14988 case 0xF7: 14989 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ 14990 /* 0F F7 = MASKMOVQ -- 8x8 masked store */ 14991 if (haveNo66noF2noF3(pfx) && sz == 4) { 14992 Bool ok = False; 14993 delta = dis_MMX( 
&ok, vbi, pfx, sz, delta-1 ); 14994 if (ok) goto decode_success; 14995 } 14996 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ 14997 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) { 14998 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ ); 14999 goto decode_success; 15000 } 15001 break; 15002 15003 case 0xF8: 15004 /* 66 0F F8 = PSUBB */ 15005 if (have66noF2noF3(pfx) && sz == 2) { 15006 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15007 "psubb", Iop_Sub8x16, False ); 15008 goto decode_success; 15009 } 15010 break; 15011 15012 case 0xF9: 15013 /* 66 0F F9 = PSUBW */ 15014 if (have66noF2noF3(pfx) && sz == 2) { 15015 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15016 "psubw", Iop_Sub16x8, False ); 15017 goto decode_success; 15018 } 15019 break; 15020 15021 case 0xFA: 15022 /* 66 0F FA = PSUBD */ 15023 if (have66noF2noF3(pfx) && sz == 2) { 15024 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15025 "psubd", Iop_Sub32x4, False ); 15026 goto decode_success; 15027 } 15028 break; 15029 15030 case 0xFB: 15031 /* 66 0F FB = PSUBQ */ 15032 if (have66noF2noF3(pfx) && sz == 2) { 15033 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15034 "psubq", Iop_Sub64x2, False ); 15035 goto decode_success; 15036 } 15037 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */ 15038 /* 0F FB = PSUBQ -- sub 64x1 */ 15039 if (haveNo66noF2noF3(pfx) && sz == 4) { 15040 do_MMX_preamble(); 15041 delta = dis_MMXop_regmem_to_reg ( 15042 vbi, pfx, delta, opc, "psubq", False ); 15043 goto decode_success; 15044 } 15045 break; 15046 15047 case 0xFC: 15048 /* 66 0F FC = PADDB */ 15049 if (have66noF2noF3(pfx) && sz == 2) { 15050 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15051 "paddb", Iop_Add8x16, False ); 15052 goto decode_success; 15053 } 15054 break; 15055 15056 case 0xFD: 15057 /* 66 0F FD = PADDW */ 15058 if (have66noF2noF3(pfx) && sz == 2) { 15059 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15060 "paddw", Iop_Add16x8, False ); 15061 goto 
decode_success; 15062 } 15063 break; 15064 15065 case 0xFE: 15066 /* 66 0F FE = PADDD */ 15067 if (have66noF2noF3(pfx) && sz == 2) { 15068 delta = dis_SSEint_E_to_G( vbi, pfx, delta, 15069 "paddd", Iop_Add32x4, False ); 15070 goto decode_success; 15071 } 15072 break; 15073 15074 default: 15075 goto decode_failure; 15076 15077 } 15078 15079 decode_failure: 15080 *decode_OK = False; 15081 return deltaIN; 15082 15083 decode_success: 15084 *decode_OK = True; 15085 return delta; 15086 } 15087 15088 15089 /*------------------------------------------------------------*/ 15090 /*--- ---*/ 15091 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/ 15092 /*--- ---*/ 15093 /*------------------------------------------------------------*/ 15094 15095 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx, 15096 Long delta, Bool isAvx ) 15097 { 15098 IRTemp addr = IRTemp_INVALID; 15099 Int alen = 0; 15100 HChar dis_buf[50]; 15101 IRTemp sV = newTemp(Ity_V128); 15102 IRTemp d0 = newTemp(Ity_I64); 15103 UChar modrm = getUChar(delta); 15104 UInt rG = gregOfRexRM(pfx,modrm); 15105 if (epartIsReg(modrm)) { 15106 UInt rE = eregOfRexRM(pfx,modrm); 15107 assign( sV, getXMMReg(rE) ); 15108 DIP("%smovddup %s,%s\n", 15109 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG)); 15110 delta += 1; 15111 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) ); 15112 } else { 15113 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15114 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 15115 DIP("%smovddup %s,%s\n", 15116 isAvx ? "v" : "", dis_buf, nameXMMReg(rG)); 15117 delta += alen; 15118 } 15119 (isAvx ? 
putYMMRegLoAndZU : putXMMReg) 15120 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) ); 15121 return delta; 15122 } 15123 15124 15125 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx, 15126 Long delta ) 15127 { 15128 IRTemp addr = IRTemp_INVALID; 15129 Int alen = 0; 15130 HChar dis_buf[50]; 15131 IRTemp d0 = newTemp(Ity_I64); 15132 IRTemp d1 = newTemp(Ity_I64); 15133 UChar modrm = getUChar(delta); 15134 UInt rG = gregOfRexRM(pfx,modrm); 15135 if (epartIsReg(modrm)) { 15136 UInt rE = eregOfRexRM(pfx,modrm); 15137 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 15138 delta += 1; 15139 assign ( d0, getYMMRegLane64(rE, 0) ); 15140 assign ( d1, getYMMRegLane64(rE, 2) ); 15141 } else { 15142 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15143 assign( d0, loadLE(Ity_I64, mkexpr(addr)) ); 15144 assign( d1, loadLE(Ity_I64, binop(Iop_Add64, 15145 mkexpr(addr), mkU64(16))) ); 15146 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG)); 15147 delta += alen; 15148 } 15149 putYMMRegLane64( rG, 0, mkexpr(d0) ); 15150 putYMMRegLane64( rG, 1, mkexpr(d0) ); 15151 putYMMRegLane64( rG, 2, mkexpr(d1) ); 15152 putYMMRegLane64( rG, 3, mkexpr(d1) ); 15153 return delta; 15154 } 15155 15156 15157 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx, 15158 Long delta, Bool isAvx, Bool isL ) 15159 { 15160 IRTemp addr = IRTemp_INVALID; 15161 Int alen = 0; 15162 HChar dis_buf[50]; 15163 IRTemp sV = newTemp(Ity_V128); 15164 UChar modrm = getUChar(delta); 15165 UInt rG = gregOfRexRM(pfx,modrm); 15166 IRTemp s3, s2, s1, s0; 15167 s3 = s2 = s1 = s0 = IRTemp_INVALID; 15168 if (epartIsReg(modrm)) { 15169 UInt rE = eregOfRexRM(pfx,modrm); 15170 assign( sV, getXMMReg(rE) ); 15171 DIP("%smovs%cdup %s,%s\n", 15172 isAvx ? "v" : "", isL ? 
'l' : 'h', nameXMMReg(rE), nameXMMReg(rG)); 15173 delta += 1; 15174 } else { 15175 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15176 if (!isAvx) 15177 gen_SEGV_if_not_16_aligned( addr ); 15178 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 15179 DIP("%smovs%cdup %s,%s\n", 15180 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG)); 15181 delta += alen; 15182 } 15183 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 15184 (isAvx ? putYMMRegLoAndZU : putXMMReg) 15185 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 ) 15186 : mkV128from32s( s3, s3, s1, s1 ) ); 15187 return delta; 15188 } 15189 15190 15191 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx, 15192 Long delta, Bool isL ) 15193 { 15194 IRTemp addr = IRTemp_INVALID; 15195 Int alen = 0; 15196 HChar dis_buf[50]; 15197 IRTemp sV = newTemp(Ity_V256); 15198 UChar modrm = getUChar(delta); 15199 UInt rG = gregOfRexRM(pfx,modrm); 15200 IRTemp s7, s6, s5, s4, s3, s2, s1, s0; 15201 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID; 15202 if (epartIsReg(modrm)) { 15203 UInt rE = eregOfRexRM(pfx,modrm); 15204 assign( sV, getYMMReg(rE) ); 15205 DIP("vmovs%cdup %s,%s\n", 15206 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG)); 15207 delta += 1; 15208 } else { 15209 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15210 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 15211 DIP("vmovs%cdup %s,%s\n", 15212 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG)); 15213 delta += alen; 15214 } 15215 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 15216 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 ) 15217 : mkV128from32s( s7, s7, s5, s5 ) ); 15218 putYMMRegLane128( rG, 0, isL ? 
mkV128from32s( s2, s2, s0, s0 ) 15219 : mkV128from32s( s3, s3, s1, s1 ) ); 15220 return delta; 15221 } 15222 15223 15224 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 15225 { 15226 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 15227 IRTemp leftV = newTemp(Ity_V128); 15228 IRTemp rightV = newTemp(Ity_V128); 15229 IRTemp rm = newTemp(Ity_I32); 15230 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 15231 15232 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 15233 breakupV128to32s( dV, &d3, &d2, &d1, &d0 ); 15234 15235 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) ); 15236 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) ); 15237 15238 IRTemp res = newTemp(Ity_V128); 15239 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 15240 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4, 15241 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 15242 return res; 15243 } 15244 15245 15246 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd ) 15247 { 15248 IRTemp s1, s0, d1, d0; 15249 IRTemp leftV = newTemp(Ity_V128); 15250 IRTemp rightV = newTemp(Ity_V128); 15251 IRTemp rm = newTemp(Ity_I32); 15252 s1 = s0 = d1 = d0 = IRTemp_INVALID; 15253 15254 breakupV128to64s( sV, &s1, &s0 ); 15255 breakupV128to64s( dV, &d1, &d0 ); 15256 15257 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) ); 15258 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) ); 15259 15260 IRTemp res = newTemp(Ity_V128); 15261 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 15262 assign( res, triop(isAdd ? 
Iop_Add64Fx2 : Iop_Sub64Fx2, 15263 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) ); 15264 return res; 15265 } 15266 15267 15268 __attribute__((noinline)) 15269 static 15270 Long dis_ESC_0F__SSE3 ( Bool* decode_OK, 15271 const VexAbiInfo* vbi, 15272 Prefix pfx, Int sz, Long deltaIN ) 15273 { 15274 IRTemp addr = IRTemp_INVALID; 15275 UChar modrm = 0; 15276 Int alen = 0; 15277 HChar dis_buf[50]; 15278 15279 *decode_OK = False; 15280 15281 Long delta = deltaIN; 15282 UChar opc = getUChar(delta); 15283 delta++; 15284 switch (opc) { 15285 15286 case 0x12: 15287 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm), 15288 duplicating some lanes (2:2:0:0). */ 15289 if (haveF3no66noF2(pfx) && sz == 4) { 15290 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 15291 True/*isL*/ ); 15292 goto decode_success; 15293 } 15294 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm), 15295 duplicating some lanes (0:1:0:1). */ 15296 if (haveF2no66noF3(pfx) 15297 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) { 15298 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ ); 15299 goto decode_success; 15300 } 15301 break; 15302 15303 case 0x16: 15304 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm), 15305 duplicating some lanes (3:3:1:1). */ 15306 if (haveF3no66noF2(pfx) && sz == 4) { 15307 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/, 15308 False/*!isL*/ ); 15309 goto decode_success; 15310 } 15311 break; 15312 15313 case 0x7C: 15314 case 0x7D: 15315 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */ 15316 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */ 15317 if (haveF2no66noF3(pfx) && sz == 4) { 15318 IRTemp eV = newTemp(Ity_V128); 15319 IRTemp gV = newTemp(Ity_V128); 15320 Bool isAdd = opc == 0x7C; 15321 const HChar* str = isAdd ? 
"add" : "sub"; 15322 modrm = getUChar(delta); 15323 UInt rG = gregOfRexRM(pfx,modrm); 15324 if (epartIsReg(modrm)) { 15325 UInt rE = eregOfRexRM(pfx,modrm); 15326 assign( eV, getXMMReg(rE) ); 15327 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 15328 delta += 1; 15329 } else { 15330 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15331 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 15332 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG)); 15333 delta += alen; 15334 } 15335 15336 assign( gV, getXMMReg(rG) ); 15337 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) ); 15338 goto decode_success; 15339 } 15340 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */ 15341 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */ 15342 if (have66noF2noF3(pfx) && sz == 2) { 15343 IRTemp eV = newTemp(Ity_V128); 15344 IRTemp gV = newTemp(Ity_V128); 15345 Bool isAdd = opc == 0x7C; 15346 const HChar* str = isAdd ? "add" : "sub"; 15347 modrm = getUChar(delta); 15348 UInt rG = gregOfRexRM(pfx,modrm); 15349 if (epartIsReg(modrm)) { 15350 UInt rE = eregOfRexRM(pfx,modrm); 15351 assign( eV, getXMMReg(rE) ); 15352 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG)); 15353 delta += 1; 15354 } else { 15355 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15356 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 15357 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG)); 15358 delta += alen; 15359 } 15360 15361 assign( gV, getXMMReg(rG) ); 15362 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) ); 15363 goto decode_success; 15364 } 15365 break; 15366 15367 case 0xD0: 15368 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). 
*/ 15369 if (have66noF2noF3(pfx) && sz == 2) { 15370 IRTemp eV = newTemp(Ity_V128); 15371 IRTemp gV = newTemp(Ity_V128); 15372 modrm = getUChar(delta); 15373 UInt rG = gregOfRexRM(pfx,modrm); 15374 if (epartIsReg(modrm)) { 15375 UInt rE = eregOfRexRM(pfx,modrm); 15376 assign( eV, getXMMReg(rE) ); 15377 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15378 delta += 1; 15379 } else { 15380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15381 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 15382 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG)); 15383 delta += alen; 15384 } 15385 15386 assign( gV, getXMMReg(rG) ); 15387 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) ); 15388 goto decode_success; 15389 } 15390 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */ 15391 if (haveF2no66noF3(pfx) && sz == 4) { 15392 IRTemp eV = newTemp(Ity_V128); 15393 IRTemp gV = newTemp(Ity_V128); 15394 modrm = getUChar(delta); 15395 UInt rG = gregOfRexRM(pfx,modrm); 15396 15397 modrm = getUChar(delta); 15398 if (epartIsReg(modrm)) { 15399 UInt rE = eregOfRexRM(pfx,modrm); 15400 assign( eV, getXMMReg(rE) ); 15401 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 15402 delta += 1; 15403 } else { 15404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 15405 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 15406 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG)); 15407 delta += alen; 15408 } 15409 15410 assign( gV, getXMMReg(rG) ); 15411 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) ); 15412 goto decode_success; 15413 } 15414 break; 15415 15416 case 0xF0: 15417 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). 
*/
      if (haveF2no66noF3(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            goto decode_failure;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("lddqu %s,%s\n", dis_buf,
                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      break;

   default:
      goto decode_failure;

   }

  decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3               ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Build IR for PSHUFB on one 128-bit value.  dV is the data to be
   permuted, sV supplies one index byte per result byte.  From the
   code below: each result byte is selected by the low 3 bits of its
   index (bit 3 choosing between the low and high 64-bit halves of
   dV), and is forced to zero when bit 7 of the index is set.  The
   whole thing is done with 64-bit ops on the two halves, to avoid
   needing extra 128-bit IROps. */
static
IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi        = newTemp(Ity_I64);
   IRTemp sLo        = newTemp(Ity_I64);
   IRTemp dHi        = newTemp(Ity_I64);
   IRTemp dLo        = newTemp(Ity_I64);
   IRTemp rHi        = newTemp(Ity_I64);
   IRTemp rLo        = newTemp(Ity_I64);
   IRTemp sevens     = newTemp(Ity_I64);
   IRTemp mask0x80hi = newTemp(Ity_I64);
   IRTemp mask0x80lo = newTemp(Ity_I64);
   IRTemp maskBit3hi = newTemp(Ity_I64);
   IRTemp maskBit3lo = newTemp(Ity_I64);
   IRTemp sAnd7hi    = newTemp(Ity_I64);
   IRTemp sAnd7lo    = newTemp(Ity_I64);
   IRTemp permdHi    = newTemp(Ity_I64);
   IRTemp permdLo    = newTemp(Ity_I64);
   IRTemp res        = newTemp(Ity_V128);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

   assign( sevens, mkU64(0x0707070707070707ULL) );

   /* mask0x80hi = Not(SarN8x8(sHi,7))
      maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
      sAnd7hi    = And(sHi,sevens)
      permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                       And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
      rHi        = And(permdHi,mask0x80hi)
   */
   assign(
      mask0x80hi,
      unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

   assign(
      maskBit3hi,
      binop(Iop_SarN8x8,
            binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
            mkU8(7)));

   assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

   assign(
      permdHi,
      binop(
         Iop_Or64,
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
               mkexpr(maskBit3hi)),
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
               unop(Iop_Not64,mkexpr(maskBit3hi))) ));

   assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

   /* And the same for the lower half of the result.  What fun. */

   assign(
      mask0x80lo,
      unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

   assign(
      maskBit3lo,
      binop(Iop_SarN8x8,
            binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
            mkU8(7)));

   assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

   assign(
      permdLo,
      binop(
         Iop_Or64,
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
               mkexpr(maskBit3lo)),
         binop(Iop_And64,
               binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
               unop(Iop_Not64,mkexpr(maskBit3lo))) ));

   assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

   assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}


/* PSHUFB on a 256-bit value: the 128-bit version is applied
   independently to the two 128-bit halves. */
static
IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSHUFB_XMM(dHi, sHi)),
                     mkexpr(math_PSHUFB_XMM(dLo, sLo))));
   return res;
}


/* Decode the 128-bit PHADDW/PHADDD/PHADDSW/PHSUBW/PHSUBD/PHSUBSW
   family; opc (0x01/02/03/05/06/07) selects the operation.  Handles
   both the SSSE3 encoding (isAvx == False: G is both source and
   destination, 16-alignment check on the memory form) and the AVX
   encoding (isAvx == True: extra source register from VEX.vvvv,
   upper YMM lane zeroed).  Returns the updated instruction offset. */
static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V128);
   IRTemp dV     = newTemp(Ity_V128);
   IRTemp sHi    = newTemp(Ity_I64);
   IRTemp sLo    = newTemp(Ity_I64);
   IRTemp dHi    = newTemp(Ity_I64);
   IRTemp dLo    = newTemp(Ity_I64);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   UInt   rV     = isAvx ? getVexNvvvv(pfx) : rG;

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   /* The 32-bit-lane variants need 32-bit odd/even concatenation
      instead of the default 16-bit one. */
   if (opc == 0x02 || opc == 0x06) {
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getXMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          dis_buf, nameXMMReg(rG));
      delta += alen;
   }

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG,
        binop(Iop_64HLtoV128,
              binop(opV64,
                    binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                    binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
              binop(opV64,
                    binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                    binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
   return delta;
}


/* 256-bit (AVX2) version of the PHADD/PHSUB family: the same
   odd/even-lane arithmetic as dis_PHADD_128, applied to each
   128-bit half of the YMM operands.  No alignment check on the
   memory form.  Returns the updated instruction offset. */
static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V256);
   IRTemp dV     = newTemp(Ity_V256);
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   UInt   rV     = getVexNvvvv(pfx);

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   /* 32-bit-lane variants use 32-bit odd/even concatenation. */
   if (opc == 0x02 || opc == 0x06) {
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getYMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
      delta += alen;
   }

   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */

   putYMMReg( rG,
              binop(Iop_V128HLtoV256,
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s3),mkexpr(s2)),
                                binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d3),mkexpr(d2)),
                                binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s1),mkexpr(s0)),
                                binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d1),mkexpr(d0)),
                                binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
   return delta;
}


/* Build IR for PMADDUBSW on one 128-bit value: unsigned bytes of dV
   times the corresponding signed bytes of sV, adjacent 16-bit
   products then added pairwise with signed saturation.  The odd and
   even bytes of each operand are widened to 16 bits (sign-extended
   for sV, zero-extended for dV) via shift pairs. */
static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVoddsSX  = newTemp(Ity_V128);
   IRTemp sVevensSX = newTemp(Ity_V128);
   IRTemp dVoddsZX  = newTemp(Ity_V128);
   IRTemp dVevensZX = newTemp(Ity_V128);
   /* compute dV unsigned x sV signed */
   assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
   assign( sVevensSX, binop(Iop_SarN16x8,
                            binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                            mkU8(8)) );
   assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
   assign( dVevensZX, binop(Iop_ShrN16x8,
                            binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
mkU8(8)) );

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_QAdd16Sx8,
                      binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
                      binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
                )
         );
   return res;
}


/* PMADDUBSW on a 256-bit value: the 128-bit version applied
   independently to each 128-bit half. */
static
IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo);
   breakupV256toV128s( sV, &sHi, &sLo);
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDUBSW_128(dHi, sHi)),
                     mkexpr(math_PMADDUBSW_128(dLo, sLo))));
   return res;
}


/* Top-level decoder for SSSE3 instructions in the 0F 38 escape
   space (both the 66-prefixed XMM forms and the unprefixed MMX
   forms).  On a successful decode, *decode_OK is set True and the
   updated instruction offset is returned; otherwise *decode_OK is
   False and deltaIN is returned unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x00:
      /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pshufb %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         IRTemp res = math_PSHUFB_XMM( dV, sV );
         putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
         goto decode_success;
      }
      /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         IRTemp dV = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                  nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pshufb %s,%s\n", dis_buf,
                                  nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            binop(
               Iop_And64,
               /* permute the lanes */
               binop(
                  Iop_Perm8x8,
                  mkexpr(dV),
                  binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
               ),
               /* mask off lanes which have (index & 0x80) == 0x80 */
               unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
            )
         );
         goto decode_success;
      }
      break;

   case 0x01:
   case 0x02:
   case 0x03:
   case 0x05:
   case 0x06:
   case 0x07:
      /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
         G to G (xmm). */
      /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
         G to G (xmm). */
      /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
         xmm) and G to G (xmm). */
      /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
         G to G (xmm). */
      /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
         G to G (xmm). */
      /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
         xmm) and G to G (xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
         goto decode_success;
      }
      /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
      /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
         to G (mmx). */
      /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
         to G (mmx). */
      /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
         mmx) and G to G (mmx). */
      /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
         to G (mmx). */
      /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
         to G (mmx). */
      /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
         mmx) and G to G (mmx). */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         const HChar* str = "???";
         IROp   opV64  = Iop_INVALID;
         IROp   opCatO = Iop_CatOddLanes16x4;
         IROp   opCatE = Iop_CatEvenLanes16x4;
         IRTemp sV     = newTemp(Ity_I64);
         IRTemp dV     = newTemp(Ity_I64);

         modrm = getUChar(delta);

         switch (opc) {
            case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
            case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
            case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
            case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
            case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
            case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
            default: vassert(0);
         }
         /* 32-bit-lane variants need 32-bit odd/even concatenation. */
         if (opc == 0x02 || opc == 0x06) {
            opCatO = Iop_InterleaveHI32x2;
            opCatE = Iop_InterleaveLO32x2;
         }

         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("ph%s %s,%s\n", str, dis_buf,
                                nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            binop(opV64,
                  binop(opCatE,mkexpr(sV),mkexpr(dV)),
                  binop(opCatO,mkexpr(sV),mkexpr(dV))
            )
         );
         goto decode_success;
      }
      break;

   case 0x04:
      /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
         Unsigned Bytes (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm     = getUChar(delta);
         UInt   rG = gregOfRexRM(pfx,modrm);

         assign( dV, getXMMReg(rG) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
         }

         putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
         goto decode_success;
      }
      /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
         Unsigned Bytes (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV        = newTemp(Ity_I64);
         IRTemp dV        = newTemp(Ity_I64);
         IRTemp sVoddsSX  = newTemp(Ity_I64);
         IRTemp sVevensSX = newTemp(Ity_I64);
         IRTemp dVoddsZX  = newTemp(Ity_I64);
         IRTemp dVevensZX = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                     nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pmaddubsw %s,%s\n", dis_buf,
                                     nameMMXReg(gregLO3ofRM(modrm)));
         }

         /* compute dV unsigned x sV signed */
         assign( sVoddsSX,
                 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
         assign( sVevensSX,
                 binop(Iop_SarN16x4,
                       binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                       mkU8(8)) );
         assign( dVoddsZX,
                 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
         assign( dVevensZX,
                 binop(Iop_ShrN16x4,
                       binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                       mkU8(8)) );

         putMMXReg(
            gregLO3ofRM(modrm),
            binop(Iop_QAdd16Sx4,
                  binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
                  binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
            )
         );
         goto decode_success;
      }
      break;

   case 0x08:
   case 0x09:
   case 0x0A:
      /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
      /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
      /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV  = newTemp(Ity_V128);
         IRTemp dV  = newTemp(Ity_V128);
         IRTemp sHi = newTemp(Ity_I64);
         IRTemp sLo = newTemp(Ity_I64);
         IRTemp dHi = newTemp(Ity_I64);
         IRTemp dLo = newTemp(Ity_I64);
         const HChar* str = "???";
         Int laneszB = 0;

         switch (opc) {
            case 0x08: laneszB = 1; str = "b"; break;
            case 0x09: laneszB = 2; str = "w"; break;
            case 0x0A: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("psign%s %s,%s\n", str, dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
         assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
         assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
         assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

         putXMMReg(
            gregOfRexRM(pfx,modrm),
            binop(Iop_64HLtoV128,
                  dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
                  dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
            )
         );
         goto decode_success;
      }
      /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
      /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
      /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         IRTemp dV = newTemp(Ity_I64);
         const HChar* str = "???";
         Int laneszB = 0;

         switch (opc) {
            case 0x08: laneszB = 1; str = "b"; break;
            case 0x09: laneszB = 2; str = "w"; break;
            case 0x0A: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                        nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("psign%s %s,%s\n", str, dis_buf,
                                        nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
         );
         goto decode_success;
      }
      break;

   case 0x0B:
      /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
         Scale (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV  = newTemp(Ity_V128);
         IRTemp dV  = newTemp(Ity_V128);
         IRTemp sHi = newTemp(Ity_I64);
         IRTemp sLo = newTemp(Ity_I64);
         IRTemp dHi = newTemp(Ity_I64);
         IRTemp dLo = newTemp(Ity_I64);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmulhrsw %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
         assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
         assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
         assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );

         putXMMReg(
            gregOfRexRM(pfx,modrm),
            binop(Iop_64HLtoV128,
                  dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
                  dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
            )
         );
         goto decode_success;
      }
      /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
         (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         IRTemp dV = newTemp(Ity_I64);

         modrm = getUChar(delta);
         do_MMX_preamble();
         assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pmulhrsw %s,%s\n", dis_buf,
                                    nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg(
            gregLO3ofRM(modrm),
            dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
         );
         goto decode_success;
      }
      break;

   case 0x1C:
   case 0x1D:
   case 0x1E:
      /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
      /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
      /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
         IRTemp sV = newTemp(Ity_V128);
         const HChar* str = "???";
         Int laneszB = 0;

         switch (opc) {
            case 0x1C: laneszB = 1; str = "b"; break;
            case 0x1D: laneszB = 2; str = "w"; break;
            case 0x1E: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pabs%s %s,%s\n", str, dis_buf,
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         putXMMReg( gregOfRexRM(pfx,modrm),
                    mkexpr(math_PABS_XMM(sV, laneszB)) );
         goto decode_success;
      }
      /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
      /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
      /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp sV = newTemp(Ity_I64);
         const HChar* str = "???";
         Int laneszB = 0;

         switch (opc) {
            case 0x1C: laneszB = 1; str = "b"; break;
            case 0x1D: laneszB = 2; str = "w"; break;
            case 0x1E: laneszB = 4; str = "d"; break;
            default: vassert(0);
         }

         modrm = getUChar(delta);
         do_MMX_preamble();

         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                       nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("pabs%s %s,%s\n", str, dis_buf,
                                       nameMMXReg(gregLO3ofRM(modrm)));
         }

         putMMXReg( gregLO3ofRM(modrm),
                    mkexpr(math_PABS_MMX( sV, laneszB )) );
         goto decode_success;
      }
      break;

   default:
      break;

   }

  /* Nothing jumps here, hence the label is commented out. */
  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3               ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Top-level decoder for SSSE3 instructions in the 0F 3A escape
   space.  On a successful decode, *decode_OK is set True and the
   updated instruction offset is returned; otherwise *decode_OK is
   False and deltaIN is returned unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   Long   d64   = 0;
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {
16287 case 0x0F: 16288 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ 16289 if (have66noF2noF3(pfx) 16290 && (sz == 2 || /*redundant REX.W*/ sz == 8)) { 16291 IRTemp sV = newTemp(Ity_V128); 16292 IRTemp dV = newTemp(Ity_V128); 16293 16294 modrm = getUChar(delta); 16295 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); 16296 16297 if (epartIsReg(modrm)) { 16298 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); 16299 d64 = (Long)getUChar(delta+1); 16300 delta += 1+1; 16301 DIP("palignr $%lld,%s,%s\n", d64, 16302 nameXMMReg(eregOfRexRM(pfx,modrm)), 16303 nameXMMReg(gregOfRexRM(pfx,modrm))); 16304 } else { 16305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 16306 gen_SEGV_if_not_16_aligned( addr ); 16307 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 16308 d64 = (Long)getUChar(delta+alen); 16309 delta += alen+1; 16310 DIP("palignr $%lld,%s,%s\n", d64, 16311 dis_buf, 16312 nameXMMReg(gregOfRexRM(pfx,modrm))); 16313 } 16314 16315 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 ); 16316 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) ); 16317 goto decode_success; 16318 } 16319 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ 16320 if (haveNo66noF2noF3(pfx) && sz == 4) { 16321 IRTemp sV = newTemp(Ity_I64); 16322 IRTemp dV = newTemp(Ity_I64); 16323 IRTemp res = newTemp(Ity_I64); 16324 16325 modrm = getUChar(delta); 16326 do_MMX_preamble(); 16327 assign( dV, getMMXReg(gregLO3ofRM(modrm)) ); 16328 16329 if (epartIsReg(modrm)) { 16330 assign( sV, getMMXReg(eregLO3ofRM(modrm)) ); 16331 d64 = (Long)getUChar(delta+1); 16332 delta += 1+1; 16333 DIP("palignr $%lld,%s,%s\n", d64, 16334 nameMMXReg(eregLO3ofRM(modrm)), 16335 nameMMXReg(gregLO3ofRM(modrm))); 16336 } else { 16337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 16338 assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); 16339 d64 = (Long)getUChar(delta+alen); 16340 delta += alen+1; 16341 DIP("palignr $%lld%s,%s\n", d64, 16342 dis_buf, 16343 nameMMXReg(gregLO3ofRM(modrm))); 16344 } 16345 16346 if (d64 == 0) { 
16347 assign( res, mkexpr(sV) ); 16348 } 16349 else if (d64 >= 1 && d64 <= 7) { 16350 assign(res, 16351 binop(Iop_Or64, 16352 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)), 16353 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64)) 16354 ))); 16355 } 16356 else if (d64 == 8) { 16357 assign( res, mkexpr(dV) ); 16358 } 16359 else if (d64 >= 9 && d64 <= 15) { 16360 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) ); 16361 } 16362 else if (d64 >= 16 && d64 <= 255) { 16363 assign( res, mkU64(0) ); 16364 } 16365 else 16366 vassert(0); 16367 16368 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) ); 16369 goto decode_success; 16370 } 16371 break; 16372 16373 default: 16374 break; 16375 16376 } 16377 16378 //decode_failure: 16379 *decode_OK = False; 16380 return deltaIN; 16381 16382 decode_success: 16383 *decode_OK = True; 16384 return delta; 16385 } 16386 16387 16388 /*------------------------------------------------------------*/ 16389 /*--- ---*/ 16390 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/ 16391 /*--- ---*/ 16392 /*------------------------------------------------------------*/ 16393 16394 __attribute__((noinline)) 16395 static 16396 Long dis_ESC_0F__SSE4 ( Bool* decode_OK, 16397 const VexArchInfo* archinfo, 16398 const VexAbiInfo* vbi, 16399 Prefix pfx, Int sz, Long deltaIN ) 16400 { 16401 IRTemp addr = IRTemp_INVALID; 16402 IRType ty = Ity_INVALID; 16403 UChar modrm = 0; 16404 Int alen = 0; 16405 HChar dis_buf[50]; 16406 16407 *decode_OK = False; 16408 16409 Long delta = deltaIN; 16410 UChar opc = getUChar(delta); 16411 delta++; 16412 switch (opc) { 16413 16414 case 0xB8: 16415 /* F3 0F B8 = POPCNT{W,L,Q} 16416 Count the number of 1 bits in a register 16417 */ 16418 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */ 16419 && (sz == 2 || sz == 4 || sz == 8)) { 16420 /*IRType*/ ty = szToITy(sz); 16421 IRTemp src = newTemp(ty); 16422 modrm = getUChar(delta); 16423 if (epartIsReg(modrm)) { 16424 assign(src, getIRegE(sz, pfx, modrm)); 16425 delta += 1; 16426 
DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp result = gen_POPCOUNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(result));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A C P are cleared.  Z is set if SRC == 0.
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1,
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64,
                                widenUto64(mkexpr(src)),
                                mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_Z))));

         goto decode_success;
      }
      break;

   case 0xBC:
      /* F3 0F BC -- TZCNT (count trailing zeroes.  A BMI extension,
         which we can only decode if we're sure this is a BMI1 capable cpu
         that supports TZCNT, since otherwise it's BSF, which behaves
         differently on zero source.
      */
      if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)
          && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp res = gen_TZCNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(res));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A P are cleared.  Z is set if RESULT == 0.
         // C is set if SRC is zero.
         IRTemp src64 = newTemp(Ity_I64);
         IRTemp res64 = newTemp(Ity_I64);
         assign(src64, widenUto64(mkexpr(src)));
         assign(res64, widenUto64(mkexpr(res)));

         IRTemp oszacp = newTemp(Ity_I64);
         assign(
            oszacp,
            binop(Iop_Or64,
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_Z)),
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_C))
            )
         );

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

         goto decode_success;
      }
      break;

   case 0xBD:
      /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
         which we can only decode if we're sure this is an AMD cpu
         that supports LZCNT, since otherwise it's BSR, which behaves
         differently.  Bizarrely, my Sandy Bridge also accepts these
         instructions but produces different results. */
      if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)
          && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp res = gen_LZCNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(res));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A P are cleared.  Z is set if RESULT == 0.
         // C is set if SRC is zero.
         /* Same flag scheme as TZCNT above: Z from the result, C from
            the source, folded into one DEP1 word for OP_COPY. */
         IRTemp src64 = newTemp(Ity_I64);
         IRTemp res64 = newTemp(Ity_I64);
         assign(src64, widenUto64(mkexpr(src)));
         assign(res64, widenUto64(mkexpr(res)));

         IRTemp oszacp = newTemp(Ity_I64);
         assign(
            oszacp,
            binop(Iop_Or64,
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_Z)),
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_C))
            )
         );

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F38__SSE4                   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Blend vecE/vecG byte-lanes (or wider lanes, per 'gran' bytes) under
   the per-lane sign bits of vec0, using opSAR (a lane-wide arithmetic
   shift right) to smear each lane's MSB across the whole lane. */
static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG, 
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp mask = newTemp(Ity_V128);
   assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));

   IRTemp notmask = newTemp(Ity_V128);
   assign(notmask, unop(Iop_NotV128, mkexpr(mask)));

   /* res = (vecE & mask) | (vecG & ~mask) */
   IRTemp res = newTemp(Ity_V128);
   assign(res,  binop(Iop_OrV128,
                      binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
   return res;
}

/* 256-bit version of math_PBLENDVB_128.  opSAR128 operates on V128
   halves, so the mask is built per-half and reassembled. */
static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG, 
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR128 )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp vec0Hi = IRTemp_INVALID;
   IRTemp vec0Lo = IRTemp_INVALID;
   breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );

   IRTemp mask = newTemp(Ity_V256);
   assign(mask, binop(Iop_V128HLtoV256,
                      binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
                      binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));

   IRTemp notmask = newTemp(Ity_V256);
   assign(notmask, unop(Iop_NotV256, mkexpr(mask)));

   /* res = (vecE & mask) | (vecG & ~mask) */
   IRTemp res = newTemp(Ity_V256);
   assign(res,  binop(Iop_OrV256,
                      binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
   return res;
}

/* Decode a 128-bit VBLENDV{PS,PD}/VPBLENDVB-style insn: E (reg/mem),
   V (vvvv) and IS4 (imm8[7:4]) operands, result written to G with the
   upper YMM lane zeroed. */
static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V128);
   IRTemp vecV   = newTemp(Ity_V128);
   IRTemp vecIS4 = newTemp(Ity_V128);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      /* The controlling-mask register comes from bits 7:4 of the
         trailing imm8. */
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   delta++;  /* skip the imm8 */
   assign(vecV, getXMMReg(rV));
   assign(vecIS4, getXMMReg(rIS4));
   IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   return delta;
}

/* 256-bit version of dis_VBLENDV_128: same decode, YMM operands. */
static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR128 )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V256);
   IRTemp vecV   = newTemp(Ity_V256);
   IRTemp vecIS4 = newTemp(Ity_V256);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      /* The controlling-mask register comes from bits 7:4 of the
         trailing imm8. */
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV),
          nameYMMReg(rG));
   }
   delta++;  /* skip the imm8 */
   assign(vecV, getYMMReg(rV));
   assign(vecIS4, getYMMReg(rIS4));
   IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
   putYMMReg( rG, mkexpr(res) );
   return delta;
}

/* Common flag-setting tail for PTEST/VTESTPS/VTESTPD: derive Z and C
   from the two AND/ANDN vectors, already reduced to V128.  sign is
   0 (PTEST: all 128 bits), 32 (VTESTPS: bits 31/63 of each half) or
   64 (VTESTPD: bit 63 of each half). */
static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */

   /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
      and bottom 64-bits together.  It relies on this trick:

      InterleaveLO64x2([a,b],[c,d]) == [b,d]    hence

      InterleaveLO64x2([a,b],[a,b]) == [b,b]   and similarly
      InterleaveHI64x2([a,b],[a,b]) == [a,a] 

      and so the OR of the above 2 exprs produces
      [a OR b, a OR b], from which we simply take the lower half.
   */
   IRTemp and64  = newTemp(Ity_I64);
   IRTemp andn64 = newTemp(Ity_I64);

   assign(and64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andV), mkexpr(andV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andV), mkexpr(andV)))));

   assign(andn64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andnV), mkexpr(andnV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andnV), mkexpr(andnV)))));

   IRTemp z64 = newTemp(Ity_I64);
   IRTemp c64 = newTemp(Ity_I64);
   if (sign == 64) {
      /* When only interested in the most significant bit, just shift
         arithmetically right and negate.  */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(and64), mkU8(63))));

      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
   } else {
      if (sign == 32) {
         /* When interested in bit 31 and bit 63, mask those bits and
            fallthrough into the PTEST handling.  */
         IRTemp t0 = newTemp(Ity_I64);
         IRTemp t1 = newTemp(Ity_I64);
         IRTemp t2 = newTemp(Ity_I64);
         assign(t0, mkU64(0x8000000080000000ULL));
         assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
         assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
         and64  = t1;
         andn64 = t2;
      }
      /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
         slice out the Z and C bits conveniently.  We use the standard
         trick all-zeroes -> all-zeroes, anything-else -> all-ones
         done by "(x | -x) >>s (word-size - 1)".
      */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
                                    mkexpr(and64)), mkU8(63))));

      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
                                    mkexpr(andn64)), mkU8(63))));
   }

   /* And finally, slice out the Z and C flags and set the flags
      thunk to COPY for them.  OSAP are set to zero. */
   IRTemp newOSZACP = newTemp(Ity_I64);
   assign(newOSZACP, 
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
                binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));

   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}


/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD.
*/
static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx, 
                             Long delta, Bool isAvx, Int sign )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   IRTemp vecE = newTemp(Ity_V128);
   IRTemp vecG = newTemp(Ity_V128);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      delta += 1;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only the non-AVX (SSE4) form requires 16-alignment of the
         memory operand. */
      if (!isAvx)
         gen_SEGV_if_not_16_aligned( addr );
      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
      delta += alen;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameXMMReg(rG) );
   }

   assign(vecG, getXMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV128,
                       mkexpr(vecE),
                       binop(Iop_XorV128, mkexpr(vecG),
                                          mkV128(0xFFFF))));

   finish_xTESTy ( andV, andnV, sign );
   return delta;
}


/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD.
*/
static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Int sign )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   IRTemp vecE   = newTemp(Ity_V256);
   IRTemp vecG   = newTemp(Ity_V256);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      delta += 1;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameYMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
      delta += alen;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameYMMReg(rG) );
   }

   assign(vecG, getYMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V256);
   IRTemp andnV = newTemp(Ity_V256);
   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV256,
                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));

   /* Fold the two 128-bit halves together with OR, then hand off to
      the common 128-bit flag-setting tail. */
   IRTemp andVhi  = IRTemp_INVALID;
   IRTemp andVlo  = IRTemp_INVALID;
   IRTemp andnVhi = IRTemp_INVALID;
   IRTemp andnVlo = IRTemp_INVALID;
   breakupV256toV128s( andV, &andVhi, &andVlo );
   breakupV256toV128s( andnV, &andnVhi, &andnVlo );

   IRTemp andV128  = newTemp(Ity_V128);
   IRTemp andnV128 = newTemp(Ity_V128);
   assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );

   finish_xTESTy ( andV128, andnV128, sign );
   return delta;
}


/* Handles 128 bit
   versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV    = isAvx ? "v" : "";
   const HChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only the low 64 bits of the source are consumed. */
      assign( srcVec, 
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   /* Zero-extend by interleaving with zeroes; for the signed variant,
      shift each 16-bit lane left then arithmetically right to
      propagate the sign. */
   IRExpr* res 
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO8x16, 
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_SarN16x8, 
                 binop( Iop_ShlN16x8, 
                        binop( Iop_InterleaveLO8x16,
                               IRExpr_Const( IRConst_V128(0) ),
                               mkexpr(srcVec) ),
                        mkU8(8) ),
                 mkU8(8) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}


/* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
      delta += alen;
      DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   /* First do zero extend.  */
   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop( Iop_InterleaveHI8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
               binop( Iop_InterleaveLO8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   /* And if needed sign extension as well.  */
   if (!xIsZ)
      res = binop( Iop_SarN16x16,
                   binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );

   putYMMReg ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVZXWD and PMOVSXWD. */
static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ?
'z' : 's'; 17033 UInt rG = gregOfRexRM(pfx, modrm); 17034 17035 if ( epartIsReg(modrm) ) { 17036 UInt rE = eregOfRexRM(pfx, modrm); 17037 assign( srcVec, getXMMReg(rE) ); 17038 delta += 1; 17039 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) ); 17040 } else { 17041 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17042 assign( srcVec, 17043 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) ); 17044 delta += alen; 17045 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) ); 17046 } 17047 17048 IRExpr* res 17049 = binop( Iop_InterleaveLO16x8, 17050 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ); 17051 if (!xIsZ) 17052 res = binop(Iop_SarN32x4, 17053 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16)); 17054 17055 (isAvx ? putYMMRegLoAndZU : putXMMReg) 17056 ( gregOfRexRM(pfx, modrm), res ); 17057 17058 return delta; 17059 } 17060 17061 17062 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx, 17063 Long delta, Bool xIsZ ) 17064 { 17065 IRTemp addr = IRTemp_INVALID; 17066 Int alen = 0; 17067 HChar dis_buf[50]; 17068 IRTemp srcVec = newTemp(Ity_V128); 17069 UChar modrm = getUChar(delta); 17070 UChar how = xIsZ ? 
'z' : 's'; 17071 UInt rG = gregOfRexRM(pfx, modrm); 17072 17073 if ( epartIsReg(modrm) ) { 17074 UInt rE = eregOfRexRM(pfx, modrm); 17075 assign( srcVec, getXMMReg(rE) ); 17076 delta += 1; 17077 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) ); 17078 } else { 17079 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17080 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) ); 17081 delta += alen; 17082 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) ); 17083 } 17084 17085 IRExpr* res 17086 = binop( Iop_V128HLtoV256, 17087 binop( Iop_InterleaveHI16x8, 17088 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ), 17089 binop( Iop_InterleaveLO16x8, 17090 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) ); 17091 if (!xIsZ) 17092 res = binop(Iop_SarN32x8, 17093 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16)); 17094 17095 putYMMReg ( rG, res ); 17096 17097 return delta; 17098 } 17099 17100 17101 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx, 17102 Long delta, Bool isAvx ) 17103 { 17104 IRTemp addr = IRTemp_INVALID; 17105 Int alen = 0; 17106 HChar dis_buf[50]; 17107 IRTemp srcBytes = newTemp(Ity_I32); 17108 UChar modrm = getUChar(delta); 17109 const HChar* mbV = isAvx ? "v" : ""; 17110 UInt rG = gregOfRexRM(pfx, modrm); 17111 17112 if ( epartIsReg( modrm ) ) { 17113 UInt rE = eregOfRexRM(pfx, modrm); 17114 assign( srcBytes, getXMMRegLane32( rE, 0 ) ); 17115 delta += 1; 17116 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) ); 17117 } else { 17118 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 17119 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) ); 17120 delta += alen; 17121 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) ); 17122 } 17123 17124 (isAvx ? 
    putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_16Sto64,
                         unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
                   unop( Iop_16Sto64,
                         unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
   return delta;
}


/* Handles the 256 bit version of PMOVSXWQ: sign-extend the four low
   16-bit lanes of the source to four 64-bit lanes. */
static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I64);
   UChar  modrm    = getUChar(delta);
   UInt   rG       = gregOfRexRM(pfx, modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane64( rE, 0 ) );
      delta += 1;
      DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
      delta += alen;
      DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   /* Split the 64-bit source into its four 16-bit lanes and
      sign-extend each into a 64-bit lane of the result. */
   breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
   putYMMReg( rG, binop( Iop_V128HLtoV256,
                         binop( Iop_64HLtoV128,
                                unop( Iop_16Sto64, mkexpr(s3) ),
                                unop( Iop_16Sto64, mkexpr(s2) ) ),
                         binop( Iop_64HLtoV128,
                                unop( Iop_16Sto64, mkexpr(s1) ),
                                unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
   return delta;
}


/* Handles the 128 bit version of PMOVZXWQ: zero-extend the two low
   16-bit lanes of the source to two 64-bit lanes. */
static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only 32 bits (two 16-bit lanes) are consumed. */
      assign( srcVec, 
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp( Ity_V128 );
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Double interleave with zeroes: 16 -> 32 -> 64 bit lanes. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO16x8, 
                   mkexpr(zeroVec), 
                   binop( Iop_InterleaveLO16x8, 
                          mkexpr(zeroVec), mkexpr(srcVec) ) ) );
   return delta;
}


/* Handles the 256 bit version of PMOVZXWQ: zero-extend the four low
   16-bit lanes of the source to four 64-bit lanes. */
static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, 
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp( Ity_V128 );
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Double interleave with zeroes; HI picks lanes 2,3 and LO picks
      lanes 0,1 for the two halves of the YMM result. */
   putYMMReg( rG, binop( Iop_V128HLtoV256,
                         binop( Iop_InterleaveHI16x8,
                                mkexpr(zeroVec),
                                binop( Iop_InterleaveLO16x8,
                                       mkexpr(zeroVec), mkexpr(srcVec) ) ),
                         binop( Iop_InterleaveLO16x8,
                                mkexpr(zeroVec),
                                binop(
                                       Iop_InterleaveLO16x8,
                                       mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}


/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcI64 = newTemp(Ity_I64);
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   /* Compute both srcI64 -- the value to expand -- and srcVec -- same
      thing in a V128, with arbitrary junk in the top 64 bits.  Use
      one or both of them and let iropt clean up afterwards (as
      usual). */
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
      delta += 1;
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
      assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
      delta += alen;
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   /* Zero-extend via interleave; sign-extend lane-by-lane from the
      scalar I64 form. */
   IRExpr* res 
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO32x4, 
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_64HLtoV128, 
                 unop( Iop_32Sto64, 
                       unop( Iop_64HIto32, mkexpr(srcI64) ) ), 
                 unop( Iop_32Sto64, 
                       unop( Iop_64to32, mkexpr(srcI64) ) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}


/* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   /* Compute both srcI64 -- the value to expand -- and srcVec -- same
      thing in a V128, with arbitrary junk in the top 64 bits.  Use
      one or both of them and let iropt clean up afterwards (as
      usual). */
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRExpr* res;
   if (xIsZ)
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI32x4,
                          IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
                   binop( Iop_InterleaveLO32x4,
                          IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   else {
      /* Sign extend: break into four 32-bit lanes and widen each. */
      IRTemp s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop( Iop_32Sto64, mkexpr(s3) ),
                          unop( Iop_32Sto64, mkexpr(s2) ) ),
                   binop( Iop_64HLtoV128,
                          unop( Iop_32Sto64, mkexpr(s1) ),
                          unop( Iop_32Sto64, mkexpr(s0) ) ) );
   }

   putYMMReg ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD.
*/
static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only 32 bits (four byte lanes) are consumed. */
      assign( srcVec, 
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Double interleave with zeroes: 8 -> 16 -> 32 bit lanes; then,
      for the signed variant, shift each 32-bit lane left and
      arithmetically right to propagate the sign. */
   IRExpr* res
      = binop(Iop_InterleaveLO8x16,
              mkexpr(zeroVec),
              binop(Iop_InterleaveLO8x16, 
                    mkexpr(zeroVec), mkexpr(srcVec)));
   if (!xIsZ)
      res = binop(Iop_SarN32x4, 
                  binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}


/* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only 64 bits (eight byte lanes) are consumed. */
      assign( srcVec, 
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop(Iop_InterleaveHI8x16,
                     mkexpr(zeroVec),
                     binop(Iop_InterleaveLO8x16,
                           mkexpr(zeroVec), mkexpr(srcVec)) ),
               binop(Iop_InterleaveLO8x16,
                     mkexpr(zeroVec),
                     binop(Iop_InterleaveLO8x16,
                           mkexpr(zeroVec), mkexpr(srcVec)) ) );
   if (!xIsZ)
      res = binop(Iop_SarN32x8,
                  binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));

   putYMMReg ( rG, res );

   return delta;
}


/* Handles 128 bit versions of PMOVSXBQ: sign-extend the two low byte
   lanes of the source to two 64-bit lanes. */
static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I16);
   UChar  modrm    = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane16( rE, 0 ) );
      delta += 1;
      DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
      delta += alen;
      DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   (isAvx ?
             putYMMRegLoAndZU : putXMMReg)
      /* Result: byte 1 sign-extended into the high 64-bit lane,
         byte 0 into the low lane. */
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_8Sto64,
                         unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
                   unop( Iop_8Sto64,
                         unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
   return delta;
}


/* Handles 256 bit versions of PMOVSXBQ.
   Sign-extends the low 4 bytes of the XMM/m32 source to four 64-bit
   lanes of the YMM destination. */
static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I32);
   UChar  modrm    = getUChar(delta);
   UInt rG = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane32( rE, 0 ) );
      delta += 1;
      DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
      delta += alen;
      DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   /* Scalar-narrow each of the 4 source bytes out of the I32 and
      sign-extend it to 64 bits; byte 3 ends up in the topmost lane. */
   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ) ),
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ) ) ) );
   return delta;
}


/* Handles 128 bit versions of PMOVZXBQ.
   Zero-extends the low 2 bytes of the XMM/m16 source to two 64-bit
   lanes of the XMM destination. */
static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ?
"v" : "";
   UInt rG = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      /* Memory form: only 16 bits (2 source bytes) are loaded. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128,
                    unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
      delta += alen;
      DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* Three interleave-low-with-zero steps widen each source byte to a
      64-bit lane (bytes -> 16 -> 32 -> 64 bit lanes). */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO8x16,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}


/* Handles 256 bit versions of PMOVZXBQ.
   Zero-extends the low 4 bytes of the XMM/m32 source to four 64-bit
   lanes of the YMM destination. */
static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UInt rG = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      /* Memory form: 32 bits (4 source bytes) are loaded. */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
      delta += alen;
      DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   /* As the 128-bit case, but the final interleave step uses
      InterleaveHI for the upper V128 half (bytes 2..3) and
      InterleaveLO for the lower half (bytes 0..1). */
   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec), mkexpr(srcVec) ) ) )
            ) );
   return delta;
}


/* Handles 128 bit versions of PHMINPOSUW.  The actual min/position
   computation is done out-of-line by a clean helper that takes the
   source vector as two 64-bit halves. */
static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                 Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int    alen = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   IRTemp sV  = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      delta += 1;
      DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* Only the legacy SSE encoding requires 16-alignment of the
         memory operand; the VEX encoding does not. */
      if (!isAvx)
         gen_SEGV_if_not_16_aligned(addr);
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      delta += alen;
      DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
   }
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );
   assign( dLo, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_calculate_sse_phminposuw",
                   &amd64g_calculate_sse_phminposuw,
                   mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
         ));
   (isAvx ?
             putYMMRegLoAndZU : putXMMReg)
      /* Helper result occupies the low 64 bits; upper lane is zeroed. */
      (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
   return delta;
}


/* Handles AESENC, AESENCLAST, AESDEC, AESDECLAST and AESIMC (and
   their VEX forms), dispatching on opc (0xDC/0xDD/0xDE/0xDF/0xDB).
   The work is done by a dirty helper operating directly on the guest
   state. */
static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
                       Long delta, Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   regNoL = 0;
   /* For the 3-operand VEX forms (all but AESIMC), the left/right
      source is taken from the VEX.vvvv field rather than from rG. */
   UInt   regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;

   /* This is a nasty kludge.  We need to pass 2 x V128 to the
      helper.  Since we can't do that, use a dirty
      helper to compute the results directly from the XMM regs in
      the guest state.  That means for the memory case, we need to
      move the left operand into a pseudo-register (XMM16, let's
      call it). */
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      delta += 1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* alignment check needed ???? */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      delta += alen;
   }

   void*  fn = &amd64g_dirtyhelper_AES;
   const HChar* nm = "amd64g_dirtyhelper_AES";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffD = ymmGuestRegOffset(rG);
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);
   IRExpr*  opc4     = mkU64(opc);
   IRExpr*  gstOffDe = mkU64(gstOffD);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_5( IRExpr_BBPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
      the second for !isAvx or the third for isAvx.
      AESIMC (0xDB) reads the first register, and writes the second. */
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (opc == 0xDB)
      d->fxState[1].fx   = Ifx_Write;
   else if (!isAvx || rG == regNoR)
      d->fxState[1].fx   = Ifx_Modify;
   else {
      /* VEX form with distinct destination: second reg is read-only,
         and a third fxState entry records the write to rG. */
      d->fxState[1].fx     = Ifx_Read;
      d->nFxState++;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = gstOffD;
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );
   {
      const HChar* opsuf;
      switch (opc) {
         case 0xDC: opsuf = "enc"; break;
         case 0XDD: opsuf = "enclast"; break;
         case 0xDE: opsuf = "dec"; break;
         case 0xDF: opsuf = "declast"; break;
         case 0xDB: opsuf = "imc"; break;
         default: vassert(0);
      }
      DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
          (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
          nameXMMReg(regNoR),
          (isAvx && opc != 0xDB) ? "," : "",
          (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
   }
   /* The VEX forms zero the upper YMM lane of the destination. */
   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );
   return delta;
}

/* Handles AESKEYGENASSIST (and its VEX form), again via a dirty
   helper operating directly on the guest state; imm8 is passed to
   the helper as an argument. */
static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   regNoL = 0;
   UInt   regNoR = gregOfRexRM(pfx, modrm);
   UChar  imm    = 0;

   /* This is a nasty kludge.  See AESENC et al. instructions. */
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* alignment check ???? . */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
   const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  imme     = mkU64(imm & 0xFF);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_4( IRExpr_BBPTR(), imme, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme.
   */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   /* Reads the source register (or XMM16 for the memory form),
      writes the destination register. */
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   stmt( IRStmt_Dirty(d) );

   DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
       (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
       nameXMMReg(regNoR));
   /* The VEX form zeroes the upper YMM lane of the destination. */
   if (isAvx)
      putYMMRegLane128( regNoR, 1, mkV128(0) );
   return delta;
}


/* Top-level decoder for the 66 0F 38 (and F2 0F 38) opcode space:
   SSE4.1/SSE4.2 integer ops, AES-NI, and CRC32.  On success, sets
   *decode_OK and returns the updated delta; on failure, leaves
   *decode_OK False and returns deltaIN unchanged. */
__attribute__((noinline))
static
Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
                          const VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long delta = deltaIN;
   UChar opc = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x10:
   case 0x14:
   case 0x15:
      /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128  (byte gran)
         66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128  (float gran)
         66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128  (double gran)
         Blend at various granularities, with XMM0 (implicit operand)
         providing the controlling mask.
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);

         const HChar* nm    = NULL;
         UInt         gran  = 0;
         IROp         opSAR = Iop_INVALID;
         switch (opc) {
            case 0x10:
               nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
               break;
            case 0x14:
               nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
               break;
            case 0x15:
               nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
               break;
         }
         vassert(nm);

         IRTemp vecE = newTemp(Ity_V128);
         IRTemp vecG = newTemp(Ity_V128);
         IRTemp vec0 = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
            delta += 1;
            DIP( "%s %s,%s\n", nm,
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "%s %s,%s\n", nm,
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
         assign(vec0, getXMMReg(0));

         IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
         putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));

         goto decode_success;
      }
      break;

   case 0x17:
      /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
         Logical compare (set ZF and CF from AND/ANDN of the operands) */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
         goto decode_success;
      }
      break;

   case 0x20:
      /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
         Packed Move with Sign Extend from Byte to Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x21:
      /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
         Packed Move with Sign Extend from Byte to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x22:
      /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
         Packed Move with Sign Extend from Byte to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x23:
      /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
         Packed Move with Sign Extend from Word to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXWD_128(vbi, pfx, delta,
                                  False/*!isAvx*/, False/*!xIsZ*/);
         goto decode_success;
      }
      break;

   case 0x24:
      /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
         Packed Move with Sign Extend from Word to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x25:
      /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
         Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   False/*!isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x28:
      /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
         0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
         64-bit half */
      /* This is a really poor translation -- could be improved if
         performance critical.  It's a copy-paste of PMULUDQ, too. */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
         }

         putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
         goto decode_success;
      }
      break;

   case 0x29:
      /* 66 0F 38 29 = PCMPEQQ
         64x2 equality comparison */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqq", Iop_CmpEQ64x2, False );
         goto decode_success;
      }
      break;

   case 0x2A:
      /* 66 0F 38 2A = MOVNTDQA
         "non-temporal" "streaming" load
         Handle like MOVDQA but only memory operand is allowed */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movntdqa %s,%s\n", dis_buf,
                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
            goto decode_success;
         }
      }
      break;

   case 0x2B:
      /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
         2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         modrm = getUChar(delta);

         IRTemp argL = newTemp(Ity_V128);
         IRTemp argR = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1;
            DIP( "packusdw %s,%s\n",
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "packusdw %s,%s\n",
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

         putXMMReg( gregOfRexRM(pfx, modrm),
                    binop( Iop_QNarrowBin32Sto16Ux8,
                           mkexpr(argL), mkexpr(argR)) );

         goto decode_success;
      }
      break;

   case 0x30:
      /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
         Packed Move with Zero Extend from Byte to Word (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x31:
      /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
         Packed Move with Zero Extend from Byte to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x32:
      /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
         Packed Move with Zero Extend from Byte to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x33:
      /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
         Packed Move with Zero Extend from Word to DWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXWD_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x34:
      /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
         Packed Move with Zero Extend from Word to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x35:
      /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
         Packed Move with Zero Extend from DWord to QWord (XMM) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   False/*!isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      break;

   case 0x37:
      /* 66 0F 38 37 = PCMPGTQ
         64x2 comparison (signed, presumably; the Intel docs don't say :-)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtq", Iop_CmpGT64Sx2, False );
         goto decode_success;
      }
      break;

   case 0x38:
   case 0x3C:
      /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128   8Sx16 (signed) min
         66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128   8Sx16 (signed) max
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3C;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxsb" : "pminsb",
                    isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x39:
   case 0x3D:
      /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
         Minimum of Packed Signed Double Word Integers (XMM)
         66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
         Maximum of Packed Signed Double Word Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3D;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxsd" : "pminsd",
                    isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x3A:
   case 0x3E:
      /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
         Minimum of Packed Unsigned Word Integers (XMM)
         66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
         Maximum of Packed Unsigned Word Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3E;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxuw" : "pminuw",
                    isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x3B:
   case 0x3F:
      /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
         Minimum of Packed Unsigned Doubleword Integers (XMM)
         66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
         Maximum of Packed Unsigned Doubleword Integers (XMM)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         /* FIXME: this needs an alignment check */
         Bool isMAX = opc == 0x3F;
         delta = dis_SSEint_E_to_G(
                    vbi, pfx, delta,
                    isMAX ? "pmaxud" : "pminud",
                    isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
                    False
                 );
         goto decode_success;
      }
      break;

   case 0x40:
      /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
         32x4 integer multiply from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {

         modrm = getUChar(delta);

         IRTemp argL = newTemp(Ity_V128);
         IRTemp argR = newTemp(Ity_V128);

         if ( epartIsReg(modrm) ) {
            assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
            delta += 1;
            DIP( "pmulld %s,%s\n",
                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
            delta += alen;
            DIP( "pmulld %s,%s\n",
                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
         }

         assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

         putXMMReg( gregOfRexRM(pfx, modrm),
                    binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );

         goto decode_success;
      }
      break;

   case 0x41:
      /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
         Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF:
   case 0xDB:
      /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
                  DD /r = AESENCLAST xmm1, xmm2/m128
                  DE /r = AESDEC xmm1, xmm2/m128
                  DF /r = AESDECLAST xmm1, xmm2/m128

                  DB /r = AESIMC xmm1, xmm2/m128 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
         goto decode_success;
      }
      break;

   case 0xF0:
   case 0xF1:
      /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
         F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
         The decoding on this is a bit unusual.
      */
      if (haveF2noF3(pfx)
          && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
         modrm = getUChar(delta);

         /* opc 0xF0 is the byte form; for 0xF1 the operand size was
            already established by the prefixes. */
         if (opc == 0xF0)
            sz = 1;
         else
            vassert(sz == 2 || sz == 4 || sz == 8);

         IRType tyE = szToITy(sz);
         IRTemp valE = newTemp(tyE);

         /* NOTE(review): the printed mnemonic is always "crc32b",
            even for the 16/32/64-bit forms decoded here -- debug
            output only, but worth confirming/fixing. */
         if (epartIsReg(modrm)) {
            assign(valE, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
                nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(valE, loadLE(tyE, mkexpr(addr)));
            delta += alen;
            DIP("crc32b %s,%s\n", dis_buf,
                nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
         }

         /* Somewhat funny getting/putting of the crc32 value, in order
            to ensure that it turns into 64-bit gets and puts.  However,
            mask off the upper 32 bits so as to not get memcheck false
            +ves around the helper call. */
         IRTemp valG0 = newTemp(Ity_I64);
         assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
                             mkU64(0xFFFFFFFF)));

         const HChar* nm = NULL;
         void*        fn = NULL;
         switch (sz) {
            case 1: nm = "amd64g_calc_crc32b";
                    fn = &amd64g_calc_crc32b; break;
            case 2: nm = "amd64g_calc_crc32w";
                    fn = &amd64g_calc_crc32w; break;
            case 4: nm = "amd64g_calc_crc32l";
                    fn = &amd64g_calc_crc32l; break;
            case 8: nm = "amd64g_calc_crc32q";
                    fn = &amd64g_calc_crc32q; break;
         }
         vassert(nm && fn);
         IRTemp valG1 = newTemp(Ity_I64);
         assign(valG1,
                mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
                              mkIRExprVec_2(mkexpr(valG0),
                                            widenUto64(mkexpr(valE)))));

         putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F3A__SSE4                   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Handles PEXTRW (0F 3A 15): extract one 16-bit lane (selected by
   imm8 bits 2:0) of an XMM register into a 32-bit GPR (zero-extended)
   or a 16-bit memory location. */
static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr    = IRTemp_INVALID;
   IRTemp t0      = IRTemp_INVALID;
   IRTemp t1      = IRTemp_INVALID;
   IRTemp t2      = IRTemp_INVALID;
   IRTemp t3      = IRTemp_INVALID;
   UChar  modrm   = getUChar(delta);
   Int    alen    = 0;
   HChar  dis_buf[50];
   UInt   rG      = gregOfRexRM(pfx,modrm);
   Int    imm8_20;
   IRTemp xmm_vec = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   const HChar* mbV = isAvx ?
"v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   assign( xmm_vec, getXMMReg(rG) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   /* Peek at the imm8 byte (which follows the modrm/amode bytes);
      only bits 2:0 select the word lane. */
   if ( epartIsReg( modrm ) ) {
      imm8_20 = (Int)(getUChar(delta+1) & 7);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_20 = (Int)(getUChar(delta+alen) & 7);
   }

   switch (imm8_20) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(t0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(t1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(t2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(t3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
      delta += 1+1;
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(d16) );
      delta += alen+1;
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
   }
   return delta;
}


/* Handles PEXTRD: extract one 32-bit lane (selected by imm8 bits 1:0)
   of an XMM register into a 32-bit GPR or memory location. */
static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Int    imm8_10;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_dword = newTemp(Ity_I32);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   /* Only bits 1:0 of the imm8 select the dword lane. */
   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0:  assign( src_dword, mkexpr(t0) ); break;
      case 1:  assign( src_dword, mkexpr(t1) ); break;
      case 2:  assign( src_dword, mkexpr(t2) ); break;
      case 3:  assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
      delta += 1+1;
      DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg32( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      delta += alen+1;
      DIP( "%spextrd $%d, %s,%s\n", mbV,
           imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}


/* Handles PEXTRQ (REX.W form): extract one 64-bit lane (selected by
   imm8 bit 0) of an XMM register into a 64-bit GPR or memory
   location. */
static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Int    imm8_0;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_qword = newTemp(Ity_I64);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(1==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );

   /* Only bit 0 of the imm8 selects the qword lane. */
   if ( epartIsReg( modrm ) ) {
      imm8_0 = (Int)(getUChar(delta+1) & 1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_0 = (Int)(getUChar(delta+alen) & 1);
   }

   switch ( imm8_0 ) {
      case 0:  assign( src_qword, unop(Iop_V128to64,   mkexpr(xmm_vec)) );
               break;
      case 1:  assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
               break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
      delta += 1+1;
      DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_qword) );
      delta += alen+1;
      DIP( "%spextrq $%d, %s,%s\n", mbV,
           imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}

/* Count-trailing-zeroes on a 32-bit expression. */
static IRExpr* math_CTZ32(IRExpr *exp)
{
   /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
   return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
}

static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
                               Long delta, UChar opc, UChar imm,
                               HChar dis_buf[])
{
   /* We only handle PCMPISTRI for now */
   vassert((opc & 0x03) == 0x03);
   /* And only an immediate byte of 0x38 or 0x3A */
   vassert((imm & ~0x02) == 0x38);

   /* FIXME: Is this correct when RegNoL == 16 ?
*/ 18494 IRTemp argL = newTemp(Ity_V128); 18495 assign(argL, getXMMReg(regNoL)); 18496 IRTemp argR = newTemp(Ity_V128); 18497 assign(argR, getXMMReg(regNoR)); 18498 18499 IRTemp zmaskL = newTemp(Ity_I32); 18500 assign(zmaskL, unop(Iop_16Uto32, 18501 unop(Iop_GetMSBs8x16, 18502 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0))))); 18503 IRTemp zmaskR = newTemp(Ity_I32); 18504 assign(zmaskR, unop(Iop_16Uto32, 18505 unop(Iop_GetMSBs8x16, 18506 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0))))); 18507 18508 /* We want validL = ~(zmaskL | -zmaskL) 18509 18510 But this formulation kills memcheck's validity tracking when any 18511 bits above the first "1" are invalid. So reformulate as: 18512 18513 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1 18514 */ 18515 18516 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL))); 18517 18518 /* Generate a bool expression which is zero iff the original is 18519 zero. Do this carefully so memcheck can propagate validity bits 18520 correctly. 18521 */ 18522 IRTemp zmaskL_zero = newTemp(Ity_I1); 18523 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0))); 18524 18525 IRTemp validL = newTemp(Ity_I32); 18526 assign(validL, binop(Iop_Sub32, 18527 IRExpr_ITE(mkexpr(zmaskL_zero), 18528 binop(Iop_Shl32, mkU32(1), ctzL), 18529 mkU32(0)), 18530 mkU32(1))); 18531 18532 /* And similarly for validR. */ 18533 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR))); 18534 IRTemp zmaskR_zero = newTemp(Ity_I1); 18535 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0))); 18536 IRTemp validR = newTemp(Ity_I32); 18537 assign(validR, binop(Iop_Sub32, 18538 IRExpr_ITE(mkexpr(zmaskR_zero), 18539 binop(Iop_Shl32, mkU32(1), ctzR), 18540 mkU32(0)), 18541 mkU32(1))); 18542 18543 /* Do the actual comparison. 
*/ 18544 IRExpr *boolResII = unop(Iop_16Uto32, 18545 unop(Iop_GetMSBs8x16, 18546 binop(Iop_CmpEQ8x16, mkexpr(argL), 18547 mkexpr(argR)))); 18548 18549 /* Compute boolresII & validL & validR (i.e., if both valid, use 18550 comparison result) */ 18551 IRExpr *intRes1_a = binop(Iop_And32, boolResII, 18552 binop(Iop_And32, 18553 mkexpr(validL), mkexpr(validR))); 18554 18555 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */ 18556 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32, 18557 mkexpr(validL), mkexpr(validR))); 18558 /* Otherwise, zero. */ 18559 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF), 18560 binop(Iop_Or32, intRes1_a, intRes1_b)); 18561 18562 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with 18563 result. */ 18564 IRTemp intRes2 = newTemp(Ity_I32); 18565 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF), 18566 binop(Iop_Xor32, intRes1, mkexpr(validL)))); 18567 18568 /* If the 0x40 bit were set in imm=0x3A, we would return the index 18569 of the msb. Since it is clear, we return the index of the 18570 lsb. */ 18571 IRExpr *newECX = math_CTZ32(binop(Iop_Or32, 18572 mkexpr(intRes2), mkU32(0x10000))); 18573 18574 /* And thats our rcx. */ 18575 putIReg32(R_RCX, newECX); 18576 18577 /* Now for the condition codes... 
*/ 18578 18579 /* C == 0 iff intRes2 == 0 */ 18580 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2), 18581 mkU32(0)), 18582 mkU32(1 << AMD64G_CC_SHIFT_C), 18583 mkU32(0)); 18584 /* Z == 1 iff any in argL is 0 */ 18585 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero), 18586 mkU32(1 << AMD64G_CC_SHIFT_Z), 18587 mkU32(0)); 18588 /* S == 1 iff any in argR is 0 */ 18589 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero), 18590 mkU32(1 << AMD64G_CC_SHIFT_S), 18591 mkU32(0)); 18592 /* O == IntRes2[0] */ 18593 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2), 18594 mkU32(0x01)), 18595 mkU8(AMD64G_CC_SHIFT_O)); 18596 18597 /* Put them all together */ 18598 IRTemp cc = newTemp(Ity_I64); 18599 assign(cc, widenUto64(binop(Iop_Or32, 18600 binop(Iop_Or32, c_bit, z_bit), 18601 binop(Iop_Or32, s_bit, o_bit)))); 18602 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY))); 18603 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc))); 18604 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0))); 18605 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0))); 18606 18607 return delta; 18608 } 18609 18610 /* This can fail, in which case it returns the original (unchanged) 18611 delta. */ 18612 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx, 18613 Long delta, Bool isAvx, UChar opc ) 18614 { 18615 Long delta0 = delta; 18616 UInt isISTRx = opc & 2; 18617 UInt isxSTRM = (opc & 1) ^ 1; 18618 UInt regNoL = 0; 18619 UInt regNoR = 0; 18620 UChar imm = 0; 18621 IRTemp addr = IRTemp_INVALID; 18622 Int alen = 0; 18623 HChar dis_buf[50]; 18624 18625 /* This is a nasty kludge. We need to pass 2 x V128 to the helper 18626 (which is clean). Since we can't do that, use a dirty helper to 18627 compute the results directly from the XMM regs in the guest 18628 state. That means for the memory case, we need to move the left 18629 operand into a pseudo-register (XMM16, let's call it). 
   */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      regNoR = gregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      regNoR = gregOfRexRM(pfx, modrm);
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* No alignment check; I guess that makes sense, given that
         these insns are for dealing with C style strings. */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
      itself. */
   if (regNoL == 16) {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, dis_buf, nameXMMReg(regNoR));
   } else {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
   }

   /* Handle special case(s): PCMPISTRI $0x3A gets a fast inline-IR
      path instead of the dirty-helper route below. */
   if (imm == 0x3A && isISTRx && !isxSTRM) {
      return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
                                opc, imm, dis_buf);
   }

   /* Now we know the XMM reg numbers for the operands, and the
      immediate byte.  Is it one we can actually handle? Throw out any
      cases for which the helper function has not been verified. */
   switch (imm) {
      case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x12: case 0x14: case 0x18: case 0x1A:
      case 0x30: case 0x34: case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
         break;
      // the 16-bit character versions of the above
      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
      case 0x13: case 0x1B:
      case 0x39: case 0x3B:
      case 0x45: case 0x4B:
         break;
      default:
         return delta0; /*FAIL*/
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_PCMPxSTRx;
   const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";

   /* Round up the arguments.  Note that this is a kludge -- the use
      of mkU64 rather than mkIRExpr_HWord implies the assumption that
      the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
   IRExpr*  gstOffLe     = mkU64(gstOffL);
   IRExpr*  gstOffRe     = mkU64(gstOffR);
   IRExpr*  edxIN        = isISTRx ? mkU64(0) : getIRegRDX(8);
   IRExpr*  eaxIN        = isISTRx ? mkU64(0) : getIRegRAX(8);
   IRExpr** args
      = mkIRExprVec_6( IRExpr_BBPTR(),
                       opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );

   IRTemp   resT = newTemp(Ity_I64);
   IRDirty* d    = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Read;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (isxSTRM) {
      /* Declare that the helper writes XMM0. */
      d->nFxState = 3;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = ymmGuestRegOffset(0);
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );

   /* Now resT[15:0] holds the new OSZACP values, so the condition
      codes must be updated.  And for a xSTRI case, resT[31:16] holds
      the new ECX value, so stash that too. */
   if (!isxSTRM) {
      putIReg64(R_RCX, binop(Iop_And64,
                             binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
                             mkU64(0xFFFF)));
   }

   /* Zap the upper half of the dest reg as per AVX conventions. */
   if (isxSTRM && isAvx)
      putYMMRegLane128(/*YMM*/0, 1, mkV128(0));

   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
   ));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}


/* Build a V128 equal to |v128| except that byte lane |imm8| (0..15)
   is replaced by the low byte of |u8|. */
static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 15);

   // Create a V128 value which has the selected byte in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   /* Shift the byte into position within its 64-bit half... */
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   /* ...and place that half in the low or high 64 bits of the V128. */
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   /* Mask the selected byte lane out of v128, then OR in the new byte.
      (mkV128 takes a 16-bit mask, one bit per byte lane.) */
   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}


/* Build a V128 equal to |v128| except that 32-bit lane |imm8| (0..3)
   is replaced by |u32|. */
static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   switch (imm8) {
      case 3:  mask = 0x0FFF;
               assign(withZs, mkV128from32s(u32, z32, z32, z32));
               break;
      case 2:  mask = 0xF0FF;
               assign(withZs, mkV128from32s(z32, u32, z32, z32));
               break;
      case 1:  mask = 0xFF0F;
               assign(withZs, mkV128from32s(z32, z32, u32, z32));
               break;
      case 0:  mask = 0xFFF0;
               assign(withZs, mkV128from32s(z32, z32, z32, u32));
               break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


/* Build a V128 equal to |v128| except that 64-bit lane |imm8| (0 or 1)
   is replaced by |u64|. */
static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}


/* INSERTPS core: place |toInsertD| into the 32-bit lane of |dstV|
   selected by imm8[5:4], then zero out any lanes selected by the
   zmask in imm8[3:0]. */
static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}


/* Decode PEXTRB xmm-to-reg/mem: extract byte lane (imm8 & 15) of the
   G (xmm) register; zero-extend into a 64-bit GPR, or store a single
   byte to memory.  Returns the updated delta. */
static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);
   IRTemp shr_lane = newTemp(Ity_I32);
   const HChar* mbV = isAvx ?
"v" : ""; 18865 UChar modrm = getUChar(delta); 18866 IRTemp t3, t2, t1, t0; 18867 Int imm8; 18868 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) ); 18869 t3 = t2 = t1 = t0 = IRTemp_INVALID; 18870 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 18871 18872 if ( epartIsReg( modrm ) ) { 18873 imm8 = (Int)getUChar(delta+1); 18874 } else { 18875 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 18876 imm8 = (Int)getUChar(delta+alen); 18877 } 18878 switch ( (imm8 >> 2) & 3 ) { 18879 case 0: assign( sel_lane, mkexpr(t0) ); break; 18880 case 1: assign( sel_lane, mkexpr(t1) ); break; 18881 case 2: assign( sel_lane, mkexpr(t2) ); break; 18882 case 3: assign( sel_lane, mkexpr(t3) ); break; 18883 default: vassert(0); 18884 } 18885 assign( shr_lane, 18886 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) ); 18887 18888 if ( epartIsReg( modrm ) ) { 18889 putIReg64( eregOfRexRM(pfx,modrm), 18890 unop( Iop_32Uto64, 18891 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) ); 18892 delta += 1+1; 18893 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8, 18894 nameXMMReg( gregOfRexRM(pfx, modrm) ), 18895 nameIReg64( eregOfRexRM(pfx, modrm) ) ); 18896 } else { 18897 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) ); 18898 delta += alen+1; 18899 DIP( "%spextrb $%d,%s,%s\n", mbV, 18900 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf ); 18901 } 18902 18903 return delta; 18904 } 18905 18906 18907 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18908 { 18909 vassert(imm8 < 256); 18910 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF }; 18911 IRTemp and_vec = newTemp(Ity_V128); 18912 IRTemp sum_vec = newTemp(Ity_V128); 18913 IRTemp rm = newTemp(Ity_I32); 18914 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18915 assign( and_vec, binop( Iop_AndV128, 18916 triop( Iop_Mul64Fx2, 18917 mkexpr(rm), 18918 mkexpr(dst_vec), mkexpr(src_vec) ), 18919 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) ); 18920 18921 assign( sum_vec, binop( 
Iop_Add64F0x2, 18922 binop( Iop_InterleaveHI64x2, 18923 mkexpr(and_vec), mkexpr(and_vec) ), 18924 binop( Iop_InterleaveLO64x2, 18925 mkexpr(and_vec), mkexpr(and_vec) ) ) ); 18926 IRTemp res = newTemp(Ity_V128); 18927 assign(res, binop( Iop_AndV128, 18928 binop( Iop_InterleaveLO64x2, 18929 mkexpr(sum_vec), mkexpr(sum_vec) ), 18930 mkV128( imm8_perms[ (imm8 & 3) ] ) ) ); 18931 return res; 18932 } 18933 18934 18935 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 ) 18936 { 18937 vassert(imm8 < 256); 18938 IRTemp tmp_prod_vec = newTemp(Ity_V128); 18939 IRTemp prod_vec = newTemp(Ity_V128); 18940 IRTemp sum_vec = newTemp(Ity_V128); 18941 IRTemp rm = newTemp(Ity_I32); 18942 IRTemp v3, v2, v1, v0; 18943 v3 = v2 = v1 = v0 = IRTemp_INVALID; 18944 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 18945 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 18946 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 18947 0xFFFF }; 18948 18949 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */ 18950 assign( tmp_prod_vec, 18951 binop( Iop_AndV128, 18952 triop( Iop_Mul32Fx4, 18953 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ), 18954 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) ); 18955 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 ); 18956 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) ); 18957 18958 assign( sum_vec, triop( Iop_Add32Fx4, 18959 mkexpr(rm), 18960 binop( Iop_InterleaveHI32x4, 18961 mkexpr(prod_vec), mkexpr(prod_vec) ), 18962 binop( Iop_InterleaveLO32x4, 18963 mkexpr(prod_vec), mkexpr(prod_vec) ) ) ); 18964 18965 IRTemp res = newTemp(Ity_V128); 18966 assign( res, binop( Iop_AndV128, 18967 triop( Iop_Add32Fx4, 18968 mkexpr(rm), 18969 binop( Iop_InterleaveHI32x4, 18970 mkexpr(sum_vec), mkexpr(sum_vec) ), 18971 binop( Iop_InterleaveLO32x4, 18972 mkexpr(sum_vec), mkexpr(sum_vec) ) ), 18973 mkV128( imm8_perms[ (imm8 & 15) ] ) ) ); 18974 return res; 18975 } 18976 18977 18978 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt 
imm8 ) 18979 { 18980 /* Mask out bits of the operands we don't need. This isn't 18981 strictly necessary, but it does ensure Memcheck doesn't 18982 give us any false uninitialised value errors as a 18983 result. */ 18984 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 }; 18985 UShort dst_mask[2] = { 0x07FF, 0x7FF0 }; 18986 18987 IRTemp src_maskV = newTemp(Ity_V128); 18988 IRTemp dst_maskV = newTemp(Ity_V128); 18989 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] )); 18990 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] )); 18991 18992 IRTemp src_masked = newTemp(Ity_V128); 18993 IRTemp dst_masked = newTemp(Ity_V128); 18994 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV))); 18995 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV))); 18996 18997 /* Generate 4 64 bit values that we can hand to a clean helper */ 18998 IRTemp sHi = newTemp(Ity_I64); 18999 IRTemp sLo = newTemp(Ity_I64); 19000 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) ); 19001 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) ); 19002 19003 IRTemp dHi = newTemp(Ity_I64); 19004 IRTemp dLo = newTemp(Ity_I64); 19005 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) ); 19006 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) ); 19007 19008 /* Compute halves of the result separately */ 19009 IRTemp resHi = newTemp(Ity_I64); 19010 IRTemp resLo = newTemp(Ity_I64); 19011 19012 IRExpr** argsHi 19013 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 19014 mkU64( 0x80 | (imm8 & 7) )); 19015 IRExpr** argsLo 19016 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo), 19017 mkU64( 0x00 | (imm8 & 7) )); 19018 19019 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/, 19020 "amd64g_calc_mpsadbw", 19021 &amd64g_calc_mpsadbw, argsHi )); 19022 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/, 19023 "amd64g_calc_mpsadbw", 19024 &amd64g_calc_mpsadbw, argsLo )); 19025 19026 IRTemp res = newTemp(Ity_V128); 19027 
assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))); 19028 return res; 19029 } 19030 19031 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx, 19032 Long delta, Bool isAvx ) 19033 { 19034 IRTemp addr = IRTemp_INVALID; 19035 Int alen = 0; 19036 HChar dis_buf[50]; 19037 UChar modrm = getUChar(delta); 19038 Int imm8_10; 19039 IRTemp xmm_vec = newTemp(Ity_V128); 19040 IRTemp src_dword = newTemp(Ity_I32); 19041 UInt rG = gregOfRexRM(pfx,modrm); 19042 IRTemp t3, t2, t1, t0; 19043 t3 = t2 = t1 = t0 = IRTemp_INVALID; 19044 19045 assign( xmm_vec, getXMMReg( rG ) ); 19046 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 ); 19047 19048 if ( epartIsReg( modrm ) ) { 19049 imm8_10 = (Int)(getUChar(delta+1) & 3); 19050 } else { 19051 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19052 imm8_10 = (Int)(getUChar(delta+alen) & 3); 19053 } 19054 19055 switch ( imm8_10 ) { 19056 case 0: assign( src_dword, mkexpr(t0) ); break; 19057 case 1: assign( src_dword, mkexpr(t1) ); break; 19058 case 2: assign( src_dword, mkexpr(t2) ); break; 19059 case 3: assign( src_dword, mkexpr(t3) ); break; 19060 default: vassert(0); 19061 } 19062 19063 if ( epartIsReg( modrm ) ) { 19064 UInt rE = eregOfRexRM(pfx,modrm); 19065 putIReg32( rE, mkexpr(src_dword) ); 19066 delta += 1+1; 19067 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 19068 nameXMMReg( rG ), nameIReg32( rE ) ); 19069 } else { 19070 storeLE( mkexpr(addr), mkexpr(src_dword) ); 19071 delta += alen+1; 19072 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10, 19073 nameXMMReg( rG ), dis_buf ); 19074 } 19075 19076 return delta; 19077 } 19078 19079 19080 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 ) 19081 { 19082 IRTemp t0 = newTemp(Ity_I64); 19083 IRTemp t1 = newTemp(Ity_I64); 19084 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, 19085 mkexpr(dV))); 19086 assign(t1, unop((imm8&16) ? 
Iop_V128HIto64 : Iop_V128to64, 19087 mkexpr(sV))); 19088 19089 IRTemp t2 = newTemp(Ity_I64); 19090 IRTemp t3 = newTemp(Ity_I64); 19091 19092 IRExpr** args; 19093 19094 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0)); 19095 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 19096 &amd64g_calculate_pclmul, args)); 19097 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1)); 19098 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul", 19099 &amd64g_calculate_pclmul, args)); 19100 19101 IRTemp res = newTemp(Ity_V128); 19102 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2))); 19103 return res; 19104 } 19105 19106 19107 __attribute__((noinline)) 19108 static 19109 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK, 19110 const VexAbiInfo* vbi, 19111 Prefix pfx, Int sz, Long deltaIN ) 19112 { 19113 IRTemp addr = IRTemp_INVALID; 19114 UChar modrm = 0; 19115 Int alen = 0; 19116 HChar dis_buf[50]; 19117 19118 *decode_OK = False; 19119 19120 Long delta = deltaIN; 19121 UChar opc = getUChar(delta); 19122 delta++; 19123 switch (opc) { 19124 19125 case 0x08: 19126 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */ 19127 if (have66noF2noF3(pfx) && sz == 2) { 19128 19129 IRTemp src0 = newTemp(Ity_F32); 19130 IRTemp src1 = newTemp(Ity_F32); 19131 IRTemp src2 = newTemp(Ity_F32); 19132 IRTemp src3 = newTemp(Ity_F32); 19133 IRTemp res0 = newTemp(Ity_F32); 19134 IRTemp res1 = newTemp(Ity_F32); 19135 IRTemp res2 = newTemp(Ity_F32); 19136 IRTemp res3 = newTemp(Ity_F32); 19137 IRTemp rm = newTemp(Ity_I32); 19138 Int imm = 0; 19139 19140 modrm = getUChar(delta); 19141 19142 if (epartIsReg(modrm)) { 19143 assign( src0, 19144 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 19145 assign( src1, 19146 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) ); 19147 assign( src2, 19148 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) ); 19149 assign( src3, 19150 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) ); 19151 imm = getUChar(delta+1); 19152 if (imm & ~15) goto 
decode_failure; 19153 delta += 1+1; 19154 DIP( "roundps $%d,%s,%s\n", 19155 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 19156 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19157 } else { 19158 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19159 gen_SEGV_if_not_16_aligned(addr); 19160 assign( src0, loadLE(Ity_F32, 19161 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 19162 assign( src1, loadLE(Ity_F32, 19163 binop(Iop_Add64, mkexpr(addr), mkU64(4) ))); 19164 assign( src2, loadLE(Ity_F32, 19165 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 19166 assign( src3, loadLE(Ity_F32, 19167 binop(Iop_Add64, mkexpr(addr), mkU64(12) ))); 19168 imm = getUChar(delta+alen); 19169 if (imm & ~15) goto decode_failure; 19170 delta += alen+1; 19171 DIP( "roundps $%d,%s,%s\n", 19172 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19173 } 19174 19175 /* (imm & 3) contains an Intel-encoded rounding mode. Because 19176 that encoding is the same as the encoding for IRRoundingMode, 19177 we can use that value directly in the IR as a rounding 19178 mode. */ 19179 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 19180 19181 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) ); 19182 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) ); 19183 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) ); 19184 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) ); 19185 19186 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 19187 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 19188 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) ); 19189 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) ); 19190 19191 goto decode_success; 19192 } 19193 break; 19194 19195 case 0x09: 19196 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */ 19197 if (have66noF2noF3(pfx) && sz == 2) { 19198 19199 IRTemp src0 = newTemp(Ity_F64); 19200 IRTemp src1 = newTemp(Ity_F64); 19201 IRTemp res0 = newTemp(Ity_F64); 19202 IRTemp res1 = newTemp(Ity_F64); 19203 IRTemp rm = newTemp(Ity_I32); 19204 Int imm = 0; 19205 19206 modrm = getUChar(delta); 19207 19208 if (epartIsReg(modrm)) { 19209 assign( src0, 19210 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) ); 19211 assign( src1, 19212 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) ); 19213 imm = getUChar(delta+1); 19214 if (imm & ~15) goto decode_failure; 19215 delta += 1+1; 19216 DIP( "roundpd $%d,%s,%s\n", 19217 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 19218 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19219 } else { 19220 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19221 gen_SEGV_if_not_16_aligned(addr); 19222 assign( src0, loadLE(Ity_F64, 19223 binop(Iop_Add64, mkexpr(addr), mkU64(0) ))); 19224 assign( src1, loadLE(Ity_F64, 19225 binop(Iop_Add64, mkexpr(addr), mkU64(8) ))); 19226 imm = getUChar(delta+alen); 19227 if (imm & ~15) goto decode_failure; 19228 delta += alen+1; 19229 DIP( "roundpd $%d,%s,%s\n", 19230 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19231 } 19232 19233 /* (imm & 3) contains an 
Intel-encoded rounding mode. Because 19234 that encoding is the same as the encoding for IRRoundingMode, 19235 we can use that value directly in the IR as a rounding 19236 mode. */ 19237 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3)); 19238 19239 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) ); 19240 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) ); 19241 19242 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) ); 19243 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) ); 19244 19245 goto decode_success; 19246 } 19247 break; 19248 19249 case 0x0A: 19250 case 0x0B: 19251 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 19252 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 19253 */ 19254 if (have66noF2noF3(pfx) && sz == 2) { 19255 19256 Bool isD = opc == 0x0B; 19257 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 19258 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 19259 Int imm = 0; 19260 19261 modrm = getUChar(delta); 19262 19263 if (epartIsReg(modrm)) { 19264 assign( src, 19265 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) 19266 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) ); 19267 imm = getUChar(delta+1); 19268 if (imm & ~15) goto decode_failure; 19269 delta += 1+1; 19270 DIP( "rounds%c $%d,%s,%s\n", 19271 isD ? 'd' : 's', 19272 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ), 19273 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19274 } else { 19275 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19276 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 19277 imm = getUChar(delta+alen); 19278 if (imm & ~15) goto decode_failure; 19279 delta += alen+1; 19280 DIP( "rounds%c $%d,%s,%s\n", 19281 isD ? 'd' : 's', 19282 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19283 } 19284 19285 /* (imm & 3) contains an Intel-encoded rounding mode. 
Because 19286 that encoding is the same as the encoding for IRRoundingMode, 19287 we can use that value directly in the IR as a rounding 19288 mode. */ 19289 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, 19290 (imm & 4) ? get_sse_roundingmode() 19291 : mkU32(imm & 3), 19292 mkexpr(src)) ); 19293 19294 if (isD) 19295 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 19296 else 19297 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) ); 19298 19299 goto decode_success; 19300 } 19301 break; 19302 19303 case 0x0C: 19304 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8 19305 Blend Packed Single Precision Floating-Point Values (XMM) */ 19306 if (have66noF2noF3(pfx) && sz == 2) { 19307 19308 Int imm8; 19309 IRTemp dst_vec = newTemp(Ity_V128); 19310 IRTemp src_vec = newTemp(Ity_V128); 19311 19312 modrm = getUChar(delta); 19313 19314 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 19315 19316 if ( epartIsReg( modrm ) ) { 19317 imm8 = (Int)getUChar(delta+1); 19318 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 19319 delta += 1+1; 19320 DIP( "blendps $%d, %s,%s\n", imm8, 19321 nameXMMReg( eregOfRexRM(pfx, modrm) ), 19322 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19323 } else { 19324 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19325 1/* imm8 is 1 byte after the amode */ ); 19326 gen_SEGV_if_not_16_aligned( addr ); 19327 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19328 imm8 = (Int)getUChar(delta+alen); 19329 delta += alen+1; 19330 DIP( "blendpd $%d, %s,%s\n", 19331 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19332 } 19333 19334 putXMMReg( gregOfRexRM(pfx, modrm), 19335 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) ); 19336 goto decode_success; 19337 } 19338 break; 19339 19340 case 0x0D: 19341 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8 19342 Blend Packed Double Precision Floating-Point Values (XMM) */ 19343 if (have66noF2noF3(pfx) && sz == 2) { 19344 19345 Int imm8; 19346 IRTemp 
dst_vec = newTemp(Ity_V128); 19347 IRTemp src_vec = newTemp(Ity_V128); 19348 19349 modrm = getUChar(delta); 19350 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 19351 19352 if ( epartIsReg( modrm ) ) { 19353 imm8 = (Int)getUChar(delta+1); 19354 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 19355 delta += 1+1; 19356 DIP( "blendpd $%d, %s,%s\n", imm8, 19357 nameXMMReg( eregOfRexRM(pfx, modrm) ), 19358 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19359 } else { 19360 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19361 1/* imm8 is 1 byte after the amode */ ); 19362 gen_SEGV_if_not_16_aligned( addr ); 19363 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19364 imm8 = (Int)getUChar(delta+alen); 19365 delta += alen+1; 19366 DIP( "blendpd $%d, %s,%s\n", 19367 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19368 } 19369 19370 putXMMReg( gregOfRexRM(pfx, modrm), 19371 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) ); 19372 goto decode_success; 19373 } 19374 break; 19375 19376 case 0x0E: 19377 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8 19378 Blend Packed Words (XMM) */ 19379 if (have66noF2noF3(pfx) && sz == 2) { 19380 19381 Int imm8; 19382 IRTemp dst_vec = newTemp(Ity_V128); 19383 IRTemp src_vec = newTemp(Ity_V128); 19384 19385 modrm = getUChar(delta); 19386 19387 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) ); 19388 19389 if ( epartIsReg( modrm ) ) { 19390 imm8 = (Int)getUChar(delta+1); 19391 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) ); 19392 delta += 1+1; 19393 DIP( "pblendw $%d, %s,%s\n", imm8, 19394 nameXMMReg( eregOfRexRM(pfx, modrm) ), 19395 nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19396 } else { 19397 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19398 1/* imm8 is 1 byte after the amode */ ); 19399 gen_SEGV_if_not_16_aligned( addr ); 19400 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19401 imm8 = (Int)getUChar(delta+alen); 19402 delta += alen+1; 19403 DIP( "pblendw $%d, %s,%s\n", 19404 
imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) ); 19405 } 19406 19407 putXMMReg( gregOfRexRM(pfx, modrm), 19408 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) ); 19409 goto decode_success; 19410 } 19411 break; 19412 19413 case 0x14: 19414 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8 19415 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. 19416 (XMM) */ 19417 if (have66noF2noF3(pfx) && sz == 2) { 19418 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 19419 goto decode_success; 19420 } 19421 break; 19422 19423 case 0x15: 19424 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8 19425 Extract Word from xmm, store in mem or zero-extend + store in gen.reg. 19426 (XMM) */ 19427 if (have66noF2noF3(pfx) && sz == 2) { 19428 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ ); 19429 goto decode_success; 19430 } 19431 break; 19432 19433 case 0x16: 19434 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8 19435 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM) 19436 Note that this insn has the same opcodes as PEXTRQ, but 19437 here the REX.W bit is _not_ present */ 19438 if (have66noF2noF3(pfx) 19439 && sz == 2 /* REX.W is _not_ present */) { 19440 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ ); 19441 goto decode_success; 19442 } 19443 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8 19444 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM) 19445 Note that this insn has the same opcodes as PEXTRD, but 19446 here the REX.W bit is present */ 19447 if (have66noF2noF3(pfx) 19448 && sz == 8 /* REX.W is present */) { 19449 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/); 19450 goto decode_success; 19451 } 19452 break; 19453 19454 case 0x17: 19455 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract 19456 float from xmm reg and store in gen.reg or mem. This is 19457 identical to PEXTRD, except that REX.W appears to be ignored. 
19458 */ 19459 if (have66noF2noF3(pfx) 19460 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) { 19461 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ ); 19462 goto decode_success; 19463 } 19464 break; 19465 19466 case 0x20: 19467 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8 19468 Extract byte from r32/m8 and insert into xmm1 */ 19469 if (have66noF2noF3(pfx) && sz == 2) { 19470 Int imm8; 19471 IRTemp new8 = newTemp(Ity_I8); 19472 modrm = getUChar(delta); 19473 UInt rG = gregOfRexRM(pfx, modrm); 19474 if ( epartIsReg( modrm ) ) { 19475 UInt rE = eregOfRexRM(pfx,modrm); 19476 imm8 = (Int)(getUChar(delta+1) & 0xF); 19477 assign( new8, unop(Iop_32to8, getIReg32(rE)) ); 19478 delta += 1+1; 19479 DIP( "pinsrb $%d,%s,%s\n", imm8, 19480 nameIReg32(rE), nameXMMReg(rG) ); 19481 } else { 19482 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19483 imm8 = (Int)(getUChar(delta+alen) & 0xF); 19484 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) ); 19485 delta += alen+1; 19486 DIP( "pinsrb $%d,%s,%s\n", 19487 imm8, dis_buf, nameXMMReg(rG) ); 19488 } 19489 IRTemp src_vec = newTemp(Ity_V128); 19490 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) )); 19491 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 ); 19492 putXMMReg( rG, mkexpr(res) ); 19493 goto decode_success; 19494 } 19495 break; 19496 19497 case 0x21: 19498 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1 19499 Insert Packed Single Precision Floating-Point Value (XMM) */ 19500 if (have66noF2noF3(pfx) && sz == 2) { 19501 UInt imm8; 19502 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 19503 const IRTemp inval = IRTemp_INVALID; 19504 19505 modrm = getUChar(delta); 19506 UInt rG = gregOfRexRM(pfx, modrm); 19507 19508 if ( epartIsReg( modrm ) ) { 19509 UInt rE = eregOfRexRM(pfx, modrm); 19510 IRTemp vE = newTemp(Ity_V128); 19511 assign( vE, getXMMReg(rE) ); 19512 IRTemp dsE[4] = { inval, inval, inval, inval }; 19513 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 19514 imm8 
= getUChar(delta+1); 19515 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 19516 delta += 1+1; 19517 DIP( "insertps $%u, %s,%s\n", 19518 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19519 } else { 19520 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19521 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 19522 imm8 = getUChar(delta+alen); 19523 delta += alen+1; 19524 DIP( "insertps $%u, %s,%s\n", 19525 imm8, dis_buf, nameXMMReg(rG) ); 19526 } 19527 19528 IRTemp vG = newTemp(Ity_V128); 19529 assign( vG, getXMMReg(rG) ); 19530 19531 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) ); 19532 goto decode_success; 19533 } 19534 break; 19535 19536 case 0x22: 19537 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 19538 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ 19539 if (have66noF2noF3(pfx) 19540 && sz == 2 /* REX.W is NOT present */) { 19541 Int imm8_10; 19542 IRTemp src_u32 = newTemp(Ity_I32); 19543 modrm = getUChar(delta); 19544 UInt rG = gregOfRexRM(pfx, modrm); 19545 19546 if ( epartIsReg( modrm ) ) { 19547 UInt rE = eregOfRexRM(pfx,modrm); 19548 imm8_10 = (Int)(getUChar(delta+1) & 3); 19549 assign( src_u32, getIReg32( rE ) ); 19550 delta += 1+1; 19551 DIP( "pinsrd $%d, %s,%s\n", 19552 imm8_10, nameIReg32(rE), nameXMMReg(rG) ); 19553 } else { 19554 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19555 imm8_10 = (Int)(getUChar(delta+alen) & 3); 19556 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 19557 delta += alen+1; 19558 DIP( "pinsrd $%d, %s,%s\n", 19559 imm8_10, dis_buf, nameXMMReg(rG) ); 19560 } 19561 19562 IRTemp src_vec = newTemp(Ity_V128); 19563 assign(src_vec, getXMMReg( rG )); 19564 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 19565 putXMMReg( rG, mkexpr(res_vec) ); 19566 goto decode_success; 19567 } 19568 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8 19569 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */ 19570 if (have66noF2noF3(pfx) 19571 && sz == 8 /* REX.W is 
present */) { 19572 Int imm8_0; 19573 IRTemp src_u64 = newTemp(Ity_I64); 19574 modrm = getUChar(delta); 19575 UInt rG = gregOfRexRM(pfx, modrm); 19576 19577 if ( epartIsReg( modrm ) ) { 19578 UInt rE = eregOfRexRM(pfx,modrm); 19579 imm8_0 = (Int)(getUChar(delta+1) & 1); 19580 assign( src_u64, getIReg64( rE ) ); 19581 delta += 1+1; 19582 DIP( "pinsrq $%d, %s,%s\n", 19583 imm8_0, nameIReg64(rE), nameXMMReg(rG) ); 19584 } else { 19585 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 19586 imm8_0 = (Int)(getUChar(delta+alen) & 1); 19587 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 19588 delta += alen+1; 19589 DIP( "pinsrq $%d, %s,%s\n", 19590 imm8_0, dis_buf, nameXMMReg(rG) ); 19591 } 19592 19593 IRTemp src_vec = newTemp(Ity_V128); 19594 assign(src_vec, getXMMReg( rG )); 19595 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 19596 putXMMReg( rG, mkexpr(res_vec) ); 19597 goto decode_success; 19598 } 19599 break; 19600 19601 case 0x40: 19602 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8 19603 Dot Product of Packed Single Precision Floating-Point Values (XMM) */ 19604 if (have66noF2noF3(pfx) && sz == 2) { 19605 modrm = getUChar(delta); 19606 Int imm8; 19607 IRTemp src_vec = newTemp(Ity_V128); 19608 IRTemp dst_vec = newTemp(Ity_V128); 19609 UInt rG = gregOfRexRM(pfx, modrm); 19610 assign( dst_vec, getXMMReg( rG ) ); 19611 if ( epartIsReg( modrm ) ) { 19612 UInt rE = eregOfRexRM(pfx, modrm); 19613 imm8 = (Int)getUChar(delta+1); 19614 assign( src_vec, getXMMReg(rE) ); 19615 delta += 1+1; 19616 DIP( "dpps $%d, %s,%s\n", 19617 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19618 } else { 19619 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19620 1/* imm8 is 1 byte after the amode */ ); 19621 gen_SEGV_if_not_16_aligned( addr ); 19622 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19623 imm8 = (Int)getUChar(delta+alen); 19624 delta += alen+1; 19625 DIP( "dpps $%d, %s,%s\n", 19626 imm8, dis_buf, nameXMMReg(rG) ); 19627 } 19628 IRTemp res = 
math_DPPS_128( src_vec, dst_vec, imm8 ); 19629 putXMMReg( rG, mkexpr(res) ); 19630 goto decode_success; 19631 } 19632 break; 19633 19634 case 0x41: 19635 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8 19636 Dot Product of Packed Double Precision Floating-Point Values (XMM) */ 19637 if (have66noF2noF3(pfx) && sz == 2) { 19638 modrm = getUChar(delta); 19639 Int imm8; 19640 IRTemp src_vec = newTemp(Ity_V128); 19641 IRTemp dst_vec = newTemp(Ity_V128); 19642 UInt rG = gregOfRexRM(pfx, modrm); 19643 assign( dst_vec, getXMMReg( rG ) ); 19644 if ( epartIsReg( modrm ) ) { 19645 UInt rE = eregOfRexRM(pfx, modrm); 19646 imm8 = (Int)getUChar(delta+1); 19647 assign( src_vec, getXMMReg(rE) ); 19648 delta += 1+1; 19649 DIP( "dppd $%d, %s,%s\n", 19650 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 19651 } else { 19652 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19653 1/* imm8 is 1 byte after the amode */ ); 19654 gen_SEGV_if_not_16_aligned( addr ); 19655 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19656 imm8 = (Int)getUChar(delta+alen); 19657 delta += alen+1; 19658 DIP( "dppd $%d, %s,%s\n", 19659 imm8, dis_buf, nameXMMReg(rG) ); 19660 } 19661 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 ); 19662 putXMMReg( rG, mkexpr(res) ); 19663 goto decode_success; 19664 } 19665 break; 19666 19667 case 0x42: 19668 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8 19669 Multiple Packed Sums of Absolule Difference (XMM) */ 19670 if (have66noF2noF3(pfx) && sz == 2) { 19671 Int imm8; 19672 IRTemp src_vec = newTemp(Ity_V128); 19673 IRTemp dst_vec = newTemp(Ity_V128); 19674 modrm = getUChar(delta); 19675 UInt rG = gregOfRexRM(pfx, modrm); 19676 19677 assign( dst_vec, getXMMReg(rG) ); 19678 19679 if ( epartIsReg( modrm ) ) { 19680 UInt rE = eregOfRexRM(pfx, modrm); 19681 19682 imm8 = (Int)getUChar(delta+1); 19683 assign( src_vec, getXMMReg(rE) ); 19684 delta += 1+1; 19685 DIP( "mpsadbw $%d, %s,%s\n", imm8, 19686 nameXMMReg(rE), nameXMMReg(rG) ); 19687 } else { 19688 addr = 
disAMode( &alen, vbi, pfx, delta, dis_buf, 19689 1/* imm8 is 1 byte after the amode */ ); 19690 gen_SEGV_if_not_16_aligned( addr ); 19691 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 19692 imm8 = (Int)getUChar(delta+alen); 19693 delta += alen+1; 19694 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) ); 19695 } 19696 19697 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) ); 19698 goto decode_success; 19699 } 19700 break; 19701 19702 case 0x44: 19703 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 19704 * Carry-less multiplication of selected XMM quadwords into XMM 19705 * registers (a.k.a multiplication of polynomials over GF(2)) 19706 */ 19707 if (have66noF2noF3(pfx) && sz == 2) { 19708 19709 Int imm8; 19710 IRTemp svec = newTemp(Ity_V128); 19711 IRTemp dvec = newTemp(Ity_V128); 19712 modrm = getUChar(delta); 19713 UInt rG = gregOfRexRM(pfx, modrm); 19714 19715 assign( dvec, getXMMReg(rG) ); 19716 19717 if ( epartIsReg( modrm ) ) { 19718 UInt rE = eregOfRexRM(pfx, modrm); 19719 imm8 = (Int)getUChar(delta+1); 19720 assign( svec, getXMMReg(rE) ); 19721 delta += 1+1; 19722 DIP( "pclmulqdq $%d, %s,%s\n", imm8, 19723 nameXMMReg(rE), nameXMMReg(rG) ); 19724 } else { 19725 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 19726 1/* imm8 is 1 byte after the amode */ ); 19727 gen_SEGV_if_not_16_aligned( addr ); 19728 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) ); 19729 imm8 = (Int)getUChar(delta+alen); 19730 delta += alen+1; 19731 DIP( "pclmulqdq $%d, %s,%s\n", 19732 imm8, dis_buf, nameXMMReg(rG) ); 19733 } 19734 19735 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) ); 19736 goto decode_success; 19737 } 19738 break; 19739 19740 case 0x60: 19741 case 0x61: 19742 case 0x62: 19743 case 0x63: 19744 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1 19745 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1 19746 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1 19747 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1 
19748 (selected special cases that actually occur in glibc, 19749 not by any means a complete implementation.) 19750 */ 19751 if (have66noF2noF3(pfx) && sz == 2) { 19752 Long delta0 = delta; 19753 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc ); 19754 if (delta > delta0) goto decode_success; 19755 /* else fall though; dis_PCMPxSTRx failed to decode it */ 19756 } 19757 break; 19758 19759 case 0xDF: 19760 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */ 19761 if (have66noF2noF3(pfx) && sz == 2) { 19762 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ ); 19763 goto decode_success; 19764 } 19765 break; 19766 19767 default: 19768 break; 19769 19770 } 19771 19772 decode_failure: 19773 *decode_OK = False; 19774 return deltaIN; 19775 19776 decode_success: 19777 *decode_OK = True; 19778 return delta; 19779 } 19780 19781 19782 /*------------------------------------------------------------*/ 19783 /*--- ---*/ 19784 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/ 19785 /*--- ---*/ 19786 /*------------------------------------------------------------*/ 19787 19788 __attribute__((noinline)) 19789 static 19790 Long dis_ESC_NONE ( 19791 /*MB_OUT*/DisResult* dres, 19792 /*MB_OUT*/Bool* expect_CAS, 19793 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 19794 Bool resteerCisOk, 19795 void* callback_opaque, 19796 const VexArchInfo* archinfo, 19797 const VexAbiInfo* vbi, 19798 Prefix pfx, Int sz, Long deltaIN 19799 ) 19800 { 19801 Long d64 = 0; 19802 UChar abyte = 0; 19803 IRTemp addr = IRTemp_INVALID; 19804 IRTemp t1 = IRTemp_INVALID; 19805 IRTemp t2 = IRTemp_INVALID; 19806 IRTemp t3 = IRTemp_INVALID; 19807 IRTemp t4 = IRTemp_INVALID; 19808 IRTemp t5 = IRTemp_INVALID; 19809 IRType ty = Ity_INVALID; 19810 UChar modrm = 0; 19811 Int am_sz = 0; 19812 Int d_sz = 0; 19813 Int alen = 0; 19814 HChar dis_buf[50]; 19815 19816 Long delta = deltaIN; 19817 UChar opc = getUChar(delta); delta++; 19818 19819 /* delta now points at the modrm byte. 
In most of the cases that 19820 follow, neither the F2 nor F3 prefixes are allowed. However, 19821 for some basic arithmetic operations we have to allow F2/XACQ or 19822 F3/XREL in the case where the destination is memory and the LOCK 19823 prefix is also present. Do this check by looking at the modrm 19824 byte but not advancing delta over it. */ 19825 /* By default, F2 and F3 are not allowed, so let's start off with 19826 that setting. */ 19827 Bool validF2orF3 = haveF2orF3(pfx) ? False : True; 19828 { UChar tmp_modrm = getUChar(delta); 19829 switch (opc) { 19830 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */ 19831 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */ 19832 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */ 19833 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */ 19834 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */ 19835 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */ 19836 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */ 19837 if (!epartIsReg(tmp_modrm) 19838 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) { 19839 /* dst is mem, and we have F2 or F3 but not both */ 19840 validF2orF3 = True; 19841 } 19842 break; 19843 default: 19844 break; 19845 } 19846 } 19847 19848 /* Now, in the switch below, for the opc values examined by the 19849 switch above, use validF2orF3 rather than looking at pfx 19850 directly. 
*/ 19851 switch (opc) { 19852 19853 case 0x00: /* ADD Gb,Eb */ 19854 if (!validF2orF3) goto decode_failure; 19855 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19856 return delta; 19857 case 0x01: /* ADD Gv,Ev */ 19858 if (!validF2orF3) goto decode_failure; 19859 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19860 return delta; 19861 19862 case 0x02: /* ADD Eb,Gb */ 19863 if (haveF2orF3(pfx)) goto decode_failure; 19864 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" ); 19865 return delta; 19866 case 0x03: /* ADD Ev,Gv */ 19867 if (haveF2orF3(pfx)) goto decode_failure; 19868 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" ); 19869 return delta; 19870 19871 case 0x04: /* ADD Ib, AL */ 19872 if (haveF2orF3(pfx)) goto decode_failure; 19873 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" ); 19874 return delta; 19875 case 0x05: /* ADD Iv, eAX */ 19876 if (haveF2orF3(pfx)) goto decode_failure; 19877 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" ); 19878 return delta; 19879 19880 case 0x08: /* OR Gb,Eb */ 19881 if (!validF2orF3) goto decode_failure; 19882 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19883 return delta; 19884 case 0x09: /* OR Gv,Ev */ 19885 if (!validF2orF3) goto decode_failure; 19886 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19887 return delta; 19888 19889 case 0x0A: /* OR Eb,Gb */ 19890 if (haveF2orF3(pfx)) goto decode_failure; 19891 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" ); 19892 return delta; 19893 case 0x0B: /* OR Ev,Gv */ 19894 if (haveF2orF3(pfx)) goto decode_failure; 19895 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" ); 19896 return delta; 19897 19898 case 0x0C: /* OR Ib, AL */ 19899 if (haveF2orF3(pfx)) goto decode_failure; 19900 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" ); 19901 return 
delta; 19902 case 0x0D: /* OR Iv, eAX */ 19903 if (haveF2orF3(pfx)) goto decode_failure; 19904 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" ); 19905 return delta; 19906 19907 case 0x10: /* ADC Gb,Eb */ 19908 if (!validF2orF3) goto decode_failure; 19909 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19910 return delta; 19911 case 0x11: /* ADC Gv,Ev */ 19912 if (!validF2orF3) goto decode_failure; 19913 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19914 return delta; 19915 19916 case 0x12: /* ADC Eb,Gb */ 19917 if (haveF2orF3(pfx)) goto decode_failure; 19918 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" ); 19919 return delta; 19920 case 0x13: /* ADC Ev,Gv */ 19921 if (haveF2orF3(pfx)) goto decode_failure; 19922 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" ); 19923 return delta; 19924 19925 case 0x14: /* ADC Ib, AL */ 19926 if (haveF2orF3(pfx)) goto decode_failure; 19927 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" ); 19928 return delta; 19929 case 0x15: /* ADC Iv, eAX */ 19930 if (haveF2orF3(pfx)) goto decode_failure; 19931 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" ); 19932 return delta; 19933 19934 case 0x18: /* SBB Gb,Eb */ 19935 if (!validF2orF3) goto decode_failure; 19936 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19937 return delta; 19938 case 0x19: /* SBB Gv,Ev */ 19939 if (!validF2orF3) goto decode_failure; 19940 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 19941 return delta; 19942 19943 case 0x1A: /* SBB Eb,Gb */ 19944 if (haveF2orF3(pfx)) goto decode_failure; 19945 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" ); 19946 return delta; 19947 case 0x1B: /* SBB Ev,Gv */ 19948 if (haveF2orF3(pfx)) goto decode_failure; 19949 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" ); 19950 return delta; 19951 19952 
case 0x1C: /* SBB Ib, AL */ 19953 if (haveF2orF3(pfx)) goto decode_failure; 19954 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" ); 19955 return delta; 19956 case 0x1D: /* SBB Iv, eAX */ 19957 if (haveF2orF3(pfx)) goto decode_failure; 19958 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" ); 19959 return delta; 19960 19961 case 0x20: /* AND Gb,Eb */ 19962 if (!validF2orF3) goto decode_failure; 19963 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19964 return delta; 19965 case 0x21: /* AND Gv,Ev */ 19966 if (!validF2orF3) goto decode_failure; 19967 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19968 return delta; 19969 19970 case 0x22: /* AND Eb,Gb */ 19971 if (haveF2orF3(pfx)) goto decode_failure; 19972 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" ); 19973 return delta; 19974 case 0x23: /* AND Ev,Gv */ 19975 if (haveF2orF3(pfx)) goto decode_failure; 19976 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" ); 19977 return delta; 19978 19979 case 0x24: /* AND Ib, AL */ 19980 if (haveF2orF3(pfx)) goto decode_failure; 19981 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" ); 19982 return delta; 19983 case 0x25: /* AND Iv, eAX */ 19984 if (haveF2orF3(pfx)) goto decode_failure; 19985 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" ); 19986 return delta; 19987 19988 case 0x28: /* SUB Gb,Eb */ 19989 if (!validF2orF3) goto decode_failure; 19990 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 19991 return delta; 19992 case 0x29: /* SUB Gv,Ev */ 19993 if (!validF2orF3) goto decode_failure; 19994 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 19995 return delta; 19996 19997 case 0x2A: /* SUB Eb,Gb */ 19998 if (haveF2orF3(pfx)) goto decode_failure; 19999 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" ); 20000 return delta; 20001 case 0x2B: /* SUB 
Ev,Gv */ 20002 if (haveF2orF3(pfx)) goto decode_failure; 20003 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" ); 20004 return delta; 20005 20006 case 0x2C: /* SUB Ib, AL */ 20007 if (haveF2orF3(pfx)) goto decode_failure; 20008 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" ); 20009 return delta; 20010 case 0x2D: /* SUB Iv, eAX */ 20011 if (haveF2orF3(pfx)) goto decode_failure; 20012 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" ); 20013 return delta; 20014 20015 case 0x30: /* XOR Gb,Eb */ 20016 if (!validF2orF3) goto decode_failure; 20017 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 20018 return delta; 20019 case 0x31: /* XOR Gv,Ev */ 20020 if (!validF2orF3) goto decode_failure; 20021 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 20022 return delta; 20023 20024 case 0x32: /* XOR Eb,Gb */ 20025 if (haveF2orF3(pfx)) goto decode_failure; 20026 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" ); 20027 return delta; 20028 case 0x33: /* XOR Ev,Gv */ 20029 if (haveF2orF3(pfx)) goto decode_failure; 20030 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" ); 20031 return delta; 20032 20033 case 0x34: /* XOR Ib, AL */ 20034 if (haveF2orF3(pfx)) goto decode_failure; 20035 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" ); 20036 return delta; 20037 case 0x35: /* XOR Iv, eAX */ 20038 if (haveF2orF3(pfx)) goto decode_failure; 20039 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" ); 20040 return delta; 20041 20042 case 0x38: /* CMP Gb,Eb */ 20043 if (haveF2orF3(pfx)) goto decode_failure; 20044 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 20045 return delta; 20046 case 0x39: /* CMP Gv,Ev */ 20047 if (haveF2orF3(pfx)) goto decode_failure; 20048 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 20049 return delta; 20050 20051 case 0x3A: /* CMP Eb,Gb 
*/ 20052 if (haveF2orF3(pfx)) goto decode_failure; 20053 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" ); 20054 return delta; 20055 case 0x3B: /* CMP Ev,Gv */ 20056 if (haveF2orF3(pfx)) goto decode_failure; 20057 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" ); 20058 return delta; 20059 20060 case 0x3C: /* CMP Ib, AL */ 20061 if (haveF2orF3(pfx)) goto decode_failure; 20062 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" ); 20063 return delta; 20064 case 0x3D: /* CMP Iv, eAX */ 20065 if (haveF2orF3(pfx)) goto decode_failure; 20066 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" ); 20067 return delta; 20068 20069 case 0x50: /* PUSH eAX */ 20070 case 0x51: /* PUSH eCX */ 20071 case 0x52: /* PUSH eDX */ 20072 case 0x53: /* PUSH eBX */ 20073 case 0x55: /* PUSH eBP */ 20074 case 0x56: /* PUSH eSI */ 20075 case 0x57: /* PUSH eDI */ 20076 case 0x54: /* PUSH eSP */ 20077 /* This is the Right Way, in that the value to be pushed is 20078 established before %rsp is changed, so that pushq %rsp 20079 correctly pushes the old value. */ 20080 if (haveF2orF3(pfx)) goto decode_failure; 20081 vassert(sz == 2 || sz == 4 || sz == 8); 20082 if (sz == 4) 20083 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */ 20084 ty = sz==2 ? 
Ity_I16 : Ity_I64; 20085 t1 = newTemp(ty); 20086 t2 = newTemp(Ity_I64); 20087 assign(t1, getIRegRexB(sz, pfx, opc-0x50)); 20088 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz))); 20089 putIReg64(R_RSP, mkexpr(t2) ); 20090 storeLE(mkexpr(t2),mkexpr(t1)); 20091 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50)); 20092 return delta; 20093 20094 case 0x58: /* POP eAX */ 20095 case 0x59: /* POP eCX */ 20096 case 0x5A: /* POP eDX */ 20097 case 0x5B: /* POP eBX */ 20098 case 0x5D: /* POP eBP */ 20099 case 0x5E: /* POP eSI */ 20100 case 0x5F: /* POP eDI */ 20101 case 0x5C: /* POP eSP */ 20102 if (haveF2orF3(pfx)) goto decode_failure; 20103 vassert(sz == 2 || sz == 4 || sz == 8); 20104 if (sz == 4) 20105 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */ 20106 t1 = newTemp(szToITy(sz)); 20107 t2 = newTemp(Ity_I64); 20108 assign(t2, getIReg64(R_RSP)); 20109 assign(t1, loadLE(szToITy(sz),mkexpr(t2))); 20110 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 20111 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1)); 20112 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58)); 20113 return delta; 20114 20115 case 0x63: /* MOVSX */ 20116 if (haveF2orF3(pfx)) goto decode_failure; 20117 if (haveREX(pfx) && 1==getRexW(pfx)) { 20118 vassert(sz == 8); 20119 /* movsx r/m32 to r64 */ 20120 modrm = getUChar(delta); 20121 if (epartIsReg(modrm)) { 20122 delta++; 20123 putIRegG(8, pfx, modrm, 20124 unop(Iop_32Sto64, 20125 getIRegE(4, pfx, modrm))); 20126 DIP("movslq %s,%s\n", 20127 nameIRegE(4, pfx, modrm), 20128 nameIRegG(8, pfx, modrm)); 20129 return delta; 20130 } else { 20131 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20132 delta += alen; 20133 putIRegG(8, pfx, modrm, 20134 unop(Iop_32Sto64, 20135 loadLE(Ity_I32, mkexpr(addr)))); 20136 DIP("movslq %s,%s\n", dis_buf, 20137 nameIRegG(8, pfx, modrm)); 20138 return delta; 20139 } 20140 } else { 20141 goto decode_failure; 20142 } 20143 20144 case 0x68: /* PUSH Iv */ 20145 if 
(haveF2orF3(pfx)) goto decode_failure; 20146 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 20147 if (sz == 4) sz = 8; 20148 d64 = getSDisp(imin(4,sz),delta); 20149 delta += imin(4,sz); 20150 goto do_push_I; 20151 20152 case 0x69: /* IMUL Iv, Ev, Gv */ 20153 if (haveF2orF3(pfx)) goto decode_failure; 20154 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz ); 20155 return delta; 20156 20157 case 0x6A: /* PUSH Ib, sign-extended to sz */ 20158 if (haveF2orF3(pfx)) goto decode_failure; 20159 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */ 20160 if (sz == 4) sz = 8; 20161 d64 = getSDisp8(delta); delta += 1; 20162 goto do_push_I; 20163 do_push_I: 20164 ty = szToITy(sz); 20165 t1 = newTemp(Ity_I64); 20166 t2 = newTemp(ty); 20167 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 20168 putIReg64(R_RSP, mkexpr(t1) ); 20169 /* stop mkU16 asserting if d32 is a negative 16-bit number 20170 (bug #132813) */ 20171 if (ty == Ity_I16) 20172 d64 &= 0xFFFF; 20173 storeLE( mkexpr(t1), mkU(ty,d64) ); 20174 DIP("push%c $%lld\n", nameISize(sz), (Long)d64); 20175 return delta; 20176 20177 case 0x6B: /* IMUL Ib, Ev, Gv */ 20178 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 ); 20179 return delta; 20180 20181 case 0x70: 20182 case 0x71: 20183 case 0x72: /* JBb/JNAEb (jump below) */ 20184 case 0x73: /* JNBb/JAEb (jump not below) */ 20185 case 0x74: /* JZb/JEb (jump zero) */ 20186 case 0x75: /* JNZb/JNEb (jump not zero) */ 20187 case 0x76: /* JBEb/JNAb (jump below or equal) */ 20188 case 0x77: /* JNBEb/JAb (jump not below or equal) */ 20189 case 0x78: /* JSb (jump negative) */ 20190 case 0x79: /* JSb (jump not negative) */ 20191 case 0x7A: /* JP (jump parity even) */ 20192 case 0x7B: /* JNP/JPO (jump parity odd) */ 20193 case 0x7C: /* JLb/JNGEb (jump less) */ 20194 case 0x7D: /* JGEb/JNLb (jump greater or equal) */ 20195 case 0x7E: /* JLEb/JNGb (jump less or equal) */ 20196 case 0x7F: { /* JGb/JNLEb (jump greater) */ 20197 Long jmpDelta; 20198 const HChar* 
comment = ""; 20199 if (haveF3(pfx)) goto decode_failure; 20200 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20201 jmpDelta = getSDisp8(delta); 20202 vassert(-128 <= jmpDelta && jmpDelta < 128); 20203 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta; 20204 delta++; 20205 if (resteerCisOk 20206 && vex_control.guest_chase_cond 20207 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 20208 && jmpDelta < 0 20209 && resteerOkFn( callback_opaque, (Addr64)d64) ) { 20210 /* Speculation: assume this backward branch is taken. So we 20211 need to emit a side-exit to the insn following this one, 20212 on the negation of the condition, and continue at the 20213 branch target address (d64). If we wind up back at the 20214 first instruction of the trace, just stop; it's better to 20215 let the IR loop unroller handle that case. */ 20216 stmt( IRStmt_Exit( 20217 mk_amd64g_calculate_condition( 20218 (AMD64Condcode)(1 ^ (opc - 0x70))), 20219 Ijk_Boring, 20220 IRConst_U64(guest_RIP_bbstart+delta), 20221 OFFB_RIP ) ); 20222 dres->whatNext = Dis_ResteerC; 20223 dres->continueAt = d64; 20224 comment = "(assumed taken)"; 20225 } 20226 else 20227 if (resteerCisOk 20228 && vex_control.guest_chase_cond 20229 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 20230 && jmpDelta >= 0 20231 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 20232 /* Speculation: assume this forward branch is not taken. So 20233 we need to emit a side-exit to d64 (the dest) and continue 20234 disassembling at the insn immediately following this 20235 one. */ 20236 stmt( IRStmt_Exit( 20237 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)), 20238 Ijk_Boring, 20239 IRConst_U64(d64), 20240 OFFB_RIP ) ); 20241 dres->whatNext = Dis_ResteerC; 20242 dres->continueAt = guest_RIP_bbstart+delta; 20243 comment = "(assumed not taken)"; 20244 } 20245 else { 20246 /* Conservative default translation - end the block at this 20247 point. 
*/ 20248 jcc_01( dres, (AMD64Condcode)(opc - 0x70), 20249 guest_RIP_bbstart+delta, d64 ); 20250 vassert(dres->whatNext == Dis_StopHere); 20251 } 20252 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64, 20253 comment); 20254 return delta; 20255 } 20256 20257 case 0x80: /* Grp1 Ib,Eb */ 20258 modrm = getUChar(delta); 20259 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow 20260 just one for the mem case and also require LOCK in this case. 20261 Note that this erroneously allows XACQ/XREL on CMP since we 20262 don't check the subopcode here. No big deal. */ 20263 if (epartIsReg(modrm) && haveF2orF3(pfx)) 20264 goto decode_failure; 20265 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 20266 goto decode_failure; 20267 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 20268 goto decode_failure; 20269 am_sz = lengthAMode(pfx,delta); 20270 sz = 1; 20271 d_sz = 1; 20272 d64 = getSDisp8(delta + am_sz); 20273 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 20274 return delta; 20275 20276 case 0x81: /* Grp1 Iv,Ev */ 20277 modrm = getUChar(delta); 20278 /* Same comment as for case 0x80 just above. 
*/ 20279 if (epartIsReg(modrm) && haveF2orF3(pfx)) 20280 goto decode_failure; 20281 if (!epartIsReg(modrm) && haveF2andF3(pfx)) 20282 goto decode_failure; 20283 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx)) 20284 goto decode_failure; 20285 am_sz = lengthAMode(pfx,delta); 20286 d_sz = imin(sz,4); 20287 d64 = getSDisp(d_sz, delta + am_sz); 20288 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 20289 return delta; 20290 20291 case 0x83: /* Grp1 Ib,Ev */ 20292 if (haveF2orF3(pfx)) goto decode_failure; 20293 modrm = getUChar(delta); 20294 am_sz = lengthAMode(pfx,delta); 20295 d_sz = 1; 20296 d64 = getSDisp8(delta + am_sz); 20297 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 ); 20298 return delta; 20299 20300 case 0x84: /* TEST Eb,Gb */ 20301 if (haveF2orF3(pfx)) goto decode_failure; 20302 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" ); 20303 return delta; 20304 20305 case 0x85: /* TEST Ev,Gv */ 20306 if (haveF2orF3(pfx)) goto decode_failure; 20307 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" ); 20308 return delta; 20309 20310 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK 20311 prefix. Therefore, generate CAS regardless of the presence or 20312 otherwise of a LOCK prefix. */ 20313 case 0x86: /* XCHG Gb,Eb */ 20314 sz = 1; 20315 /* Fall through ... */ 20316 case 0x87: /* XCHG Gv,Ev */ 20317 modrm = getUChar(delta); 20318 /* Check whether F2 or F3 are allowable. For the mem case, one 20319 or the othter but not both are. We don't care about the 20320 presence of LOCK in this case -- XCHG is unusual in this 20321 respect. 
*/ 20322 if (haveF2orF3(pfx)) { 20323 if (epartIsReg(modrm)) { 20324 goto decode_failure; 20325 } else { 20326 if (haveF2andF3(pfx)) 20327 goto decode_failure; 20328 } 20329 } 20330 ty = szToITy(sz); 20331 t1 = newTemp(ty); t2 = newTemp(ty); 20332 if (epartIsReg(modrm)) { 20333 assign(t1, getIRegE(sz, pfx, modrm)); 20334 assign(t2, getIRegG(sz, pfx, modrm)); 20335 putIRegG(sz, pfx, modrm, mkexpr(t1)); 20336 putIRegE(sz, pfx, modrm, mkexpr(t2)); 20337 delta++; 20338 DIP("xchg%c %s, %s\n", 20339 nameISize(sz), nameIRegG(sz, pfx, modrm), 20340 nameIRegE(sz, pfx, modrm)); 20341 } else { 20342 *expect_CAS = True; 20343 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 20344 assign( t1, loadLE(ty, mkexpr(addr)) ); 20345 assign( t2, getIRegG(sz, pfx, modrm) ); 20346 casLE( mkexpr(addr), 20347 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr ); 20348 putIRegG( sz, pfx, modrm, mkexpr(t1) ); 20349 delta += alen; 20350 DIP("xchg%c %s, %s\n", nameISize(sz), 20351 nameIRegG(sz, pfx, modrm), dis_buf); 20352 } 20353 return delta; 20354 20355 case 0x88: { /* MOV Gb,Eb */ 20356 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */ 20357 Bool ok = True; 20358 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok); 20359 if (!ok) goto decode_failure; 20360 return delta; 20361 } 20362 20363 case 0x89: { /* MOV Gv,Ev */ 20364 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. 
*/ 20365 Bool ok = True; 20366 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok); 20367 if (!ok) goto decode_failure; 20368 return delta; 20369 } 20370 20371 case 0x8A: /* MOV Eb,Gb */ 20372 if (haveF2orF3(pfx)) goto decode_failure; 20373 delta = dis_mov_E_G(vbi, pfx, 1, delta); 20374 return delta; 20375 20376 case 0x8B: /* MOV Ev,Gv */ 20377 if (haveF2orF3(pfx)) goto decode_failure; 20378 delta = dis_mov_E_G(vbi, pfx, sz, delta); 20379 return delta; 20380 20381 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */ 20382 if (haveF2orF3(pfx)) goto decode_failure; 20383 delta = dis_mov_S_E(vbi, pfx, sz, delta); 20384 return delta; 20385 20386 case 0x8D: /* LEA M,Gv */ 20387 if (haveF2orF3(pfx)) goto decode_failure; 20388 if (sz != 4 && sz != 8) 20389 goto decode_failure; 20390 modrm = getUChar(delta); 20391 if (epartIsReg(modrm)) 20392 goto decode_failure; 20393 /* NOTE! this is the one place where a segment override prefix 20394 has no effect on the address calculation. Therefore we clear 20395 any segment override bits in pfx. */ 20396 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 ); 20397 delta += alen; 20398 /* This is a hack. But it isn't clear that really doing the 20399 calculation at 32 bits is really worth it. Hence for leal, 20400 do the full 64-bit calculation and then truncate it. */ 20401 putIRegG( sz, pfx, modrm, 20402 sz == 4 20403 ? unop(Iop_64to32, mkexpr(addr)) 20404 : mkexpr(addr) 20405 ); 20406 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf, 20407 nameIRegG(sz,pfx,modrm)); 20408 return delta; 20409 20410 case 0x8F: { /* POPQ m64 / POPW m16 */ 20411 Int len; 20412 UChar rm; 20413 /* There is no encoding for 32-bit pop in 64-bit mode. 20414 So sz==4 actually means sz==8. 
*/ 20415 if (haveF2orF3(pfx)) goto decode_failure; 20416 vassert(sz == 2 || sz == 4 20417 || /* tolerate redundant REX.W, see #210481 */ sz == 8); 20418 if (sz == 4) sz = 8; 20419 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20420 20421 rm = getUChar(delta); 20422 20423 /* make sure this instruction is correct POP */ 20424 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0) 20425 goto decode_failure; 20426 /* and has correct size */ 20427 vassert(sz == 8); 20428 20429 t1 = newTemp(Ity_I64); 20430 t3 = newTemp(Ity_I64); 20431 assign( t1, getIReg64(R_RSP) ); 20432 assign( t3, loadLE(Ity_I64, mkexpr(t1)) ); 20433 20434 /* Increase RSP; must be done before the STORE. Intel manual 20435 says: If the RSP register is used as a base register for 20436 addressing a destination operand in memory, the POP 20437 instruction computes the effective address of the operand 20438 after it increments the RSP register. */ 20439 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) ); 20440 20441 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 ); 20442 storeLE( mkexpr(addr), mkexpr(t3) ); 20443 20444 DIP("popl %s\n", dis_buf); 20445 20446 delta += len; 20447 return delta; 20448 } 20449 20450 case 0x90: /* XCHG eAX,eAX */ 20451 /* detect and handle F3 90 (rep nop) specially */ 20452 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) { 20453 DIP("rep nop (P4 pause)\n"); 20454 /* "observe" the hint. The Vex client needs to be careful not 20455 to cause very long delays as a result, though. */ 20456 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta); 20457 vassert(dres->whatNext == Dis_StopHere); 20458 return delta; 20459 } 20460 /* detect and handle NOPs specially */ 20461 if (/* F2/F3 probably change meaning completely */ 20462 !haveF2orF3(pfx) 20463 /* If REX.B is 1, we're not exchanging rAX with itself */ 20464 && getRexB(pfx)==0 ) { 20465 DIP("nop\n"); 20466 return delta; 20467 } 20468 /* else fall through to normal case. 
*/ 20469 case 0x91: /* XCHG rAX,rCX */ 20470 case 0x92: /* XCHG rAX,rDX */ 20471 case 0x93: /* XCHG rAX,rBX */ 20472 case 0x94: /* XCHG rAX,rSP */ 20473 case 0x95: /* XCHG rAX,rBP */ 20474 case 0x96: /* XCHG rAX,rSI */ 20475 case 0x97: /* XCHG rAX,rDI */ 20476 /* guard against mutancy */ 20477 if (haveF2orF3(pfx)) goto decode_failure; 20478 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 ); 20479 return delta; 20480 20481 case 0x98: /* CBW */ 20482 if (haveF2orF3(pfx)) goto decode_failure; 20483 if (sz == 8) { 20484 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) ); 20485 DIP(/*"cdqe\n"*/"cltq"); 20486 return delta; 20487 } 20488 if (sz == 4) { 20489 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) ); 20490 DIP("cwtl\n"); 20491 return delta; 20492 } 20493 if (sz == 2) { 20494 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) ); 20495 DIP("cbw\n"); 20496 return delta; 20497 } 20498 goto decode_failure; 20499 20500 case 0x99: /* CWD/CDQ/CQO */ 20501 if (haveF2orF3(pfx)) goto decode_failure; 20502 vassert(sz == 2 || sz == 4 || sz == 8); 20503 ty = szToITy(sz); 20504 putIRegRDX( sz, 20505 binop(mkSizedOp(ty,Iop_Sar8), 20506 getIRegRAX(sz), 20507 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) ); 20508 DIP(sz == 2 ? "cwd\n" 20509 : (sz == 4 ? /*"cdq\n"*/ "cltd\n" 20510 : "cqo\n")); 20511 return delta; 20512 20513 case 0x9B: /* FWAIT (X87 insn) */ 20514 /* ignore? */ 20515 DIP("fwait\n"); 20516 return delta; 20517 20518 case 0x9C: /* PUSHF */ { 20519 /* Note. There is no encoding for a 32-bit pushf in 64-bit 20520 mode. So sz==4 actually means sz==8. */ 20521 /* 24 July 06: has also been seen with a redundant REX prefix, 20522 so must also allow sz==8. 
*/ 20523 if (haveF2orF3(pfx)) goto decode_failure; 20524 vassert(sz == 2 || sz == 4 || sz == 8); 20525 if (sz == 4) sz = 8; 20526 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20527 20528 t1 = newTemp(Ity_I64); 20529 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) ); 20530 putIReg64(R_RSP, mkexpr(t1) ); 20531 20532 t2 = newTemp(Ity_I64); 20533 assign( t2, mk_amd64g_calculate_rflags_all() ); 20534 20535 /* Patch in the D flag. This can simply be a copy of bit 10 of 20536 baseBlock[OFFB_DFLAG]. */ 20537 t3 = newTemp(Ity_I64); 20538 assign( t3, binop(Iop_Or64, 20539 mkexpr(t2), 20540 binop(Iop_And64, 20541 IRExpr_Get(OFFB_DFLAG,Ity_I64), 20542 mkU64(1<<10))) 20543 ); 20544 20545 /* And patch in the ID flag. */ 20546 t4 = newTemp(Ity_I64); 20547 assign( t4, binop(Iop_Or64, 20548 mkexpr(t3), 20549 binop(Iop_And64, 20550 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64), 20551 mkU8(21)), 20552 mkU64(1<<21))) 20553 ); 20554 20555 /* And patch in the AC flag too. */ 20556 t5 = newTemp(Ity_I64); 20557 assign( t5, binop(Iop_Or64, 20558 mkexpr(t4), 20559 binop(Iop_And64, 20560 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64), 20561 mkU8(18)), 20562 mkU64(1<<18))) 20563 ); 20564 20565 /* if sz==2, the stored value needs to be narrowed. */ 20566 if (sz == 2) 20567 storeLE( mkexpr(t1), unop(Iop_32to16, 20568 unop(Iop_64to32,mkexpr(t5))) ); 20569 else 20570 storeLE( mkexpr(t1), mkexpr(t5) ); 20571 20572 DIP("pushf%c\n", nameISize(sz)); 20573 return delta; 20574 } 20575 20576 case 0x9D: /* POPF */ 20577 /* Note. There is no encoding for a 32-bit popf in 64-bit mode. 20578 So sz==4 actually means sz==8. 
*/ 20579 if (haveF2orF3(pfx)) goto decode_failure; 20580 vassert(sz == 2 || sz == 4); 20581 if (sz == 4) sz = 8; 20582 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists 20583 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64); 20584 assign(t2, getIReg64(R_RSP)); 20585 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2)))); 20586 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz))); 20587 /* t1 is the flag word. Mask out everything except OSZACP and 20588 set the flags thunk to AMD64G_CC_OP_COPY. */ 20589 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 20590 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 20591 stmt( IRStmt_Put( OFFB_CC_DEP1, 20592 binop(Iop_And64, 20593 mkexpr(t1), 20594 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P 20595 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z 20596 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O ) 20597 ) 20598 ) 20599 ); 20600 20601 /* Also need to set the D flag, which is held in bit 10 of t1. 20602 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. 
*/ 20603 stmt( IRStmt_Put( 20604 OFFB_DFLAG, 20605 IRExpr_ITE( 20606 unop(Iop_64to1, 20607 binop(Iop_And64, 20608 binop(Iop_Shr64, mkexpr(t1), mkU8(10)), 20609 mkU64(1))), 20610 mkU64(0xFFFFFFFFFFFFFFFFULL), 20611 mkU64(1))) 20612 ); 20613 20614 /* And set the ID flag */ 20615 stmt( IRStmt_Put( 20616 OFFB_IDFLAG, 20617 IRExpr_ITE( 20618 unop(Iop_64to1, 20619 binop(Iop_And64, 20620 binop(Iop_Shr64, mkexpr(t1), mkU8(21)), 20621 mkU64(1))), 20622 mkU64(1), 20623 mkU64(0))) 20624 ); 20625 20626 /* And set the AC flag too */ 20627 stmt( IRStmt_Put( 20628 OFFB_ACFLAG, 20629 IRExpr_ITE( 20630 unop(Iop_64to1, 20631 binop(Iop_And64, 20632 binop(Iop_Shr64, mkexpr(t1), mkU8(18)), 20633 mkU64(1))), 20634 mkU64(1), 20635 mkU64(0))) 20636 ); 20637 20638 DIP("popf%c\n", nameISize(sz)); 20639 return delta; 20640 20641 case 0x9E: /* SAHF */ 20642 codegen_SAHF(); 20643 DIP("sahf\n"); 20644 return delta; 20645 20646 case 0x9F: /* LAHF */ 20647 codegen_LAHF(); 20648 DIP("lahf\n"); 20649 return delta; 20650 20651 case 0xA0: /* MOV Ob,AL */ 20652 if (have66orF2orF3(pfx)) goto decode_failure; 20653 sz = 1; 20654 /* Fall through ... */ 20655 case 0xA1: /* MOV Ov,eAX */ 20656 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20657 goto decode_failure; 20658 d64 = getDisp64(delta); 20659 delta += 8; 20660 ty = szToITy(sz); 20661 addr = newTemp(Ity_I64); 20662 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20663 putIRegRAX(sz, loadLE( ty, mkexpr(addr) )); 20664 DIP("mov%c %s0x%llx, %s\n", nameISize(sz), 20665 segRegTxt(pfx), (ULong)d64, 20666 nameIRegRAX(sz)); 20667 return delta; 20668 20669 case 0xA2: /* MOV AL,Ob */ 20670 if (have66orF2orF3(pfx)) goto decode_failure; 20671 sz = 1; 20672 /* Fall through ... 
*/ 20673 case 0xA3: /* MOV eAX,Ov */ 20674 if (sz != 8 && sz != 4 && sz != 2 && sz != 1) 20675 goto decode_failure; 20676 d64 = getDisp64(delta); 20677 delta += 8; 20678 ty = szToITy(sz); 20679 addr = newTemp(Ity_I64); 20680 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) ); 20681 storeLE( mkexpr(addr), getIRegRAX(sz) ); 20682 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz), 20683 segRegTxt(pfx), (ULong)d64); 20684 return delta; 20685 20686 case 0xA4: 20687 case 0xA5: 20688 /* F3 A4: rep movsb */ 20689 if (haveF3(pfx) && !haveF2(pfx)) { 20690 if (opc == 0xA4) 20691 sz = 1; 20692 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz, 20693 guest_RIP_curr_instr, 20694 guest_RIP_bbstart+delta, "rep movs", pfx ); 20695 dres->whatNext = Dis_StopHere; 20696 return delta; 20697 } 20698 /* A4: movsb */ 20699 if (!haveF3(pfx) && !haveF2(pfx)) { 20700 if (opc == 0xA4) 20701 sz = 1; 20702 dis_string_op( dis_MOVS, sz, "movs", pfx ); 20703 return delta; 20704 } 20705 goto decode_failure; 20706 20707 case 0xA6: 20708 case 0xA7: 20709 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */ 20710 if (haveF3(pfx) && !haveF2(pfx)) { 20711 if (opc == 0xA6) 20712 sz = 1; 20713 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz, 20714 guest_RIP_curr_instr, 20715 guest_RIP_bbstart+delta, "repe cmps", pfx ); 20716 dres->whatNext = Dis_StopHere; 20717 return delta; 20718 } 20719 goto decode_failure; 20720 20721 case 0xAA: 20722 case 0xAB: 20723 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */ 20724 if (haveF3(pfx) && !haveF2(pfx)) { 20725 if (opc == 0xAA) 20726 sz = 1; 20727 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz, 20728 guest_RIP_curr_instr, 20729 guest_RIP_bbstart+delta, "rep stos", pfx ); 20730 vassert(dres->whatNext == Dis_StopHere); 20731 return delta; 20732 } 20733 /* AA/AB: stosb/stos{w,l,q} */ 20734 if (!haveF3(pfx) && !haveF2(pfx)) { 20735 if (opc == 0xAA) 20736 sz = 1; 20737 dis_string_op( dis_STOS, sz, "stos", pfx ); 20738 return delta; 20739 } 20740 goto decode_failure; 20741 20742 
case 0xA8: /* TEST Ib, AL */ 20743 if (haveF2orF3(pfx)) goto decode_failure; 20744 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" ); 20745 return delta; 20746 case 0xA9: /* TEST Iv, eAX */ 20747 if (haveF2orF3(pfx)) goto decode_failure; 20748 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" ); 20749 return delta; 20750 20751 case 0xAC: /* LODS, no REP prefix */ 20752 case 0xAD: 20753 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx ); 20754 return delta; 20755 20756 case 0xAE: 20757 case 0xAF: 20758 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */ 20759 if (haveF2(pfx) && !haveF3(pfx)) { 20760 if (opc == 0xAE) 20761 sz = 1; 20762 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz, 20763 guest_RIP_curr_instr, 20764 guest_RIP_bbstart+delta, "repne scas", pfx ); 20765 vassert(dres->whatNext == Dis_StopHere); 20766 return delta; 20767 } 20768 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */ 20769 if (!haveF2(pfx) && haveF3(pfx)) { 20770 if (opc == 0xAE) 20771 sz = 1; 20772 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz, 20773 guest_RIP_curr_instr, 20774 guest_RIP_bbstart+delta, "repe scas", pfx ); 20775 vassert(dres->whatNext == Dis_StopHere); 20776 return delta; 20777 } 20778 /* AE/AF: scasb/scas{w,l,q} */ 20779 if (!haveF2(pfx) && !haveF3(pfx)) { 20780 if (opc == 0xAE) 20781 sz = 1; 20782 dis_string_op( dis_SCAS, sz, "scas", pfx ); 20783 return delta; 20784 } 20785 goto decode_failure; 20786 20787 /* XXXX be careful here with moves to AH/BH/CH/DH */ 20788 case 0xB0: /* MOV imm,AL */ 20789 case 0xB1: /* MOV imm,CL */ 20790 case 0xB2: /* MOV imm,DL */ 20791 case 0xB3: /* MOV imm,BL */ 20792 case 0xB4: /* MOV imm,AH */ 20793 case 0xB5: /* MOV imm,CH */ 20794 case 0xB6: /* MOV imm,DH */ 20795 case 0xB7: /* MOV imm,BH */ 20796 if (haveF2orF3(pfx)) goto decode_failure; 20797 d64 = getUChar(delta); 20798 delta += 1; 20799 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64)); 20800 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0)); 20801 return delta; 
20802 20803 case 0xB8: /* MOV imm,eAX */ 20804 case 0xB9: /* MOV imm,eCX */ 20805 case 0xBA: /* MOV imm,eDX */ 20806 case 0xBB: /* MOV imm,eBX */ 20807 case 0xBC: /* MOV imm,eSP */ 20808 case 0xBD: /* MOV imm,eBP */ 20809 case 0xBE: /* MOV imm,eSI */ 20810 case 0xBF: /* MOV imm,eDI */ 20811 /* This is the one-and-only place where 64-bit literals are 20812 allowed in the instruction stream. */ 20813 if (haveF2orF3(pfx)) goto decode_failure; 20814 if (sz == 8) { 20815 d64 = getDisp64(delta); 20816 delta += 8; 20817 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64)); 20818 DIP("movabsq $%lld,%s\n", (Long)d64, 20819 nameIRegRexB(8,pfx,opc-0xB8)); 20820 } else { 20821 d64 = getSDisp(imin(4,sz),delta); 20822 delta += imin(4,sz); 20823 putIRegRexB(sz, pfx, opc-0xB8, 20824 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20825 DIP("mov%c $%lld,%s\n", nameISize(sz), 20826 (Long)d64, 20827 nameIRegRexB(sz,pfx,opc-0xB8)); 20828 } 20829 return delta; 20830 20831 case 0xC0: { /* Grp2 Ib,Eb */ 20832 Bool decode_OK = True; 20833 if (haveF2orF3(pfx)) goto decode_failure; 20834 modrm = getUChar(delta); 20835 am_sz = lengthAMode(pfx,delta); 20836 d_sz = 1; 20837 d64 = getUChar(delta + am_sz); 20838 sz = 1; 20839 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20840 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20841 if (!decode_OK) goto decode_failure; 20842 return delta; 20843 } 20844 20845 case 0xC1: { /* Grp2 Ib,Ev */ 20846 Bool decode_OK = True; 20847 if (haveF2orF3(pfx)) goto decode_failure; 20848 modrm = getUChar(delta); 20849 am_sz = lengthAMode(pfx,delta); 20850 d_sz = 1; 20851 d64 = getUChar(delta + am_sz); 20852 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 20853 mkU8(d64 & 0xFF), NULL, &decode_OK ); 20854 if (!decode_OK) goto decode_failure; 20855 return delta; 20856 } 20857 20858 case 0xC2: /* RET imm16 */ 20859 if (have66orF3(pfx)) goto decode_failure; 20860 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ 20861 d64 = getUDisp16(delta); 20862 delta += 2; 20863 dis_ret(dres, vbi, d64); 20864 DIP("ret $%lld\n", d64); 20865 return delta; 20866 20867 case 0xC3: /* RET */ 20868 if (have66(pfx)) goto decode_failure; 20869 /* F3 is acceptable on AMD. */ 20870 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 20871 dis_ret(dres, vbi, 0); 20872 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n"); 20873 return delta; 20874 20875 case 0xC6: /* C6 /0 = MOV Ib,Eb */ 20876 sz = 1; 20877 goto maybe_do_Mov_I_E; 20878 case 0xC7: /* C7 /0 = MOV Iv,Ev */ 20879 goto maybe_do_Mov_I_E; 20880 maybe_do_Mov_I_E: 20881 modrm = getUChar(delta); 20882 if (gregLO3ofRM(modrm) == 0) { 20883 if (epartIsReg(modrm)) { 20884 /* Neither F2 nor F3 are allowable. */ 20885 if (haveF2orF3(pfx)) goto decode_failure; 20886 delta++; /* mod/rm byte */ 20887 d64 = getSDisp(imin(4,sz),delta); 20888 delta += imin(4,sz); 20889 putIRegE(sz, pfx, modrm, 20890 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20891 DIP("mov%c $%lld, %s\n", nameISize(sz), 20892 (Long)d64, 20893 nameIRegE(sz,pfx,modrm)); 20894 } else { 20895 if (haveF2(pfx)) goto decode_failure; 20896 /* F3(XRELEASE) is allowable here */ 20897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 20898 /*xtra*/imin(4,sz) ); 20899 delta += alen; 20900 d64 = getSDisp(imin(4,sz),delta); 20901 delta += imin(4,sz); 20902 storeLE(mkexpr(addr), 20903 mkU(szToITy(sz), d64 & mkSizeMask(sz))); 20904 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf); 20905 } 20906 return delta; 20907 } 20908 /* BEGIN HACKY SUPPORT FOR xbegin */ 20909 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4 20910 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20911 delta++; /* mod/rm byte */ 20912 d64 = getSDisp(4,delta); 20913 delta += 4; 20914 guest_RIP_next_mustcheck = True; 20915 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 20916 Addr64 failAddr = guest_RIP_bbstart + delta + d64; 20917 /* EAX contains the failure status code. 
Bit 3 is "Set if an 20918 internal buffer overflowed", which seems like the 20919 least-bogus choice we can make here. */ 20920 putIRegRAX(4, mkU32(1<<3)); 20921 /* And jump to the fail address. */ 20922 jmp_lit(dres, Ijk_Boring, failAddr); 20923 vassert(dres->whatNext == Dis_StopHere); 20924 DIP("xbeginq 0x%llx\n", failAddr); 20925 return delta; 20926 } 20927 /* END HACKY SUPPORT FOR xbegin */ 20928 /* BEGIN HACKY SUPPORT FOR xabort */ 20929 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1 20930 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 20931 delta++; /* mod/rm byte */ 20932 abyte = getUChar(delta); delta++; 20933 /* There is never a real transaction in progress, so do nothing. */ 20934 DIP("xabort $%d", (Int)abyte); 20935 return delta; 20936 } 20937 /* END HACKY SUPPORT FOR xabort */ 20938 goto decode_failure; 20939 20940 case 0xC8: /* ENTER */ 20941 /* Same comments re operand size as for LEAVE below apply. 20942 Also, only handles the case "enter $imm16, $0"; other cases 20943 for the second operand (nesting depth) are not handled. */ 20944 if (sz != 4) 20945 goto decode_failure; 20946 d64 = getUDisp16(delta); 20947 delta += 2; 20948 vassert(d64 >= 0 && d64 <= 0xFFFF); 20949 if (getUChar(delta) != 0) 20950 goto decode_failure; 20951 delta++; 20952 /* Intel docs seem to suggest: 20953 push rbp 20954 temp = rsp 20955 rbp = temp 20956 rsp = rsp - imm16 20957 */ 20958 t1 = newTemp(Ity_I64); 20959 assign(t1, getIReg64(R_RBP)); 20960 t2 = newTemp(Ity_I64); 20961 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 20962 putIReg64(R_RSP, mkexpr(t2)); 20963 storeLE(mkexpr(t2), mkexpr(t1)); 20964 putIReg64(R_RBP, mkexpr(t2)); 20965 if (d64 > 0) { 20966 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64))); 20967 } 20968 DIP("enter $%u, $0\n", (UInt)d64); 20969 return delta; 20970 20971 case 0xC9: /* LEAVE */ 20972 /* In 64-bit mode this defaults to a 64-bit operand size. There 20973 is no way to encode a 32-bit variant. 
Hence sz==4 but we do 20974 it as if sz=8. */ 20975 if (sz != 4) 20976 goto decode_failure; 20977 t1 = newTemp(Ity_I64); 20978 t2 = newTemp(Ity_I64); 20979 assign(t1, getIReg64(R_RBP)); 20980 /* First PUT RSP looks redundant, but need it because RSP must 20981 always be up-to-date for Memcheck to work... */ 20982 putIReg64(R_RSP, mkexpr(t1)); 20983 assign(t2, loadLE(Ity_I64,mkexpr(t1))); 20984 putIReg64(R_RBP, mkexpr(t2)); 20985 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) ); 20986 DIP("leave\n"); 20987 return delta; 20988 20989 case 0xCC: /* INT 3 */ 20990 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta); 20991 vassert(dres->whatNext == Dis_StopHere); 20992 DIP("int $0x3\n"); 20993 return delta; 20994 20995 case 0xCD: /* INT imm8 */ 20996 d64 = getUChar(delta); delta++; 20997 20998 /* Handle int $0xD2 (Solaris fasttrap syscalls). */ 20999 if (d64 == 0xD2) { 21000 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta); 21001 vassert(dres->whatNext == Dis_StopHere); 21002 DIP("int $0xD2\n"); 21003 return delta; 21004 } 21005 goto decode_failure; 21006 21007 case 0xD0: { /* Grp2 1,Eb */ 21008 Bool decode_OK = True; 21009 if (haveF2orF3(pfx)) goto decode_failure; 21010 modrm = getUChar(delta); 21011 am_sz = lengthAMode(pfx,delta); 21012 d_sz = 0; 21013 d64 = 1; 21014 sz = 1; 21015 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 21016 mkU8(d64), NULL, &decode_OK ); 21017 if (!decode_OK) goto decode_failure; 21018 return delta; 21019 } 21020 21021 case 0xD1: { /* Grp2 1,Ev */ 21022 Bool decode_OK = True; 21023 if (haveF2orF3(pfx)) goto decode_failure; 21024 modrm = getUChar(delta); 21025 am_sz = lengthAMode(pfx,delta); 21026 d_sz = 0; 21027 d64 = 1; 21028 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 21029 mkU8(d64), NULL, &decode_OK ); 21030 if (!decode_OK) goto decode_failure; 21031 return delta; 21032 } 21033 21034 case 0xD2: { /* Grp2 CL,Eb */ 21035 Bool decode_OK = True; 21036 if (haveF2orF3(pfx)) goto decode_failure; 
21037 modrm = getUChar(delta); 21038 am_sz = lengthAMode(pfx,delta); 21039 d_sz = 0; 21040 sz = 1; 21041 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 21042 getIRegCL(), "%cl", &decode_OK ); 21043 if (!decode_OK) goto decode_failure; 21044 return delta; 21045 } 21046 21047 case 0xD3: { /* Grp2 CL,Ev */ 21048 Bool decode_OK = True; 21049 if (haveF2orF3(pfx)) goto decode_failure; 21050 modrm = getUChar(delta); 21051 am_sz = lengthAMode(pfx,delta); 21052 d_sz = 0; 21053 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, 21054 getIRegCL(), "%cl", &decode_OK ); 21055 if (!decode_OK) goto decode_failure; 21056 return delta; 21057 } 21058 21059 case 0xD8: /* X87 instructions */ 21060 case 0xD9: 21061 case 0xDA: 21062 case 0xDB: 21063 case 0xDC: 21064 case 0xDD: 21065 case 0xDE: 21066 case 0xDF: { 21067 Bool redundantREXWok = False; 21068 21069 if (haveF2orF3(pfx)) 21070 goto decode_failure; 21071 21072 /* kludge to tolerate redundant rex.w prefixes (should do this 21073 properly one day) */ 21074 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */ 21075 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ ) 21076 redundantREXWok = True; 21077 21078 Bool size_OK = False; 21079 if ( sz == 4 ) 21080 size_OK = True; 21081 else if ( sz == 8 ) 21082 size_OK = redundantREXWok; 21083 else if ( sz == 2 ) { 21084 int mod_rm = getUChar(delta+0); 21085 int reg = gregLO3ofRM(mod_rm); 21086 /* The HotSpot JVM uses these */ 21087 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ || 21088 reg == 4 /* FNSAVE */ || 21089 reg == 6 /* FRSTOR */ ) ) 21090 size_OK = True; 21091 } 21092 /* AMD manual says 0x66 size override is ignored, except where 21093 it is meaningful */ 21094 if (!size_OK) 21095 goto decode_failure; 21096 21097 Bool decode_OK = False; 21098 delta = dis_FPU ( &decode_OK, vbi, pfx, delta ); 21099 if (!decode_OK) 21100 goto decode_failure; 21101 21102 return delta; 21103 } 21104 21105 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 
&& ZF==0 */ 21106 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */ 21107 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */ 21108 { /* The docs say this uses rCX as a count depending on the 21109 address size override, not the operand one. */ 21110 IRExpr* zbit = NULL; 21111 IRExpr* count = NULL; 21112 IRExpr* cond = NULL; 21113 const HChar* xtra = NULL; 21114 21115 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure; 21116 /* So at this point we've rejected any variants which appear to 21117 be governed by the usual operand-size modifiers. Hence only 21118 the address size prefix can have an effect. It changes the 21119 size from 64 (default) to 32. */ 21120 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta); 21121 delta++; 21122 if (haveASO(pfx)) { 21123 /* 64to32 of 64-bit get is merely a get-put improvement 21124 trick. */ 21125 putIReg32(R_RCX, binop(Iop_Sub32, 21126 unop(Iop_64to32, getIReg64(R_RCX)), 21127 mkU32(1))); 21128 } else { 21129 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1))); 21130 } 21131 21132 /* This is correct, both for 32- and 64-bit versions. If we're 21133 doing a 32-bit dec and the result is zero then the default 21134 zero extension rule will cause the upper 32 bits to be zero 21135 too. Hence a 64-bit check against zero is OK. */ 21136 count = getIReg64(R_RCX); 21137 cond = binop(Iop_CmpNE64, count, mkU64(0)); 21138 switch (opc) { 21139 case 0xE2: 21140 xtra = ""; 21141 break; 21142 case 0xE1: 21143 xtra = "e"; 21144 zbit = mk_amd64g_calculate_condition( AMD64CondZ ); 21145 cond = mkAnd1(cond, zbit); 21146 break; 21147 case 0xE0: 21148 xtra = "ne"; 21149 zbit = mk_amd64g_calculate_condition( AMD64CondNZ ); 21150 cond = mkAnd1(cond, zbit); 21151 break; 21152 default: 21153 vassert(0); 21154 } 21155 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) ); 21156 21157 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? 
"l" : "", (ULong)d64); 21158 return delta; 21159 } 21160 21161 case 0xE3: 21162 /* JRCXZ or JECXZ, depending address size override. */ 21163 if (have66orF2orF3(pfx)) goto decode_failure; 21164 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 21165 delta++; 21166 if (haveASO(pfx)) { 21167 /* 32-bit */ 21168 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 21169 unop(Iop_32Uto64, getIReg32(R_RCX)), 21170 mkU64(0)), 21171 Ijk_Boring, 21172 IRConst_U64(d64), 21173 OFFB_RIP 21174 )); 21175 DIP("jecxz 0x%llx\n", (ULong)d64); 21176 } else { 21177 /* 64-bit */ 21178 stmt( IRStmt_Exit( binop(Iop_CmpEQ64, 21179 getIReg64(R_RCX), 21180 mkU64(0)), 21181 Ijk_Boring, 21182 IRConst_U64(d64), 21183 OFFB_RIP 21184 )); 21185 DIP("jrcxz 0x%llx\n", (ULong)d64); 21186 } 21187 return delta; 21188 21189 case 0xE4: /* IN imm8, AL */ 21190 sz = 1; 21191 t1 = newTemp(Ity_I64); 21192 abyte = getUChar(delta); delta++; 21193 assign(t1, mkU64( abyte & 0xFF )); 21194 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 21195 goto do_IN; 21196 case 0xE5: /* IN imm8, eAX */ 21197 if (!(sz == 2 || sz == 4)) goto decode_failure; 21198 t1 = newTemp(Ity_I64); 21199 abyte = getUChar(delta); delta++; 21200 assign(t1, mkU64( abyte & 0xFF )); 21201 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz)); 21202 goto do_IN; 21203 case 0xEC: /* IN %DX, AL */ 21204 sz = 1; 21205 t1 = newTemp(Ity_I64); 21206 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 21207 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 21208 nameIRegRAX(sz)); 21209 goto do_IN; 21210 case 0xED: /* IN %DX, eAX */ 21211 if (!(sz == 2 || sz == 4)) goto decode_failure; 21212 t1 = newTemp(Ity_I64); 21213 assign(t1, unop(Iop_16Uto64, getIRegRDX(2))); 21214 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2), 21215 nameIRegRAX(sz)); 21216 goto do_IN; 21217 do_IN: { 21218 /* At this point, sz indicates the width, and t1 is a 64-bit 21219 value giving port number. 
*/ 21220 IRDirty* d; 21221 if (haveF2orF3(pfx)) goto decode_failure; 21222 vassert(sz == 1 || sz == 2 || sz == 4); 21223 ty = szToITy(sz); 21224 t2 = newTemp(Ity_I64); 21225 d = unsafeIRDirty_1_N( 21226 t2, 21227 0/*regparms*/, 21228 "amd64g_dirtyhelper_IN", 21229 &amd64g_dirtyhelper_IN, 21230 mkIRExprVec_2( mkexpr(t1), mkU64(sz) ) 21231 ); 21232 /* do the call, dumping the result in t2. */ 21233 stmt( IRStmt_Dirty(d) ); 21234 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) ); 21235 return delta; 21236 } 21237 21238 case 0xE6: /* OUT AL, imm8 */ 21239 sz = 1; 21240 t1 = newTemp(Ity_I64); 21241 abyte = getUChar(delta); delta++; 21242 assign( t1, mkU64( abyte & 0xFF ) ); 21243 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 21244 goto do_OUT; 21245 case 0xE7: /* OUT eAX, imm8 */ 21246 if (!(sz == 2 || sz == 4)) goto decode_failure; 21247 t1 = newTemp(Ity_I64); 21248 abyte = getUChar(delta); delta++; 21249 assign( t1, mkU64( abyte & 0xFF ) ); 21250 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte); 21251 goto do_OUT; 21252 case 0xEE: /* OUT AL, %DX */ 21253 sz = 1; 21254 t1 = newTemp(Ity_I64); 21255 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 21256 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 21257 nameIRegRDX(2)); 21258 goto do_OUT; 21259 case 0xEF: /* OUT eAX, %DX */ 21260 if (!(sz == 2 || sz == 4)) goto decode_failure; 21261 t1 = newTemp(Ity_I64); 21262 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) ); 21263 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz), 21264 nameIRegRDX(2)); 21265 goto do_OUT; 21266 do_OUT: { 21267 /* At this point, sz indicates the width, and t1 is a 64-bit 21268 value giving port number. 
*/ 21269 IRDirty* d; 21270 if (haveF2orF3(pfx)) goto decode_failure; 21271 vassert(sz == 1 || sz == 2 || sz == 4); 21272 ty = szToITy(sz); 21273 d = unsafeIRDirty_0_N( 21274 0/*regparms*/, 21275 "amd64g_dirtyhelper_OUT", 21276 &amd64g_dirtyhelper_OUT, 21277 mkIRExprVec_3( mkexpr(t1), 21278 widenUto64( getIRegRAX(sz) ), 21279 mkU64(sz) ) 21280 ); 21281 stmt( IRStmt_Dirty(d) ); 21282 return delta; 21283 } 21284 21285 case 0xE8: /* CALL J4 */ 21286 if (haveF3(pfx)) goto decode_failure; 21287 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 21288 d64 = getSDisp32(delta); delta += 4; 21289 d64 += (guest_RIP_bbstart+delta); 21290 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */ 21291 t1 = newTemp(Ity_I64); 21292 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8))); 21293 putIReg64(R_RSP, mkexpr(t1)); 21294 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta)); 21295 t2 = newTemp(Ity_I64); 21296 assign(t2, mkU64((Addr64)d64)); 21297 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32"); 21298 if (resteerOkFn( callback_opaque, (Addr64)d64) ) { 21299 /* follow into the call target. */ 21300 dres->whatNext = Dis_ResteerU; 21301 dres->continueAt = d64; 21302 } else { 21303 jmp_lit(dres, Ijk_Call, d64); 21304 vassert(dres->whatNext == Dis_StopHere); 21305 } 21306 DIP("call 0x%llx\n", (ULong)d64); 21307 return delta; 21308 21309 case 0xE9: /* Jv (jump, 16/32 offset) */ 21310 if (haveF3(pfx)) goto decode_failure; 21311 if (sz != 4) 21312 goto decode_failure; /* JRS added 2004 July 11 */ 21313 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ 21314 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); 21315 delta += sz; 21316 if (resteerOkFn(callback_opaque, (Addr64)d64)) { 21317 dres->whatNext = Dis_ResteerU; 21318 dres->continueAt = d64; 21319 } else { 21320 jmp_lit(dres, Ijk_Boring, d64); 21321 vassert(dres->whatNext == Dis_StopHere); 21322 } 21323 DIP("jmp 0x%llx\n", (ULong)d64); 21324 return delta; 21325 21326 case 0xEB: /* Jb (jump, byte offset) */ 21327 if (haveF3(pfx)) goto decode_failure; 21328 if (sz != 4) 21329 goto decode_failure; /* JRS added 2004 July 11 */ 21330 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 21331 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); 21332 delta++; 21333 if (resteerOkFn(callback_opaque, (Addr64)d64)) { 21334 dres->whatNext = Dis_ResteerU; 21335 dres->continueAt = d64; 21336 } else { 21337 jmp_lit(dres, Ijk_Boring, d64); 21338 vassert(dres->whatNext == Dis_StopHere); 21339 } 21340 DIP("jmp-8 0x%llx\n", (ULong)d64); 21341 return delta; 21342 21343 case 0xF5: /* CMC */ 21344 case 0xF8: /* CLC */ 21345 case 0xF9: /* STC */ 21346 t1 = newTemp(Ity_I64); 21347 t2 = newTemp(Ity_I64); 21348 assign( t1, mk_amd64g_calculate_rflags_all() ); 21349 switch (opc) { 21350 case 0xF5: 21351 assign( t2, binop(Iop_Xor64, mkexpr(t1), 21352 mkU64(AMD64G_CC_MASK_C))); 21353 DIP("cmc\n"); 21354 break; 21355 case 0xF8: 21356 assign( t2, binop(Iop_And64, mkexpr(t1), 21357 mkU64(~AMD64G_CC_MASK_C))); 21358 DIP("clc\n"); 21359 break; 21360 case 0xF9: 21361 assign( t2, binop(Iop_Or64, mkexpr(t1), 21362 mkU64(AMD64G_CC_MASK_C))); 21363 DIP("stc\n"); 21364 break; 21365 default: 21366 vpanic("disInstr(x64)(cmc/clc/stc)"); 21367 } 21368 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21369 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21370 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) )); 21371 /* Set NDEP even though it isn't used. This makes redundant-PUT 21372 elimination of previous stores to this field work better. 
*/ 21373 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21374 return delta; 21375 21376 case 0xF6: { /* Grp3 Eb */ 21377 Bool decode_OK = True; 21378 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 21379 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 21380 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK ); 21381 if (!decode_OK) goto decode_failure; 21382 return delta; 21383 } 21384 21385 case 0xF7: { /* Grp3 Ev */ 21386 Bool decode_OK = True; 21387 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 21388 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */ 21389 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK ); 21390 if (!decode_OK) goto decode_failure; 21391 return delta; 21392 } 21393 21394 case 0xFC: /* CLD */ 21395 if (haveF2orF3(pfx)) goto decode_failure; 21396 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) ); 21397 DIP("cld\n"); 21398 return delta; 21399 21400 case 0xFD: /* STD */ 21401 if (haveF2orF3(pfx)) goto decode_failure; 21402 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) ); 21403 DIP("std\n"); 21404 return delta; 21405 21406 case 0xFE: { /* Grp4 Eb */ 21407 Bool decode_OK = True; 21408 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 21409 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */ 21410 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK ); 21411 if (!decode_OK) goto decode_failure; 21412 return delta; 21413 } 21414 21415 case 0xFF: { /* Grp5 Ev */ 21416 Bool decode_OK = True; 21417 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */ 21418 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */ 21419 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK ); 21420 if (!decode_OK) goto decode_failure; 21421 return delta; 21422 } 21423 21424 default: 21425 break; 21426 21427 } 21428 21429 decode_failure: 21430 return deltaIN; /* fail */ 21431 } 21432 21433 21434 /*------------------------------------------------------------*/ 21435 /*--- ---*/ 21436 /*--- Top-level post-escape 
decoders: dis_ESC_0F        ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Generate IR to byte-swap (reverse the byte order of) the value in
   t1, which must be of type 'ty' -- one of Ity_I16, Ity_I32 or
   Ity_I64.  Returns a new temp of the same type holding the swapped
   value.  Asserts on any other type. */
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp t2 = newTemp(ty);
   if (ty == Ity_I64) {
      /* 64-bit swap, done in three stages: first exchange adjacent
         bytes (s8), then adjacent 16-bit groups (s16), and finally
         the two 32-bit halves (t2).  m8/m16/m32 are the masks
         selecting the high element of each pair at each stage. */
      IRTemp m8  = newTemp(Ity_I64);
      IRTemp s8  = newTemp(Ity_I64);
      IRTemp m16 = newTemp(Ity_I64);
      IRTemp s16 = newTemp(Ity_I64);
      IRTemp m32 = newTemp(Ity_I64);
      assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
      assign( s8,
              binop(Iop_Or64,
                    binop(Iop_Shr64,
                          binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
                          mkU8(8)),
                    binop(Iop_And64,
                          binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
                          mkexpr(m8))
                   )
            );

      assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
      assign( s16,
              binop(Iop_Or64,
                    binop(Iop_Shr64,
                          binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
                          mkU8(16)),
                    binop(Iop_And64,
                          binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
                          mkexpr(m16))
                   )
            );

      assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
      assign( t2,
              binop(Iop_Or64,
                    binop(Iop_Shr64,
                          binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
                          mkU8(32)),
                    binop(Iop_And64,
                          binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
                          mkexpr(m32))
                   )
            );
      return t2;
   }
   if (ty == Ity_I32) {
      /* 32-bit swap: OR together the four bytes, each shifted and
         masked into its mirrored position. */
      assign( t2,
         binop(
            Iop_Or32,
            binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
            binop(
               Iop_Or32,
               binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                mkU32(0x00FF0000)),
               binop(Iop_Or32,
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                      mkU32(0x0000FF00)),
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                      mkU32(0x000000FF) )
            )))
      );
      return t2;
   }
   if (ty == Ity_I16) {
      /* 16-bit swap: just exchange the two bytes. */
      assign(t2,
             binop(Iop_Or16,
                   binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
                   binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
      return t2;
   }
   vassert(0);
   /*NOTREACHED*/
   return IRTemp_INVALID;
}


__attribute__((noinline))
static
Long dis_ESC_0F (
        /*MB_OUT*/DisResult* dres,
        /*MB_OUT*/Bool*      expect_CAS,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   d64   = 0;
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    am_sz = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   /* In the first switch, look for ordinary integer insns. */
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) { /* first switch */

   case 0x01:
   {
      modrm = getUChar(delta);
      /* 0F 01 /0 -- SGDT */
      /* 0F 01 /1 -- SIDT */
      if (!epartIsReg(modrm)
          && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try.
*/ 21556 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21557 delta += alen; 21558 switch (gregLO3ofRM(modrm)) { 21559 case 0: DIP("sgdt %s\n", dis_buf); break; 21560 case 1: DIP("sidt %s\n", dis_buf); break; 21561 default: vassert(0); /*NOTREACHED*/ 21562 } 21563 IRDirty* d = unsafeIRDirty_0_N ( 21564 0/*regparms*/, 21565 "amd64g_dirtyhelper_SxDT", 21566 &amd64g_dirtyhelper_SxDT, 21567 mkIRExprVec_2( mkexpr(addr), 21568 mkU64(gregLO3ofRM(modrm)) ) 21569 ); 21570 /* declare we're writing memory */ 21571 d->mFx = Ifx_Write; 21572 d->mAddr = mkexpr(addr); 21573 d->mSize = 6; 21574 stmt( IRStmt_Dirty(d) ); 21575 return delta; 21576 } 21577 /* 0F 01 D0 = XGETBV */ 21578 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21579 delta += 1; 21580 DIP("xgetbv\n"); 21581 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I 21582 am not sure if that translates in to SEGV or to something 21583 else, in user space. */ 21584 t1 = newTemp(Ity_I32); 21585 assign( t1, getIReg32(R_RCX) ); 21586 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)), 21587 Ijk_SigSEGV, 21588 IRConst_U64(guest_RIP_curr_instr), 21589 OFFB_RIP 21590 )); 21591 putIRegRAX(4, mkU32(7)); 21592 putIRegRDX(4, mkU32(0)); 21593 return delta; 21594 } 21595 /* BEGIN HACKY SUPPORT FOR xend */ 21596 /* 0F 01 D5 = XEND */ 21597 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21598 /* We are never in an transaction (xbegin immediately aborts). 21599 So this just always generates a General Protection Fault. 
*/ 21600 delta += 1; 21601 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta); 21602 vassert(dres->whatNext == Dis_StopHere); 21603 DIP("xend\n"); 21604 return delta; 21605 } 21606 /* END HACKY SUPPORT FOR xend */ 21607 /* BEGIN HACKY SUPPORT FOR xtest */ 21608 /* 0F 01 D6 = XTEST */ 21609 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21610 /* Sets ZF because there never is a transaction, and all 21611 CF, OF, SF, PF and AF are always cleared by xtest. */ 21612 delta += 1; 21613 DIP("xtest\n"); 21614 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 21615 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 21616 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) )); 21617 /* Set NDEP even though it isn't used. This makes redundant-PUT 21618 elimination of previous stores to this field work better. */ 21619 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 21620 return delta; 21621 } 21622 /* END HACKY SUPPORT FOR xtest */ 21623 /* 0F 01 F9 = RDTSCP */ 21624 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) { 21625 delta += 1; 21626 /* Uses dirty helper: 21627 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* ) 21628 declared to wr rax, rcx, rdx 21629 */ 21630 const HChar* fName = "amd64g_dirtyhelper_RDTSCP"; 21631 void* fAddr = &amd64g_dirtyhelper_RDTSCP; 21632 IRDirty* d 21633 = unsafeIRDirty_0_N ( 0/*regparms*/, 21634 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21635 /* declare guest state effects */ 21636 d->nFxState = 3; 21637 vex_bzero(&d->fxState, sizeof(d->fxState)); 21638 d->fxState[0].fx = Ifx_Write; 21639 d->fxState[0].offset = OFFB_RAX; 21640 d->fxState[0].size = 8; 21641 d->fxState[1].fx = Ifx_Write; 21642 d->fxState[1].offset = OFFB_RCX; 21643 d->fxState[1].size = 8; 21644 d->fxState[2].fx = Ifx_Write; 21645 d->fxState[2].offset = OFFB_RDX; 21646 d->fxState[2].size = 8; 21647 /* execute the dirty call, side-effecting guest state */ 21648 stmt( IRStmt_Dirty(d) ); 21649 /* RDTSCP is a serialising insn. 
So, just in case someone is 21650 using it as a memory fence ... */ 21651 stmt( IRStmt_MBE(Imbe_Fence) ); 21652 DIP("rdtscp\n"); 21653 return delta; 21654 } 21655 /* else decode failed */ 21656 break; 21657 } 21658 21659 case 0x05: /* SYSCALL */ 21660 guest_RIP_next_mustcheck = True; 21661 guest_RIP_next_assumed = guest_RIP_bbstart + delta; 21662 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) ); 21663 /* It's important that all guest state is up-to-date 21664 at this point. So we declare an end-of-block here, which 21665 forces any cached guest state to be flushed. */ 21666 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed); 21667 vassert(dres->whatNext == Dis_StopHere); 21668 DIP("syscall\n"); 21669 return delta; 21670 21671 case 0x0B: /* UD2 */ 21672 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) ); 21673 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr); 21674 vassert(dres->whatNext == Dis_StopHere); 21675 DIP("ud2\n"); 21676 return delta; 21677 21678 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */ 21679 /* 0F 0D /1 -- prefetchw mem8 */ 21680 if (have66orF2orF3(pfx)) goto decode_failure; 21681 modrm = getUChar(delta); 21682 if (epartIsReg(modrm)) goto decode_failure; 21683 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1) 21684 goto decode_failure; 21685 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21686 delta += alen; 21687 switch (gregLO3ofRM(modrm)) { 21688 case 0: DIP("prefetch %s\n", dis_buf); break; 21689 case 1: DIP("prefetchw %s\n", dis_buf); break; 21690 default: vassert(0); /*NOTREACHED*/ 21691 } 21692 return delta; 21693 21694 case 0x1F: 21695 if (haveF2orF3(pfx)) goto decode_failure; 21696 modrm = getUChar(delta); 21697 if (epartIsReg(modrm)) goto decode_failure; 21698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21699 delta += alen; 21700 DIP("nop%c %s\n", nameISize(sz), dis_buf); 21701 return delta; 21702 21703 case 0x31: { /* RDTSC */ 21704 IRTemp val = newTemp(Ity_I64); 21705 IRExpr** args = mkIRExprVec_0(); 
21706 IRDirty* d = unsafeIRDirty_1_N ( 21707 val, 21708 0/*regparms*/, 21709 "amd64g_dirtyhelper_RDTSC", 21710 &amd64g_dirtyhelper_RDTSC, 21711 args 21712 ); 21713 if (have66orF2orF3(pfx)) goto decode_failure; 21714 /* execute the dirty call, dumping the result in val. */ 21715 stmt( IRStmt_Dirty(d) ); 21716 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val))); 21717 putIRegRAX(4, unop(Iop_64to32, mkexpr(val))); 21718 DIP("rdtsc\n"); 21719 return delta; 21720 } 21721 21722 case 0x40: 21723 case 0x41: 21724 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ 21725 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ 21726 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ 21727 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ 21728 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ 21729 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ 21730 case 0x48: /* CMOVSb (cmov negative) */ 21731 case 0x49: /* CMOVSb (cmov not negative) */ 21732 case 0x4A: /* CMOVP (cmov parity even) */ 21733 case 0x4B: /* CMOVNP (cmov parity odd) */ 21734 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ 21735 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ 21736 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ 21737 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ 21738 if (haveF2orF3(pfx)) goto decode_failure; 21739 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta); 21740 return delta; 21741 21742 case 0x80: 21743 case 0x81: 21744 case 0x82: /* JBb/JNAEb (jump below) */ 21745 case 0x83: /* JNBb/JAEb (jump not below) */ 21746 case 0x84: /* JZb/JEb (jump zero) */ 21747 case 0x85: /* JNZb/JNEb (jump not zero) */ 21748 case 0x86: /* JBEb/JNAb (jump below or equal) */ 21749 case 0x87: /* JNBEb/JAb (jump not below or equal) */ 21750 case 0x88: /* JSb (jump negative) */ 21751 case 0x89: /* JSb (jump not negative) */ 21752 case 0x8A: /* JP (jump parity even) */ 21753 case 0x8B: /* JNP/JPO (jump parity odd) */ 21754 case 0x8C: /* JLb/JNGEb (jump less) */ 21755 case 0x8D: /* 
JGEb/JNLb (jump greater or equal) */ 21756 case 0x8E: /* JLEb/JNGb (jump less or equal) */ 21757 case 0x8F: { /* JGb/JNLEb (jump greater) */ 21758 Long jmpDelta; 21759 const HChar* comment = ""; 21760 if (haveF3(pfx)) goto decode_failure; 21761 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ 21762 jmpDelta = getSDisp32(delta); 21763 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta; 21764 delta += 4; 21765 if (resteerCisOk 21766 && vex_control.guest_chase_cond 21767 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21768 && jmpDelta < 0 21769 && resteerOkFn( callback_opaque, (Addr64)d64) ) { 21770 /* Speculation: assume this backward branch is taken. So 21771 we need to emit a side-exit to the insn following this 21772 one, on the negation of the condition, and continue at 21773 the branch target address (d64). If we wind up back at 21774 the first instruction of the trace, just stop; it's 21775 better to let the IR loop unroller handle that case. */ 21776 stmt( IRStmt_Exit( 21777 mk_amd64g_calculate_condition( 21778 (AMD64Condcode)(1 ^ (opc - 0x80))), 21779 Ijk_Boring, 21780 IRConst_U64(guest_RIP_bbstart+delta), 21781 OFFB_RIP 21782 )); 21783 dres->whatNext = Dis_ResteerC; 21784 dres->continueAt = d64; 21785 comment = "(assumed taken)"; 21786 } 21787 else 21788 if (resteerCisOk 21789 && vex_control.guest_chase_cond 21790 && (Addr64)d64 != (Addr64)guest_RIP_bbstart 21791 && jmpDelta >= 0 21792 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) { 21793 /* Speculation: assume this forward branch is not taken. 21794 So we need to emit a side-exit to d64 (the dest) and 21795 continue disassembling at the insn immediately 21796 following this one. 
*/ 21797 stmt( IRStmt_Exit( 21798 mk_amd64g_calculate_condition((AMD64Condcode) 21799 (opc - 0x80)), 21800 Ijk_Boring, 21801 IRConst_U64(d64), 21802 OFFB_RIP 21803 )); 21804 dres->whatNext = Dis_ResteerC; 21805 dres->continueAt = guest_RIP_bbstart+delta; 21806 comment = "(assumed not taken)"; 21807 } 21808 else { 21809 /* Conservative default translation - end the block at 21810 this point. */ 21811 jcc_01( dres, (AMD64Condcode)(opc - 0x80), 21812 guest_RIP_bbstart+delta, d64 ); 21813 vassert(dres->whatNext == Dis_StopHere); 21814 } 21815 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64, 21816 comment); 21817 return delta; 21818 } 21819 21820 case 0x90: 21821 case 0x91: 21822 case 0x92: /* set-Bb/set-NAEb (set if below) */ 21823 case 0x93: /* set-NBb/set-AEb (set if not below) */ 21824 case 0x94: /* set-Zb/set-Eb (set if zero) */ 21825 case 0x95: /* set-NZb/set-NEb (set if not zero) */ 21826 case 0x96: /* set-BEb/set-NAb (set if below or equal) */ 21827 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */ 21828 case 0x98: /* set-Sb (set if negative) */ 21829 case 0x99: /* set-Sb (set if not negative) */ 21830 case 0x9A: /* set-P (set if parity even) */ 21831 case 0x9B: /* set-NP (set if parity odd) */ 21832 case 0x9C: /* set-Lb/set-NGEb (set if less) */ 21833 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */ 21834 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */ 21835 case 0x9F: /* set-Gb/set-NLEb (set if greater) */ 21836 if (haveF2orF3(pfx)) goto decode_failure; 21837 t1 = newTemp(Ity_I8); 21838 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) ); 21839 modrm = getUChar(delta); 21840 if (epartIsReg(modrm)) { 21841 delta++; 21842 putIRegE(1, pfx, modrm, mkexpr(t1)); 21843 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), 21844 nameIRegE(1,pfx,modrm)); 21845 } else { 21846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21847 delta += alen; 21848 storeLE( mkexpr(addr), mkexpr(t1) ); 21849 
DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf); 21850 } 21851 return delta; 21852 21853 case 0x1A: 21854 case 0x1B: { /* Future MPX instructions, currently NOPs. 21855 BNDMK b, m F3 0F 1B 21856 BNDCL b, r/m F3 0F 1A 21857 BNDCU b, r/m F2 0F 1A 21858 BNDCN b, r/m F2 0F 1B 21859 BNDMOV b, b/m 66 0F 1A 21860 BNDMOV b/m, b 66 0F 1B 21861 BNDLDX b, mib 0F 1A 21862 BNDSTX mib, b 0F 1B */ 21863 21864 /* All instructions have two operands. One operand is always the 21865 bnd register number (bnd0-bnd3, other register numbers are 21866 ignored when MPX isn't enabled, but should generate an 21867 exception if MPX is enabled) given by gregOfRexRM. The other 21868 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded 21869 address, all of which can be decoded by using either 21870 eregOfRexRM or disAMode. */ 21871 21872 modrm = getUChar(delta); 21873 int bnd = gregOfRexRM(pfx,modrm); 21874 const HChar *oper; 21875 if (epartIsReg(modrm)) { 21876 oper = nameIReg64 (eregOfRexRM(pfx,modrm)); 21877 delta += 1; 21878 } else { 21879 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 21880 delta += alen; 21881 oper = dis_buf; 21882 } 21883 21884 if (haveF3no66noF2 (pfx)) { 21885 if (opc == 0x1B) { 21886 DIP ("bndmk %s, %%bnd%d\n", oper, bnd); 21887 } else /* opc == 0x1A */ { 21888 DIP ("bndcl %s, %%bnd%d\n", oper, bnd); 21889 } 21890 } else if (haveF2no66noF3 (pfx)) { 21891 if (opc == 0x1A) { 21892 DIP ("bndcu %s, %%bnd%d\n", oper, bnd); 21893 } else /* opc == 0x1B */ { 21894 DIP ("bndcn %s, %%bnd%d\n", oper, bnd); 21895 } 21896 } else if (have66noF2noF3 (pfx)) { 21897 if (opc == 0x1A) { 21898 DIP ("bndmov %s, %%bnd%d\n", oper, bnd); 21899 } else /* opc == 0x1B */ { 21900 DIP ("bndmov %%bnd%d, %s\n", bnd, oper); 21901 } 21902 } else if (haveNo66noF2noF3 (pfx)) { 21903 if (opc == 0x1A) { 21904 DIP ("bndldx %s, %%bnd%d\n", oper, bnd); 21905 } else /* opc == 0x1B */ { 21906 DIP ("bndstx %%bnd%d, %s\n", bnd, oper); 21907 } 21908 } else goto decode_failure; 21909 21910 
return delta; 21911 } 21912 21913 case 0xA2: { /* CPUID */ 21914 /* Uses dirty helper: 21915 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* ) 21916 declared to mod rax, wr rbx, rcx, rdx 21917 */ 21918 IRDirty* d = NULL; 21919 const HChar* fName = NULL; 21920 void* fAddr = NULL; 21921 21922 if (haveF2orF3(pfx)) goto decode_failure; 21923 21924 /* This isn't entirely correct, CPUID should depend on the VEX 21925 capabilities, not on the underlying CPU. See bug #324882. */ 21926 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21927 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) && 21928 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) { 21929 fName = "amd64g_dirtyhelper_CPUID_avx2"; 21930 fAddr = &amd64g_dirtyhelper_CPUID_avx2; 21931 /* This is a Core-i7-4910-like machine */ 21932 } 21933 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21934 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) && 21935 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) { 21936 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16"; 21937 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16; 21938 /* This is a Core-i5-2300-like machine */ 21939 } 21940 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) && 21941 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) { 21942 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; 21943 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; 21944 /* This is a Core-i5-670-like machine */ 21945 } 21946 else { 21947 /* Give a CPUID for at least a baseline machine, SSE2 21948 only, and no CX16 */ 21949 fName = "amd64g_dirtyhelper_CPUID_baseline"; 21950 fAddr = &amd64g_dirtyhelper_CPUID_baseline; 21951 } 21952 21953 vassert(fName); vassert(fAddr); 21954 d = unsafeIRDirty_0_N ( 0/*regparms*/, 21955 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) ); 21956 /* declare guest state effects */ 21957 d->nFxState = 4; 21958 vex_bzero(&d->fxState, sizeof(d->fxState)); 21959 d->fxState[0].fx = Ifx_Modify; 21960 d->fxState[0].offset = OFFB_RAX; 21961 d->fxState[0].size = 8; 21962 d->fxState[1].fx = 
Ifx_Write; 21963 d->fxState[1].offset = OFFB_RBX; 21964 d->fxState[1].size = 8; 21965 d->fxState[2].fx = Ifx_Modify; 21966 d->fxState[2].offset = OFFB_RCX; 21967 d->fxState[2].size = 8; 21968 d->fxState[3].fx = Ifx_Write; 21969 d->fxState[3].offset = OFFB_RDX; 21970 d->fxState[3].size = 8; 21971 /* execute the dirty call, side-effecting guest state */ 21972 stmt( IRStmt_Dirty(d) ); 21973 /* CPUID is a serialising insn. So, just in case someone is 21974 using it as a memory fence ... */ 21975 stmt( IRStmt_MBE(Imbe_Fence) ); 21976 DIP("cpuid\n"); 21977 return delta; 21978 } 21979 21980 case 0xA3: { /* BT Gv,Ev */ 21981 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 21982 Bool ok = True; 21983 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 21984 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok ); 21985 if (!ok) goto decode_failure; 21986 return delta; 21987 } 21988 21989 case 0xA4: /* SHLDv imm8,Gv,Ev */ 21990 modrm = getUChar(delta); 21991 d64 = delta + lengthAMode(pfx, delta); 21992 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 21993 delta = dis_SHLRD_Gv_Ev ( 21994 vbi, pfx, delta, modrm, sz, 21995 mkU8(getUChar(d64)), True, /* literal */ 21996 dis_buf, True /* left */ ); 21997 return delta; 21998 21999 case 0xA5: /* SHLDv %cl,Gv,Ev */ 22000 modrm = getUChar(delta); 22001 delta = dis_SHLRD_Gv_Ev ( 22002 vbi, pfx, delta, modrm, sz, 22003 getIRegCL(), False, /* not literal */ 22004 "%cl", True /* left */ ); 22005 return delta; 22006 22007 case 0xAB: { /* BTS Gv,Ev */ 22008 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. 
*/ 22009 Bool ok = True; 22010 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 22011 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok ); 22012 if (!ok) goto decode_failure; 22013 return delta; 22014 } 22015 22016 case 0xAC: /* SHRDv imm8,Gv,Ev */ 22017 modrm = getUChar(delta); 22018 d64 = delta + lengthAMode(pfx, delta); 22019 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64)); 22020 delta = dis_SHLRD_Gv_Ev ( 22021 vbi, pfx, delta, modrm, sz, 22022 mkU8(getUChar(d64)), True, /* literal */ 22023 dis_buf, False /* right */ ); 22024 return delta; 22025 22026 case 0xAD: /* SHRDv %cl,Gv,Ev */ 22027 modrm = getUChar(delta); 22028 delta = dis_SHLRD_Gv_Ev ( 22029 vbi, pfx, delta, modrm, sz, 22030 getIRegCL(), False, /* not literal */ 22031 "%cl", False /* right */); 22032 return delta; 22033 22034 case 0xAF: /* IMUL Ev, Gv */ 22035 if (haveF2orF3(pfx)) goto decode_failure; 22036 delta = dis_mul_E_G ( vbi, pfx, sz, delta ); 22037 return delta; 22038 22039 case 0xB0: { /* CMPXCHG Gb,Eb */ 22040 Bool ok = True; 22041 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */ 22042 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta ); 22043 if (!ok) goto decode_failure; 22044 return delta; 22045 } 22046 22047 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */ 22048 Bool ok = True; 22049 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */ 22050 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure; 22051 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta ); 22052 if (!ok) goto decode_failure; 22053 return delta; 22054 } 22055 22056 case 0xB3: { /* BTR Gv,Ev */ 22057 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. 
*/ 22058 Bool ok = True; 22059 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 22060 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok ); 22061 if (!ok) goto decode_failure; 22062 return delta; 22063 } 22064 22065 case 0xB6: /* MOVZXb Eb,Gv */ 22066 if (haveF2orF3(pfx)) goto decode_failure; 22067 if (sz != 2 && sz != 4 && sz != 8) 22068 goto decode_failure; 22069 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False ); 22070 return delta; 22071 22072 case 0xB7: /* MOVZXw Ew,Gv */ 22073 if (haveF2orF3(pfx)) goto decode_failure; 22074 if (sz != 4 && sz != 8) 22075 goto decode_failure; 22076 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False ); 22077 return delta; 22078 22079 case 0xBA: { /* Grp8 Ib,Ev */ 22080 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */ 22081 Bool decode_OK = False; 22082 modrm = getUChar(delta); 22083 am_sz = lengthAMode(pfx,delta); 22084 d64 = getSDisp8(delta + am_sz); 22085 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64, 22086 &decode_OK ); 22087 if (!decode_OK) 22088 goto decode_failure; 22089 return delta; 22090 } 22091 22092 case 0xBB: { /* BTC Gv,Ev */ 22093 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */ 22094 Bool ok = False; 22095 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure; 22096 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok ); 22097 if (!ok) goto decode_failure; 22098 return delta; 22099 } 22100 22101 case 0xBC: /* BSF Gv,Ev */ 22102 if (!haveF2orF3(pfx) 22103 || (haveF3noF2(pfx) 22104 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) { 22105 /* no-F2 no-F3 0F BC = BSF 22106 or F3 0F BC = REP; BSF on older CPUs. */ 22107 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True ); 22108 return delta; 22109 } 22110 /* Fall through, since F3 0F BC is TZCNT, and needs to 22111 be handled by dis_ESC_0F__SSE4. 
*/ 22112 break; 22113 22114 case 0xBD: /* BSR Gv,Ev */ 22115 if (!haveF2orF3(pfx) 22116 || (haveF3noF2(pfx) 22117 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) { 22118 /* no-F2 no-F3 0F BD = BSR 22119 or F3 0F BD = REP; BSR on older CPUs. */ 22120 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False ); 22121 return delta; 22122 } 22123 /* Fall through, since F3 0F BD is LZCNT, and needs to 22124 be handled by dis_ESC_0F__SSE4. */ 22125 break; 22126 22127 case 0xBE: /* MOVSXb Eb,Gv */ 22128 if (haveF2orF3(pfx)) goto decode_failure; 22129 if (sz != 2 && sz != 4 && sz != 8) 22130 goto decode_failure; 22131 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True ); 22132 return delta; 22133 22134 case 0xBF: /* MOVSXw Ew,Gv */ 22135 if (haveF2orF3(pfx)) goto decode_failure; 22136 if (sz != 4 && sz != 8) 22137 goto decode_failure; 22138 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True ); 22139 return delta; 22140 22141 case 0xC0: { /* XADD Gb,Eb */ 22142 Bool decode_OK = False; 22143 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta ); 22144 if (!decode_OK) 22145 goto decode_failure; 22146 return delta; 22147 } 22148 22149 case 0xC1: { /* XADD Gv,Ev */ 22150 Bool decode_OK = False; 22151 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta ); 22152 if (!decode_OK) 22153 goto decode_failure; 22154 return delta; 22155 } 22156 22157 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */ 22158 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64; 22159 IRTemp expdHi = newTemp(elemTy); 22160 IRTemp expdLo = newTemp(elemTy); 22161 IRTemp dataHi = newTemp(elemTy); 22162 IRTemp dataLo = newTemp(elemTy); 22163 IRTemp oldHi = newTemp(elemTy); 22164 IRTemp oldLo = newTemp(elemTy); 22165 IRTemp flags_old = newTemp(Ity_I64); 22166 IRTemp flags_new = newTemp(Ity_I64); 22167 IRTemp success = newTemp(Ity_I1); 22168 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64; 22169 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64; 22170 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64; 22171 IRExpr* zero = sz==4 ? 
mkU32(0) : mkU64(0); 22172 IRTemp expdHi64 = newTemp(Ity_I64); 22173 IRTemp expdLo64 = newTemp(Ity_I64); 22174 22175 /* Translate this using a DCAS, even if there is no LOCK 22176 prefix. Life is too short to bother with generating two 22177 different translations for the with/without-LOCK-prefix 22178 cases. */ 22179 *expect_CAS = True; 22180 22181 /* Decode, and generate address. */ 22182 if (have66(pfx)) goto decode_failure; 22183 if (sz != 4 && sz != 8) goto decode_failure; 22184 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) 22185 goto decode_failure; 22186 modrm = getUChar(delta); 22187 if (epartIsReg(modrm)) goto decode_failure; 22188 if (gregLO3ofRM(modrm) != 1) goto decode_failure; 22189 if (haveF2orF3(pfx)) { 22190 /* Since the e-part is memory only, F2 or F3 (one or the 22191 other) is acceptable if LOCK is also present. But only 22192 for cmpxchg8b. */ 22193 if (sz == 8) goto decode_failure; 22194 if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure; 22195 } 22196 22197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22198 delta += alen; 22199 22200 /* cmpxchg16b requires an alignment check. */ 22201 if (sz == 8) 22202 gen_SEGV_if_not_16_aligned( addr ); 22203 22204 /* Get the expected and new values. */ 22205 assign( expdHi64, getIReg64(R_RDX) ); 22206 assign( expdLo64, getIReg64(R_RAX) ); 22207 22208 /* These are the correctly-sized expected and new values. 22209 However, we also get expdHi64/expdLo64 above as 64-bits 22210 regardless, because we will need them later in the 32-bit 22211 case (paradoxically). */ 22212 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64)) 22213 : mkexpr(expdHi64) ); 22214 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64)) 22215 : mkexpr(expdLo64) ); 22216 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) ); 22217 assign( dataLo, sz==4 ? 
getIReg32(R_RBX) : getIReg64(R_RBX) ); 22218 22219 /* Do the DCAS */ 22220 stmt( IRStmt_CAS( 22221 mkIRCAS( oldHi, oldLo, 22222 Iend_LE, mkexpr(addr), 22223 mkexpr(expdHi), mkexpr(expdLo), 22224 mkexpr(dataHi), mkexpr(dataLo) 22225 ))); 22226 22227 /* success when oldHi:oldLo == expdHi:expdLo */ 22228 assign( success, 22229 binop(opCasCmpEQ, 22230 binop(opOR, 22231 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)), 22232 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo)) 22233 ), 22234 zero 22235 )); 22236 22237 /* If the DCAS is successful, that is to say oldHi:oldLo == 22238 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX, 22239 which is where they came from originally. Both the actual 22240 contents of these two regs, and any shadow values, are 22241 unchanged. If the DCAS fails then we're putting into 22242 RDX:RAX the value seen in memory. */ 22243 /* Now of course there's a complication in the 32-bit case 22244 (bah!): if the DCAS succeeds, we need to leave RDX:RAX 22245 unchanged; but if we use the same scheme as in the 64-bit 22246 case, we get hit by the standard rule that a write to the 22247 bottom 32 bits of an integer register zeros the upper 32 22248 bits. And so the upper halves of RDX and RAX mysteriously 22249 become zero. So we have to stuff back in the original 22250 64-bit values which we previously stashed in 22251 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */ 22252 /* It's just _so_ much fun ... */ 22253 putIRegRDX( 8, 22254 IRExpr_ITE( mkexpr(success), 22255 mkexpr(expdHi64), 22256 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi)) 22257 : mkexpr(oldHi) 22258 )); 22259 putIRegRAX( 8, 22260 IRExpr_ITE( mkexpr(success), 22261 mkexpr(expdLo64), 22262 sz == 4 ? 
unop(Iop_32Uto64, mkexpr(oldLo)) 22263 : mkexpr(oldLo) 22264 )); 22265 22266 /* Copy the success bit into the Z flag and leave the others 22267 unchanged */ 22268 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all())); 22269 assign( 22270 flags_new, 22271 binop(Iop_Or64, 22272 binop(Iop_And64, mkexpr(flags_old), 22273 mkU64(~AMD64G_CC_MASK_Z)), 22274 binop(Iop_Shl64, 22275 binop(Iop_And64, 22276 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)), 22277 mkU8(AMD64G_CC_SHIFT_Z)) )); 22278 22279 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) )); 22280 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) )); 22281 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) )); 22282 /* Set NDEP even though it isn't used. This makes 22283 redundant-PUT elimination of previous stores to this field 22284 work better. */ 22285 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) )); 22286 22287 /* Sheesh. Aren't you glad it was me and not you that had to 22288 write and validate all this grunge? */ 22289 22290 DIP("cmpxchg8b %s\n", dis_buf); 22291 return delta; 22292 } 22293 22294 case 0xC8: /* BSWAP %eax */ 22295 case 0xC9: 22296 case 0xCA: 22297 case 0xCB: 22298 case 0xCC: 22299 case 0xCD: 22300 case 0xCE: 22301 case 0xCF: /* BSWAP %edi */ 22302 if (haveF2orF3(pfx)) goto decode_failure; 22303 /* According to the AMD64 docs, this insn can have size 4 or 22304 8. 
*/ 22305 if (sz == 4) { 22306 t1 = newTemp(Ity_I32); 22307 assign( t1, getIRegRexB(4, pfx, opc-0xC8) ); 22308 t2 = math_BSWAP( t1, Ity_I32 ); 22309 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2)); 22310 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8)); 22311 return delta; 22312 } 22313 if (sz == 8) { 22314 t1 = newTemp(Ity_I64); 22315 t2 = newTemp(Ity_I64); 22316 assign( t1, getIRegRexB(8, pfx, opc-0xC8) ); 22317 t2 = math_BSWAP( t1, Ity_I64 ); 22318 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2)); 22319 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8)); 22320 return delta; 22321 } 22322 goto decode_failure; 22323 22324 default: 22325 break; 22326 22327 } /* first switch */ 22328 22329 22330 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */ 22331 /* In the second switch, pick off MMX insns. */ 22332 22333 if (!have66orF2orF3(pfx)) { 22334 /* So there's no SIMD prefix. */ 22335 22336 vassert(sz == 4 || sz == 8); 22337 22338 switch (opc) { /* second switch */ 22339 22340 case 0x71: 22341 case 0x72: 22342 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */ 22343 22344 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */ 22345 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */ 22346 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */ 22347 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */ 22348 22349 case 0xFC: 22350 case 0xFD: 22351 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */ 22352 22353 case 0xEC: 22354 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */ 22355 22356 case 0xDC: 22357 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 22358 22359 case 0xF8: 22360 case 0xF9: 22361 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */ 22362 22363 case 0xE8: 22364 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */ 22365 22366 case 0xD8: 22367 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */ 22368 22369 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */ 22370 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */ 22371 22372 case 
0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */ 22373 22374 case 0x74: 22375 case 0x75: 22376 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */ 22377 22378 case 0x64: 22379 case 0x65: 22380 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */ 22381 22382 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */ 22383 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */ 22384 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */ 22385 22386 case 0x68: 22387 case 0x69: 22388 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */ 22389 22390 case 0x60: 22391 case 0x61: 22392 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */ 22393 22394 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */ 22395 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */ 22396 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */ 22397 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */ 22398 22399 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */ 22400 case 0xF2: 22401 case 0xF3: 22402 22403 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */ 22404 case 0xD2: 22405 case 0xD3: 22406 22407 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */ 22408 case 0xE2: { 22409 Bool decode_OK = False; 22410 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN ); 22411 if (decode_OK) 22412 return delta; 22413 goto decode_failure; 22414 } 22415 22416 default: 22417 break; 22418 } /* second switch */ 22419 22420 } 22421 22422 /* A couple of MMX corner cases */ 22423 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) { 22424 if (sz != 4) 22425 goto decode_failure; 22426 do_EMMS_preamble(); 22427 DIP("{f}emms\n"); 22428 return delta; 22429 } 22430 22431 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */ 22432 /* Perhaps it's an SSE or SSE2 instruction. We can try this 22433 without checking the guest hwcaps because SSE2 is a baseline 22434 facility in 64 bit mode. 
*/
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE2 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN, dres );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE3 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE4 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE4 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

  decode_failure:
   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F38         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Decode an instruction following a 0F 38 escape.  Reads the opcode
   byte at 'deltaIN' and, on success, returns the offset just past the
   decoded instruction.  On failure returns 'deltaIN' unchanged, which
   the caller treats as decode failure. */
__attribute__((noinline))
static
Long dis_ESC_0F38 (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0xF0:   /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
   case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
      /* Only the no-F2/F3, no-VEX, operand-size 2/4/8 forms are
         handled here; a register E-part falls through to the later
         decoders. */
      if (!haveF2orF3(pfx) && !haveVEX(pfx)
          && (sz == 2 || sz == 4 || sz == 8)) {
         IRTemp addr  = IRTemp_INVALID;
         UChar  modrm = 0;
         Int    alen  = 0;
         HChar  dis_buf[50];
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) break;
         addr = disAMode ( &alen, vbi,
                           pfx, delta, dis_buf, 0 );
         delta += alen;
         IRType ty  = szToITy(sz);
         IRTemp src = newTemp(ty);
         if (opc == 0xF0) { /* LOAD: byte-swap the loaded value into G */
            assign(src, loadLE(ty, mkexpr(addr)));
            IRTemp dst = math_BSWAP(src, ty);
            putIRegG(sz, pfx, modrm, mkexpr(dst));
            DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
         } else { /* STORE: byte-swap G and store it to memory */
            assign(src, getIRegG(sz, pfx, modrm));
            IRTemp dst = math_BSWAP(src, ty);
            storeLE(mkexpr(addr), mkexpr(dst));
            DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
         }
         return delta;
      }
      /* else fall through; maybe one of the decoders below knows what
         it is. */
      break;
   }

   default:
      break;

   }

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately.
*/
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /*decode_failure:*/
   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F3A         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Decode an instruction following a 0F 3A escape.  Same contract as
   dis_ESC_0F38: returns 'deltaIN' unchanged on failure. */
__attribute__((noinline))
static
Long dis_ESC_0F3A (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   default:
      break;

   }

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately.
*/
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F__VEX      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* FIXME: common up with the _256_ version below? */
/* Common handler for 3-address VEX.NDS.128-encoded insns.  The left
   source is the vvvv register (rSL), the right source is the E part
   (register or memory).  The result is computed either by the IROp
   'op' or by the generator 'opFn' (exactly one must be supplied),
   optionally inverting the left argument and/or swapping the two
   arguments first.  The result goes to G's low 128 bits, with the
   upper half of the YMM register zeroed. */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        /* The actual operation.  Use either 'op' or 'opfn',
           but not both. */
        IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
        Bool invertLeftArg,
        Bool swapArgs
     )
{
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   UInt   rSL   = getVexNvvvv(pfx);
   IRTemp tSL   = newTemp(Ity_V128);
   IRTemp tSR   = newTemp(Ity_V128);
   IRTemp addr  = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen  = 0;
   vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);

   assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
                             : getXMMReg(rSL));

   if (epartIsReg(modrm)) {
      UInt rSR = eregOfRexRM(pfx, modrm);
      delta += 1;
      assign(tSR, getXMMReg(rSR));
      DIP("%s %s,%s,%s\n",
          name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
      DIP("%s %s,%s,%s\n",
          name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
   }

   IRTemp res = IRTemp_INVALID;
   if (op != Iop_INVALID) {
      vassert(opFn == NULL);
      res = newTemp(Ity_V128);
      if (requiresRMode(op)) {
         /* Rounding-mode-taking ops get a (fake) rounding mode as
            their first argument. */
         IRTemp rm = newTemp(Ity_I32);
         assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
         assign(res, swapArgs
                        ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
                        : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
      } else {
         assign(res, swapArgs
                        ? binop(op, mkexpr(tSR), mkexpr(tSL))
                        : binop(op, mkexpr(tSL), mkexpr(tSR)));
      }
   } else {
      vassert(opFn != NULL);
      res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
   }

   putYMMRegLoAndZU(rD, mkexpr(res));

   *uses_vvvv = True;
   return delta;
}


/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
   for the operation, no inversion of the left arg, and no swapping of
   args. */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IROp op
     )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
}


/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
   generator to compute the result, no inversion of the left
   arg, and no swapping of args.
*/ 22696 static 22697 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex ( 22698 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi, 22699 Prefix pfx, Long delta, const HChar* name, 22700 IRTemp(*opFn)(IRTemp,IRTemp) 22701 ) 22702 { 22703 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 22704 uses_vvvv, vbi, pfx, delta, name, 22705 Iop_INVALID, opFn, False, False ); 22706 } 22707 22708 22709 /* Vector by scalar shift of V by the amount specified at the bottom 22710 of E. */ 22711 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi, 22712 Prefix pfx, Long delta, 22713 const HChar* opname, IROp op ) 22714 { 22715 HChar dis_buf[50]; 22716 Int alen, size; 22717 IRTemp addr; 22718 Bool shl, shr, sar; 22719 UChar modrm = getUChar(delta); 22720 UInt rG = gregOfRexRM(pfx,modrm); 22721 UInt rV = getVexNvvvv(pfx);; 22722 IRTemp g0 = newTemp(Ity_V128); 22723 IRTemp g1 = newTemp(Ity_V128); 22724 IRTemp amt = newTemp(Ity_I64); 22725 IRTemp amt8 = newTemp(Ity_I8); 22726 if (epartIsReg(modrm)) { 22727 UInt rE = eregOfRexRM(pfx,modrm); 22728 assign( amt, getXMMRegLane64(rE, 0) ); 22729 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22730 nameXMMReg(rV), nameXMMReg(rG) ); 22731 delta++; 22732 } else { 22733 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22734 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22735 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 22736 delta += alen; 22737 } 22738 assign( g0, getXMMReg(rV) ); 22739 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22740 22741 shl = shr = sar = False; 22742 size = 0; 22743 switch (op) { 22744 case Iop_ShlN16x8: shl = True; size = 32; break; 22745 case Iop_ShlN32x4: shl = True; size = 32; break; 22746 case Iop_ShlN64x2: shl = True; size = 64; break; 22747 case Iop_SarN16x8: sar = True; size = 16; break; 22748 case Iop_SarN32x4: sar = True; size = 32; break; 22749 case Iop_ShrN16x8: shr = True; size = 16; break; 22750 case Iop_ShrN32x4: shr = True; size = 32; break; 22751 case Iop_ShrN64x2: shr = True; 
size = 64; break; 22752 default: vassert(0); 22753 } 22754 22755 if (shl || shr) { 22756 assign( 22757 g1, 22758 IRExpr_ITE( 22759 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22760 binop(op, mkexpr(g0), mkexpr(amt8)), 22761 mkV128(0x0000) 22762 ) 22763 ); 22764 } else 22765 if (sar) { 22766 assign( 22767 g1, 22768 IRExpr_ITE( 22769 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)), 22770 binop(op, mkexpr(g0), mkexpr(amt8)), 22771 binop(op, mkexpr(g0), mkU8(size-1)) 22772 ) 22773 ); 22774 } else { 22775 vassert(0); 22776 } 22777 22778 putYMMRegLoAndZU( rG, mkexpr(g1) ); 22779 return delta; 22780 } 22781 22782 22783 /* Vector by scalar shift of V by the amount specified at the bottom 22784 of E. */ 22785 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi, 22786 Prefix pfx, Long delta, 22787 const HChar* opname, IROp op ) 22788 { 22789 HChar dis_buf[50]; 22790 Int alen, size; 22791 IRTemp addr; 22792 Bool shl, shr, sar; 22793 UChar modrm = getUChar(delta); 22794 UInt rG = gregOfRexRM(pfx,modrm); 22795 UInt rV = getVexNvvvv(pfx);; 22796 IRTemp g0 = newTemp(Ity_V256); 22797 IRTemp g1 = newTemp(Ity_V256); 22798 IRTemp amt = newTemp(Ity_I64); 22799 IRTemp amt8 = newTemp(Ity_I8); 22800 if (epartIsReg(modrm)) { 22801 UInt rE = eregOfRexRM(pfx,modrm); 22802 assign( amt, getXMMRegLane64(rE, 0) ); 22803 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE), 22804 nameYMMReg(rV), nameYMMReg(rG) ); 22805 delta++; 22806 } else { 22807 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 22808 assign( amt, loadLE(Ity_I64, mkexpr(addr)) ); 22809 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 22810 delta += alen; 22811 } 22812 assign( g0, getYMMReg(rV) ); 22813 assign( amt8, unop(Iop_64to8, mkexpr(amt)) ); 22814 22815 shl = shr = sar = False; 22816 size = 0; 22817 switch (op) { 22818 case Iop_ShlN16x16: shl = True; size = 32; break; 22819 case Iop_ShlN32x8: shl = True; size = 32; break; 22820 case Iop_ShlN64x4: shl = True; size = 64; break; 22821 case 
Iop_SarN16x16: sar = True; size = 16; break;
      case Iop_SarN32x8:  sar = True; size = 32; break;
      case Iop_ShrN16x16: shr = True; size = 16; break;
      case Iop_ShrN32x8:  shr = True; size = 32; break;
      case Iop_ShrN64x4:  shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
     /* Logical shift: in-range amounts shift; out-of-range gives a
        zero 256-bit result. */
     assign(
        g1,
        IRExpr_ITE(
           binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
           binop(op, mkexpr(g0), mkexpr(amt8)),
           binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
        )
     );
   } else
   if (sar) {
     /* Arithmetic shift: out-of-range amounts behave like size-1. */
     assign(
        g1,
        IRExpr_ITE(
           binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
           binop(op, mkexpr(g0), mkexpr(amt8)),
           binop(op, mkexpr(g0), mkU8(size-1))
        )
     );
   } else {
      vassert(0);
   }

   putYMMReg( rG, mkexpr(g1) );
   return delta;
}


/* Vector by vector shift of V by the amount specified at the bottom
   of E.  Vector by vector shifts are defined for all shift amounts,
   so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
   anyway).  Each lane of V is shifted by the corresponding lane of E:
   the vectors are broken into scalar lanes, the scalar shift 'op' is
   applied per lane, and the lanes are reassembled into G. */
static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
                                      Prefix pfx, Long delta,
                                      const HChar* opname, IROp op, Bool isYMM )
{
   HChar  dis_buf[50];
   Int    alen, size, i;
   IRTemp addr;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);;
   IRTemp sV    = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
   IRTemp amt   = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
   IRTemp amts[8], sVs[8], res[8];
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
             nameYMMReg(rV), nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
             nameXMMReg(rV), nameXMMReg(rG) );
      }
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG) );
      }
      delta += alen;
   }
   assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );

   size = 0;
   switch (op) {
      case Iop_Shl32: size = 32; break;
      case Iop_Shl64: size = 64; break;
      case Iop_Sar32: size = 32; break;
      case Iop_Shr32: size = 32; break;
      case Iop_Shr64: size = 64; break;
      default: vassert(0);
   }

   for (i = 0; i < 8; i++) {
      sVs[i]  = IRTemp_INVALID;
      amts[i] = IRTemp_INVALID;
   }
   /* Break source and amount vectors into scalar lanes; unused slots
      stay IRTemp_INVALID. */
   switch (size) {
      case 32:
         if (isYMM) {
            breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
                                  &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
                                   &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         }
         break;
      case 64:
         if (isYMM) {
            breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
            breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to64s( sV, &sVs[1], &sVs[0] );
            breakupV128to64s( amt, &amts[1], &amts[0] );
         }
         break;
      default: vassert(0);
   }
   /* Per lane: in-range amounts shift; out-of-range gives 0 for
      logical shifts and a full sign-fill (shift by size-1) for
      arithmetic right shift. */
   for (i = 0; i < 8; i++)
      if (sVs[i] != IRTemp_INVALID) {
         res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
         assign( res[i],
                 IRExpr_ITE(
                    binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
                          mkexpr(amts[i]),
                          size == 32 ? mkU32(size) : mkU64(size)),
                    binop(op, mkexpr(sVs[i]),
                              unop(size == 32 ? Iop_32to8 : Iop_64to8,
                                   mkexpr(amts[i]))),
                    op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
                                    : size == 32 ? mkU32(0) : mkU64(0)
                 ));
      }
   /* Reassemble the result lanes; for the 128-bit form the upper YMM
      lanes are written with zero. */
   switch (size) {
      case 32:
         for (i = 0; i < 8; i++)
            putYMMRegLane32( rG, i, (i < 4 || isYMM)
                                       ? mkexpr(res[i]) : mkU32(0) );
         break;
      case 64:
         for (i = 0; i < 4; i++)
            putYMMRegLane64( rG, i, (i < 2 || isYMM)
                                       ? mkexpr(res[i]) : mkU64(0) );
         break;
      default: vassert(0);
   }

   return delta;
}


/* Vector by scalar shift of E into V, by an immediate byte.  Modified
   version of dis_SSE_shiftE_imm. */
static
Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
                                 Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V128);
   IRTemp  e1   = newTemp(Ity_V128);
   UInt    rD   = getVexNvvvv(pfx);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s,%s\n", opname,
       (Int)amt,
       nameXMMReg(eregOfRexRM(pfx,rm)),
       nameXMMReg(rD));
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case
Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   /* Immediate amounts >= the lane width saturate: zero result for
      logical shifts, full sign-fill for arithmetic shifts.  The
      choice is made at decode time since 'amt' is a constant. */
   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putYMMRegLoAndZU( rD, mkexpr(e1) );
   return delta;
}


/* Vector by scalar shift of E into V, by an immediate byte.  Modified
   version of dis_AVX128_shiftE_to_V_imm. */
static
Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
                                 Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V256);
   IRTemp  e1   = newTemp(Ity_V256);
   UInt    rD   = getVexNvvvv(pfx);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s,%s\n", opname,
       (Int)amt,
       nameYMMReg(eregOfRexRM(pfx,rm)),
       nameYMMReg(rD));
   assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x16: shl = True; size = 16; break;
      case Iop_ShlN32x8:  shl = True; size = 32; break;
      case Iop_ShlN64x4:  shl = True; size = 64; break;
      case Iop_SarN16x16: sar = True; size = 16; break;
      case Iop_SarN32x8:  sar = True; size = 32; break;
      case Iop_ShrN16x16: shr = True; size = 16; break;
      case Iop_ShrN32x8:  shr = True; size = 32; break;
      case Iop_ShrN64x4:  shr = True; size = 64; break;
      default: vassert(0);
   }


   if (shl || shr) {
      assign( e1, amt >= size
                     ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putYMMReg( rD, mkexpr(e1) );
   return delta;
}


/* Lower 64-bit lane only AVX128 binary operation:
      G[63:0]    = V[63:0] `op` E[63:0]
      G[127:64]  = V[127:64]
      G[255:128] = 0.
   The specified op must be of the 64F0x2 kind, so that it
   copies the upper half of the left operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
                                       const VexAbiInfo* vbi,
                                       Prefix pfx, Long delta,
                                       const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRExpr* vpart = getXMMReg(rV);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes.
*/
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   /* Zero the upper half of the YMM destination. */
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 64-bit lane only AVX128 unary operation:
      G[63:0]    = op(E[63:0])
      G[127:64]  = V[127:64]
      G[255:128] = 0
   The specified op must be of the 64F0x2 kind, so that it
   copies the upper half of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRTemp  e64   = newTemp(Ity_I64);

   /* Fetch E[63:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e64, getXMMRegLane64(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e64, loadLE(Ity_I64, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }

   /* Create a value 'arg' as V[127:64]++E[63:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo64,
                getXMMReg(rV), mkexpr(e64)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 32-bit lane only AVX128 unary operation:
      G[31:0]    = op(E[31:0])
      G[127:32]  = V[127:32]
      G[255:128] = 0
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRTemp  e32   = newTemp(Ity_I32);

   /* Fetch E[31:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e32, getXMMRegLane32(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e32, loadLE(Ity_I32, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }

   /* Create a value 'arg' as V[127:32]++E[31:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo32,
                getXMMReg(rV), mkexpr(e32)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}


/* Lower 32-bit lane only AVX128 binary operation:
      G[31:0]    = V[31:0] `op` E[31:0]
      G[127:32]  = V[127:32]
      G[255:128] = 0.
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the left operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
                                       const VexAbiInfo* vbi,
                                       Prefix pfx, Long delta,
                                       const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRExpr* vpart = getXMMReg(rV);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}


/* All-lanes AVX128 binary operation:
      G[127:0]   = V[127:0] `op` E[127:0]
      G[255:128] = 0.
*/
static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
                                  const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   /* Delegate to the general 128-bit NDS handler with no argument
      inversion or swapping. */
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, opname, op,
             NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
   );
}


/* Handles AVX128 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure.
*/
static
Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname, Bool all_lanes, Int sz )
{
   vassert(sz == 4 || sz == 8);
   Long    deltaIN = delta;
   HChar   dis_buf[50];
   Int     alen;
   UInt    imm8;
   IRTemp  addr;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getUChar(delta);
   UInt    rG      = gregOfRexRM(pfx, rm);
   UInt    rV      = getVexNvvvv(pfx);
   IRTemp argL     = newTemp(Ity_V128);
   IRTemp argR     = newTemp(Ity_V128);

   assign(argL, getXMMReg(rV));
   if (epartIsReg(rm)) {
      /* imm8 selects the comparison; findSSECmpOp maps it to an IROp
         possibly plus a pre-swap of the args and/or a post-negation. */
      imm8 = getUChar(delta+1);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      UInt rE = eregOfRexRM(pfx,rm);
      assign(argR, getXMMReg(rE));
      delta += 1+1;
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      /* For single-lane compares only the low 8 (sz==8) or 4 (sz==4)
         bytes of the memory operand are read. */
      assign(argR,
             all_lanes ? loadLE(Ity_V128, mkexpr(addr))
             : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
             : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
      delta += alen+1;
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }

   assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
                         : binop(op, mkexpr(argL), mkexpr(argR)));

   if (all_lanes) {
      /* This is simple: just invert the result, if necessary, and
         have done. */
      if (postNot) {
         putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else
   if (!preSwap) {
      /* More complex.  It's a one-lane-only, hence need to possibly
         invert only that one lane.  But at least the other lanes are
         correctly "in" the result, having been copied from the left
         operand (argL). */
      if (postNot) {
         IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
         putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
                                                  mask) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else {
      /* This is the most complex case.  One-lane-only, but the args
         were swapped.  So we have to possibly invert the bottom lane,
         and (definitely) we have to copy the upper lane(s) from argL
         since, due to the swapping, what's currently there is from
         argR, which is not correct. */
      IRTemp res     = newTemp(Ity_V128);
      IRTemp mask    = newTemp(Ity_V128);
      IRTemp notMask = newTemp(Ity_V128);
      assign(mask,    mkV128(sz==4 ? 0x000F : 0x00FF));
      assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
      if (postNot) {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128,
                            unop(Iop_NotV128, mkexpr(plain)),
                            mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      } else {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128,
                            mkexpr(plain),
                            mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      }
      putYMMRegLoAndZU( rG, mkexpr(res) );
   }

   *uses_vvvv = True;
   return delta;
}


/* Handles AVX256 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure.
*/ 23393 static 23394 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv, 23395 const VexAbiInfo* vbi, 23396 Prefix pfx, Long delta, 23397 const HChar* opname, Int sz ) 23398 { 23399 vassert(sz == 4 || sz == 8); 23400 Long deltaIN = delta; 23401 HChar dis_buf[50]; 23402 Int alen; 23403 UInt imm8; 23404 IRTemp addr; 23405 Bool preSwap = False; 23406 IROp op = Iop_INVALID; 23407 Bool postNot = False; 23408 IRTemp plain = newTemp(Ity_V256); 23409 UChar rm = getUChar(delta); 23410 UInt rG = gregOfRexRM(pfx, rm); 23411 UInt rV = getVexNvvvv(pfx); 23412 IRTemp argL = newTemp(Ity_V256); 23413 IRTemp argR = newTemp(Ity_V256); 23414 IRTemp argLhi = IRTemp_INVALID; 23415 IRTemp argLlo = IRTemp_INVALID; 23416 IRTemp argRhi = IRTemp_INVALID; 23417 IRTemp argRlo = IRTemp_INVALID; 23418 23419 assign(argL, getYMMReg(rV)); 23420 if (epartIsReg(rm)) { 23421 imm8 = getUChar(delta+1); 23422 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 23423 True/*all_lanes*/, sz); 23424 if (!ok) return deltaIN; /* FAIL */ 23425 UInt rE = eregOfRexRM(pfx,rm); 23426 assign(argR, getYMMReg(rE)); 23427 delta += 1+1; 23428 DIP("%s $%u,%s,%s,%s\n", 23429 opname, imm8, 23430 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 23431 } else { 23432 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 23433 imm8 = getUChar(delta+alen); 23434 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, 23435 True/*all_lanes*/, sz); 23436 if (!ok) return deltaIN; /* FAIL */ 23437 assign(argR, loadLE(Ity_V256, mkexpr(addr)) ); 23438 delta += alen+1; 23439 DIP("%s $%u,%s,%s,%s\n", 23440 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 23441 } 23442 23443 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo ); 23444 breakupV256toV128s( preSwap ? 
argL : argR, &argRhi, &argRlo ); 23445 assign(plain, binop( Iop_V128HLtoV256, 23446 binop(op, mkexpr(argLhi), mkexpr(argRhi)), 23447 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) ); 23448 23449 /* This is simple: just invert the result, if necessary, and 23450 have done. */ 23451 if (postNot) { 23452 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) ); 23453 } else { 23454 putYMMReg( rG, mkexpr(plain) ); 23455 } 23456 23457 *uses_vvvv = True; 23458 return delta; 23459 } 23460 23461 23462 /* Handles AVX128 unary E-to-G all-lanes operations. */ 23463 static 23464 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv, 23465 const VexAbiInfo* vbi, 23466 Prefix pfx, Long delta, 23467 const HChar* opname, 23468 IRTemp (*opFn)(IRTemp) ) 23469 { 23470 HChar dis_buf[50]; 23471 Int alen; 23472 IRTemp addr; 23473 IRTemp res = newTemp(Ity_V128); 23474 IRTemp arg = newTemp(Ity_V128); 23475 UChar rm = getUChar(delta); 23476 UInt rG = gregOfRexRM(pfx, rm); 23477 if (epartIsReg(rm)) { 23478 UInt rE = eregOfRexRM(pfx,rm); 23479 assign(arg, getXMMReg(rE)); 23480 delta += 1; 23481 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 23482 } else { 23483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23484 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 23485 delta += alen; 23486 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 23487 } 23488 res = opFn(arg); 23489 putYMMRegLoAndZU( rG, mkexpr(res) ); 23490 *uses_vvvv = False; 23491 return delta; 23492 } 23493 23494 23495 /* Handles AVX128 unary E-to-G all-lanes operations. 
*/ 23496 static 23497 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 23498 const VexAbiInfo* vbi, 23499 Prefix pfx, Long delta, 23500 const HChar* opname, IROp op ) 23501 { 23502 HChar dis_buf[50]; 23503 Int alen; 23504 IRTemp addr; 23505 IRTemp arg = newTemp(Ity_V128); 23506 UChar rm = getUChar(delta); 23507 UInt rG = gregOfRexRM(pfx, rm); 23508 if (epartIsReg(rm)) { 23509 UInt rE = eregOfRexRM(pfx,rm); 23510 assign(arg, getXMMReg(rE)); 23511 delta += 1; 23512 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG)); 23513 } else { 23514 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23515 assign(arg, loadLE(Ity_V128, mkexpr(addr))); 23516 delta += alen; 23517 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG)); 23518 } 23519 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked 23520 // up in the usual way. 23521 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2; 23522 /* XXXROUNDINGFIXME */ 23523 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg)) 23524 : unop(op, mkexpr(arg)); 23525 putYMMRegLoAndZU( rG, res ); 23526 *uses_vvvv = False; 23527 return delta; 23528 } 23529 23530 23531 /* FIXME: common up with the _128_ version above? */ 23532 static 23533 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG ( 23534 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi, 23535 Prefix pfx, Long delta, const HChar* name, 23536 /* The actual operation. Use either 'op' or 'opfn', 23537 but not both. */ 23538 IROp op, IRTemp(*opFn)(IRTemp,IRTemp), 23539 Bool invertLeftArg, 23540 Bool swapArgs 23541 ) 23542 { 23543 UChar modrm = getUChar(delta); 23544 UInt rD = gregOfRexRM(pfx, modrm); 23545 UInt rSL = getVexNvvvv(pfx); 23546 IRTemp tSL = newTemp(Ity_V256); 23547 IRTemp tSR = newTemp(Ity_V256); 23548 IRTemp addr = IRTemp_INVALID; 23549 HChar dis_buf[50]; 23550 Int alen = 0; 23551 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/); 23552 23553 assign(tSL, invertLeftArg ? 
unop(Iop_NotV256, getYMMReg(rSL)) 23554 : getYMMReg(rSL)); 23555 23556 if (epartIsReg(modrm)) { 23557 UInt rSR = eregOfRexRM(pfx, modrm); 23558 delta += 1; 23559 assign(tSR, getYMMReg(rSR)); 23560 DIP("%s %s,%s,%s\n", 23561 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD)); 23562 } else { 23563 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 23564 delta += alen; 23565 assign(tSR, loadLE(Ity_V256, mkexpr(addr))); 23566 DIP("%s %s,%s,%s\n", 23567 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD)); 23568 } 23569 23570 IRTemp res = IRTemp_INVALID; 23571 if (op != Iop_INVALID) { 23572 vassert(opFn == NULL); 23573 res = newTemp(Ity_V256); 23574 if (requiresRMode(op)) { 23575 IRTemp rm = newTemp(Ity_I32); 23576 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */ 23577 assign(res, swapArgs 23578 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL)) 23579 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR))); 23580 } else { 23581 assign(res, swapArgs 23582 ? binop(op, mkexpr(tSR), mkexpr(tSL)) 23583 : binop(op, mkexpr(tSL), mkexpr(tSR))); 23584 } 23585 } else { 23586 vassert(opFn != NULL); 23587 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR); 23588 } 23589 23590 putYMMReg(rD, mkexpr(res)); 23591 23592 *uses_vvvv = True; 23593 return delta; 23594 } 23595 23596 23597 /* All-lanes AVX256 binary operation: 23598 G[255:0] = V[255:0] `op` E[255:0] 23599 */ 23600 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv, 23601 const VexAbiInfo* vbi, 23602 Prefix pfx, Long delta, 23603 const HChar* opname, IROp op ) 23604 { 23605 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23606 uses_vvvv, vbi, pfx, delta, opname, op, 23607 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/ 23608 ); 23609 } 23610 23611 23612 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp 23613 for the operation, no inversion of the left arg, and no swapping of 23614 args. 
*/ 23615 static 23616 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple ( 23617 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi, 23618 Prefix pfx, Long delta, const HChar* name, 23619 IROp op 23620 ) 23621 { 23622 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23623 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False); 23624 } 23625 23626 23627 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR 23628 generator to compute the result, no inversion of the left 23629 arg, and no swapping of args. */ 23630 static 23631 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex ( 23632 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi, 23633 Prefix pfx, Long delta, const HChar* name, 23634 IRTemp(*opFn)(IRTemp,IRTemp) 23635 ) 23636 { 23637 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 23638 uses_vvvv, vbi, pfx, delta, name, 23639 Iop_INVALID, opFn, False, False ); 23640 } 23641 23642 23643 /* Handles AVX256 unary E-to-G all-lanes operations. */ 23644 static 23645 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv, 23646 const VexAbiInfo* vbi, 23647 Prefix pfx, Long delta, 23648 const HChar* opname, 23649 IRTemp (*opFn)(IRTemp) ) 23650 { 23651 HChar dis_buf[50]; 23652 Int alen; 23653 IRTemp addr; 23654 IRTemp res = newTemp(Ity_V256); 23655 IRTemp arg = newTemp(Ity_V256); 23656 UChar rm = getUChar(delta); 23657 UInt rG = gregOfRexRM(pfx, rm); 23658 if (epartIsReg(rm)) { 23659 UInt rE = eregOfRexRM(pfx,rm); 23660 assign(arg, getYMMReg(rE)); 23661 delta += 1; 23662 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG)); 23663 } else { 23664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23665 assign(arg, loadLE(Ity_V256, mkexpr(addr))); 23666 delta += alen; 23667 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG)); 23668 } 23669 res = opFn(arg); 23670 putYMMReg( rG, mkexpr(res) ); 23671 *uses_vvvv = False; 23672 return delta; 23673 } 23674 23675 23676 /* Handles AVX256 unary E-to-G all-lanes operations. 
*/ 23677 static 23678 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv, 23679 const VexAbiInfo* vbi, 23680 Prefix pfx, Long delta, 23681 const HChar* opname, IROp op ) 23682 { 23683 HChar dis_buf[50]; 23684 Int alen; 23685 IRTemp addr; 23686 IRTemp arg = newTemp(Ity_V256); 23687 UChar rm = getUChar(delta); 23688 UInt rG = gregOfRexRM(pfx, rm); 23689 if (epartIsReg(rm)) { 23690 UInt rE = eregOfRexRM(pfx,rm); 23691 assign(arg, getYMMReg(rE)); 23692 delta += 1; 23693 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG)); 23694 } else { 23695 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23696 assign(arg, loadLE(Ity_V256, mkexpr(addr))); 23697 delta += alen; 23698 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG)); 23699 } 23700 putYMMReg( rG, unop(op, mkexpr(arg)) ); 23701 *uses_vvvv = False; 23702 return delta; 23703 } 23704 23705 23706 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we 23707 had a variant of Iop_64x4toV256 that took F64s as args instead. 
*/ 23708 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx, 23709 Long delta ) 23710 { 23711 IRTemp addr = IRTemp_INVALID; 23712 Int alen = 0; 23713 HChar dis_buf[50]; 23714 UChar modrm = getUChar(delta); 23715 IRTemp sV = newTemp(Ity_V128); 23716 UInt rG = gregOfRexRM(pfx,modrm); 23717 if (epartIsReg(modrm)) { 23718 UInt rE = eregOfRexRM(pfx,modrm); 23719 assign( sV, getXMMReg(rE) ); 23720 delta += 1; 23721 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 23722 } else { 23723 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23724 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 23725 delta += alen; 23726 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) ); 23727 } 23728 IRTemp s3, s2, s1, s0; 23729 s3 = s2 = s1 = s0 = IRTemp_INVALID; 23730 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 23731 IRExpr* res 23732 = IRExpr_Qop( 23733 Iop_64x4toV256, 23734 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))), 23735 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))), 23736 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))), 23737 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0))) 23738 ); 23739 putYMMReg(rG, res); 23740 return delta; 23741 } 23742 23743 23744 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx, 23745 Long delta ) 23746 { 23747 IRTemp addr = IRTemp_INVALID; 23748 Int alen = 0; 23749 HChar dis_buf[50]; 23750 UChar modrm = getUChar(delta); 23751 UInt rG = gregOfRexRM(pfx,modrm); 23752 IRTemp argV = newTemp(Ity_V256); 23753 IRTemp rmode = newTemp(Ity_I32); 23754 if (epartIsReg(modrm)) { 23755 UInt rE = eregOfRexRM(pfx,modrm); 23756 assign( argV, getYMMReg(rE) ); 23757 delta += 1; 23758 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG)); 23759 } else { 23760 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23761 assign( argV, loadLE(Ity_V256, mkexpr(addr)) ); 23762 delta += alen; 23763 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) ); 23764 } 23765 23766 assign( rmode, 
get_sse_roundingmode() ); 23767 IRTemp t3, t2, t1, t0; 23768 t3 = t2 = t1 = t0 = IRTemp_INVALID; 23769 breakupV256to64s( argV, &t3, &t2, &t1, &t0 ); 23770 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \ 23771 unop(Iop_ReinterpI64asF64, mkexpr(_t)) ) 23772 putXMMRegLane32F( rG, 3, CVT(t3) ); 23773 putXMMRegLane32F( rG, 2, CVT(t2) ); 23774 putXMMRegLane32F( rG, 1, CVT(t1) ); 23775 putXMMRegLane32F( rG, 0, CVT(t0) ); 23776 # undef CVT 23777 putYMMRegLane128( rG, 1, mkV128(0) ); 23778 return delta; 23779 } 23780 23781 23782 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRType tR, IROp op ) 23783 { 23784 IRTemp tLhi, tLlo, tRhi, tRlo; 23785 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID; 23786 IRTemp res = newTemp(Ity_V256); 23787 breakupV256toV128s( tL, &tLhi, &tLlo ); 23788 breakupV256toV128s( tR, &tRhi, &tRlo ); 23789 assign( res, binop( Iop_V128HLtoV256, 23790 binop( op, mkexpr(tRhi), mkexpr(tLhi) ), 23791 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) ); 23792 return res; 23793 } 23794 23795 23796 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR ) 23797 { 23798 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 ); 23799 } 23800 23801 23802 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR ) 23803 { 23804 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 ); 23805 } 23806 23807 23808 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR ) 23809 { 23810 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 ); 23811 } 23812 23813 23814 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR ) 23815 { 23816 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 ); 23817 } 23818 23819 23820 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR ) 23821 { 23822 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 ); 23823 } 23824 23825 23826 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR ) 23827 { 23828 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 ); 23829 } 23830 23831 23832 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp 
tR ) 23833 { 23834 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 ); 23835 } 23836 23837 23838 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR ) 23839 { 23840 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 ); 23841 } 23842 23843 23844 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR ) 23845 { 23846 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 ); 23847 } 23848 23849 23850 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR ) 23851 { 23852 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 ); 23853 } 23854 23855 23856 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR ) 23857 { 23858 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 ); 23859 } 23860 23861 23862 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR ) 23863 { 23864 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 ); 23865 } 23866 23867 23868 __attribute__((noinline)) 23869 static 23870 Long dis_ESC_0F__VEX ( 23871 /*MB_OUT*/DisResult* dres, 23872 /*OUT*/ Bool* uses_vvvv, 23873 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 23874 Bool resteerCisOk, 23875 void* callback_opaque, 23876 const VexArchInfo* archinfo, 23877 const VexAbiInfo* vbi, 23878 Prefix pfx, Int sz, Long deltaIN 23879 ) 23880 { 23881 IRTemp addr = IRTemp_INVALID; 23882 Int alen = 0; 23883 HChar dis_buf[50]; 23884 Long delta = deltaIN; 23885 UChar opc = getUChar(delta); 23886 delta++; 23887 *uses_vvvv = False; 23888 23889 switch (opc) { 23890 23891 case 0x10: 23892 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 23893 /* Move 64 bits from E (mem only) to G (lo half xmm). 23894 Bits 255-64 of the dest are zeroed out. */ 23895 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 23896 UChar modrm = getUChar(delta); 23897 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23898 UInt rG = gregOfRexRM(pfx,modrm); 23899 IRTemp z128 = newTemp(Ity_V128); 23900 assign(z128, mkV128(0)); 23901 putXMMReg( rG, mkexpr(z128) ); 23902 /* FIXME: ALIGNMENT CHECK? 
*/ 23903 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 23904 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23905 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG)); 23906 delta += alen; 23907 goto decode_success; 23908 } 23909 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */ 23910 /* Reg form. */ 23911 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 23912 UChar modrm = getUChar(delta); 23913 UInt rG = gregOfRexRM(pfx, modrm); 23914 UInt rE = eregOfRexRM(pfx, modrm); 23915 UInt rV = getVexNvvvv(pfx); 23916 delta++; 23917 DIP("vmovsd %s,%s,%s\n", 23918 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23919 IRTemp res = newTemp(Ity_V128); 23920 assign(res, binop(Iop_64HLtoV128, 23921 getXMMRegLane64(rV, 1), 23922 getXMMRegLane64(rE, 0))); 23923 putYMMRegLoAndZU(rG, mkexpr(res)); 23924 *uses_vvvv = True; 23925 goto decode_success; 23926 } 23927 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23928 /* Move 32 bits from E (mem only) to G (lo half xmm). 23929 Bits 255-32 of the dest are zeroed out. */ 23930 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 23931 UChar modrm = getUChar(delta); 23932 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23933 UInt rG = gregOfRexRM(pfx,modrm); 23934 IRTemp z128 = newTemp(Ity_V128); 23935 assign(z128, mkV128(0)); 23936 putXMMReg( rG, mkexpr(z128) ); 23937 /* FIXME: ALIGNMENT CHECK? */ 23938 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) ); 23939 putYMMRegLane128( rG, 1, mkexpr(z128) ); 23940 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG)); 23941 delta += alen; 23942 goto decode_success; 23943 } 23944 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */ 23945 /* Reg form. 
*/ 23946 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 23947 UChar modrm = getUChar(delta); 23948 UInt rG = gregOfRexRM(pfx, modrm); 23949 UInt rE = eregOfRexRM(pfx, modrm); 23950 UInt rV = getVexNvvvv(pfx); 23951 delta++; 23952 DIP("vmovss %s,%s,%s\n", 23953 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 23954 IRTemp res = newTemp(Ity_V128); 23955 assign( res, binop( Iop_64HLtoV128, 23956 getXMMRegLane64(rV, 1), 23957 binop(Iop_32HLto64, 23958 getXMMRegLane32(rV, 1), 23959 getXMMRegLane32(rE, 0)) ) ); 23960 putYMMRegLoAndZU(rG, mkexpr(res)); 23961 *uses_vvvv = True; 23962 goto decode_success; 23963 } 23964 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */ 23965 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 23966 UChar modrm = getUChar(delta); 23967 UInt rG = gregOfRexRM(pfx, modrm); 23968 if (epartIsReg(modrm)) { 23969 UInt rE = eregOfRexRM(pfx,modrm); 23970 putYMMRegLoAndZU( rG, getXMMReg( rE )); 23971 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 23972 delta += 1; 23973 } else { 23974 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23975 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 23976 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG)); 23977 delta += alen; 23978 } 23979 goto decode_success; 23980 } 23981 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */ 23982 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 23983 UChar modrm = getUChar(delta); 23984 UInt rG = gregOfRexRM(pfx, modrm); 23985 if (epartIsReg(modrm)) { 23986 UInt rE = eregOfRexRM(pfx,modrm); 23987 putYMMReg( rG, getYMMReg( rE )); 23988 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 23989 delta += 1; 23990 } else { 23991 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 23992 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 23993 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG)); 23994 delta += alen; 23995 } 23996 goto decode_success; 23997 } 23998 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */ 23999 if 
(haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24000 UChar modrm = getUChar(delta); 24001 UInt rG = gregOfRexRM(pfx, modrm); 24002 if (epartIsReg(modrm)) { 24003 UInt rE = eregOfRexRM(pfx,modrm); 24004 putYMMRegLoAndZU( rG, getXMMReg( rE )); 24005 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 24006 delta += 1; 24007 } else { 24008 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24009 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 24010 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG)); 24011 delta += alen; 24012 } 24013 goto decode_success; 24014 } 24015 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */ 24016 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24017 UChar modrm = getUChar(delta); 24018 UInt rG = gregOfRexRM(pfx, modrm); 24019 if (epartIsReg(modrm)) { 24020 UInt rE = eregOfRexRM(pfx,modrm); 24021 putYMMReg( rG, getYMMReg( rE )); 24022 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 24023 delta += 1; 24024 } else { 24025 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24026 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 24027 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG)); 24028 delta += alen; 24029 } 24030 goto decode_success; 24031 } 24032 break; 24033 24034 case 0x11: 24035 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */ 24036 /* Move 64 bits from G (low half xmm) to mem only. */ 24037 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) { 24038 UChar modrm = getUChar(delta); 24039 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24040 UInt rG = gregOfRexRM(pfx,modrm); 24041 /* FIXME: ALIGNMENT CHECK? */ 24042 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0)); 24043 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf); 24044 delta += alen; 24045 goto decode_success; 24046 } 24047 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */ 24048 /* Reg form. 
*/ 24049 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) { 24050 UChar modrm = getUChar(delta); 24051 UInt rG = gregOfRexRM(pfx, modrm); 24052 UInt rE = eregOfRexRM(pfx, modrm); 24053 UInt rV = getVexNvvvv(pfx); 24054 delta++; 24055 DIP("vmovsd %s,%s,%s\n", 24056 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24057 IRTemp res = newTemp(Ity_V128); 24058 assign(res, binop(Iop_64HLtoV128, 24059 getXMMRegLane64(rV, 1), 24060 getXMMRegLane64(rE, 0))); 24061 putYMMRegLoAndZU(rG, mkexpr(res)); 24062 *uses_vvvv = True; 24063 goto decode_success; 24064 } 24065 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */ 24066 /* Move 32 bits from G (low 1/4 xmm) to mem only. */ 24067 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) { 24068 UChar modrm = getUChar(delta); 24069 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24070 UInt rG = gregOfRexRM(pfx,modrm); 24071 /* FIXME: ALIGNMENT CHECK? */ 24072 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0)); 24073 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf); 24074 delta += alen; 24075 goto decode_success; 24076 } 24077 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */ 24078 /* Reg form. 
*/ 24079 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) { 24080 UChar modrm = getUChar(delta); 24081 UInt rG = gregOfRexRM(pfx, modrm); 24082 UInt rE = eregOfRexRM(pfx, modrm); 24083 UInt rV = getVexNvvvv(pfx); 24084 delta++; 24085 DIP("vmovss %s,%s,%s\n", 24086 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24087 IRTemp res = newTemp(Ity_V128); 24088 assign( res, binop( Iop_64HLtoV128, 24089 getXMMRegLane64(rV, 1), 24090 binop(Iop_32HLto64, 24091 getXMMRegLane32(rV, 1), 24092 getXMMRegLane32(rE, 0)) ) ); 24093 putYMMRegLoAndZU(rG, mkexpr(res)); 24094 *uses_vvvv = True; 24095 goto decode_success; 24096 } 24097 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */ 24098 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24099 UChar modrm = getUChar(delta); 24100 UInt rG = gregOfRexRM(pfx,modrm); 24101 if (epartIsReg(modrm)) { 24102 UInt rE = eregOfRexRM(pfx,modrm); 24103 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24104 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24105 delta += 1; 24106 } else { 24107 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24108 storeLE( mkexpr(addr), getXMMReg(rG) ); 24109 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf); 24110 delta += alen; 24111 } 24112 goto decode_success; 24113 } 24114 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */ 24115 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24116 UChar modrm = getUChar(delta); 24117 UInt rG = gregOfRexRM(pfx,modrm); 24118 if (epartIsReg(modrm)) { 24119 UInt rE = eregOfRexRM(pfx,modrm); 24120 putYMMReg( rE, getYMMReg(rG) ); 24121 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24122 delta += 1; 24123 } else { 24124 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24125 storeLE( mkexpr(addr), getYMMReg(rG) ); 24126 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf); 24127 delta += alen; 24128 } 24129 goto decode_success; 24130 } 24131 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */ 24132 if (haveNo66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 24133 UChar modrm = getUChar(delta); 24134 UInt rG = gregOfRexRM(pfx,modrm); 24135 if (epartIsReg(modrm)) { 24136 UInt rE = eregOfRexRM(pfx,modrm); 24137 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24138 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24139 delta += 1; 24140 } else { 24141 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24142 storeLE( mkexpr(addr), getXMMReg(rG) ); 24143 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf); 24144 delta += alen; 24145 } 24146 goto decode_success; 24147 } 24148 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */ 24149 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24150 UChar modrm = getUChar(delta); 24151 UInt rG = gregOfRexRM(pfx,modrm); 24152 if (epartIsReg(modrm)) { 24153 UInt rE = eregOfRexRM(pfx,modrm); 24154 putYMMReg( rE, getYMMReg(rG) ); 24155 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24156 delta += 1; 24157 } else { 24158 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24159 storeLE( mkexpr(addr), getYMMReg(rG) ); 24160 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf); 24161 delta += alen; 24162 } 24163 goto decode_success; 24164 } 24165 break; 24166 24167 case 0x12: 24168 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */ 24169 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24170 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ ); 24171 goto decode_success; 24172 } 24173 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */ 24174 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24175 delta = dis_MOVDDUP_256( vbi, pfx, delta ); 24176 goto decode_success; 24177 } 24178 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */ 24179 /* Insn only exists in reg form */ 24180 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 24181 && epartIsReg(getUChar(delta))) { 24182 UChar modrm = getUChar(delta); 24183 UInt rG = gregOfRexRM(pfx, modrm); 24184 UInt rE = eregOfRexRM(pfx, modrm); 24185 UInt rV = getVexNvvvv(pfx); 24186 
delta++; 24187 DIP("vmovhlps %s,%s,%s\n", 24188 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24189 IRTemp res = newTemp(Ity_V128); 24190 assign(res, binop(Iop_64HLtoV128, 24191 getXMMRegLane64(rV, 1), 24192 getXMMRegLane64(rE, 1))); 24193 putYMMRegLoAndZU(rG, mkexpr(res)); 24194 *uses_vvvv = True; 24195 goto decode_success; 24196 } 24197 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */ 24198 /* Insn exists only in mem form, it appears. */ 24199 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */ 24200 /* Insn exists only in mem form, it appears. */ 24201 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24202 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24203 UChar modrm = getUChar(delta); 24204 UInt rG = gregOfRexRM(pfx, modrm); 24205 UInt rV = getVexNvvvv(pfx); 24206 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24207 delta += alen; 24208 DIP("vmovlpd %s,%s,%s\n", 24209 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 24210 IRTemp res = newTemp(Ity_V128); 24211 assign(res, binop(Iop_64HLtoV128, 24212 getXMMRegLane64(rV, 1), 24213 loadLE(Ity_I64, mkexpr(addr)))); 24214 putYMMRegLoAndZU(rG, mkexpr(res)); 24215 *uses_vvvv = True; 24216 goto decode_success; 24217 } 24218 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */ 24219 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 24220 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 24221 True/*isL*/ ); 24222 goto decode_success; 24223 } 24224 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */ 24225 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 24226 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ ); 24227 goto decode_success; 24228 } 24229 break; 24230 24231 case 0x13: 24232 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */ 24233 /* Insn exists only in mem form, it appears. */ 24234 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */ 24235 /* Insn exists only in mem form, it appears. 
*/ 24236 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24237 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24238 UChar modrm = getUChar(delta); 24239 UInt rG = gregOfRexRM(pfx, modrm); 24240 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24241 delta += alen; 24242 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0)); 24243 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf); 24244 goto decode_success; 24245 } 24246 break; 24247 24248 case 0x14: 24249 case 0x15: 24250 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */ 24251 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */ 24252 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24253 Bool hi = opc == 0x15; 24254 UChar modrm = getUChar(delta); 24255 UInt rG = gregOfRexRM(pfx,modrm); 24256 UInt rV = getVexNvvvv(pfx); 24257 IRTemp eV = newTemp(Ity_V128); 24258 IRTemp vV = newTemp(Ity_V128); 24259 assign( vV, getXMMReg(rV) ); 24260 if (epartIsReg(modrm)) { 24261 UInt rE = eregOfRexRM(pfx,modrm); 24262 assign( eV, getXMMReg(rE) ); 24263 delta += 1; 24264 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 24265 nameXMMReg(rE), nameXMMReg(rG)); 24266 } else { 24267 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24268 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 24269 delta += alen; 24270 DIP("vunpck%sps %s,%s\n", hi ? 
"h" : "l", 24271 dis_buf, nameXMMReg(rG)); 24272 } 24273 IRTemp res = math_UNPCKxPS_128( eV, vV, hi ); 24274 putYMMRegLoAndZU( rG, mkexpr(res) ); 24275 *uses_vvvv = True; 24276 goto decode_success; 24277 } 24278 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */ 24279 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */ 24280 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24281 Bool hi = opc == 0x15; 24282 UChar modrm = getUChar(delta); 24283 UInt rG = gregOfRexRM(pfx,modrm); 24284 UInt rV = getVexNvvvv(pfx); 24285 IRTemp eV = newTemp(Ity_V256); 24286 IRTemp vV = newTemp(Ity_V256); 24287 assign( vV, getYMMReg(rV) ); 24288 if (epartIsReg(modrm)) { 24289 UInt rE = eregOfRexRM(pfx,modrm); 24290 assign( eV, getYMMReg(rE) ); 24291 delta += 1; 24292 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 24293 nameYMMReg(rE), nameYMMReg(rG)); 24294 } else { 24295 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24296 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 24297 delta += alen; 24298 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l", 24299 dis_buf, nameYMMReg(rG)); 24300 } 24301 IRTemp res = math_UNPCKxPS_256( eV, vV, hi ); 24302 putYMMReg( rG, mkexpr(res) ); 24303 *uses_vvvv = True; 24304 goto decode_success; 24305 } 24306 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */ 24307 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */ 24308 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24309 Bool hi = opc == 0x15; 24310 UChar modrm = getUChar(delta); 24311 UInt rG = gregOfRexRM(pfx,modrm); 24312 UInt rV = getVexNvvvv(pfx); 24313 IRTemp eV = newTemp(Ity_V128); 24314 IRTemp vV = newTemp(Ity_V128); 24315 assign( vV, getXMMReg(rV) ); 24316 if (epartIsReg(modrm)) { 24317 UInt rE = eregOfRexRM(pfx,modrm); 24318 assign( eV, getXMMReg(rE) ); 24319 delta += 1; 24320 DIP("vunpck%spd %s,%s\n", hi ? 
"h" : "l", 24321 nameXMMReg(rE), nameXMMReg(rG)); 24322 } else { 24323 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24324 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 24325 delta += alen; 24326 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 24327 dis_buf, nameXMMReg(rG)); 24328 } 24329 IRTemp res = math_UNPCKxPD_128( eV, vV, hi ); 24330 putYMMRegLoAndZU( rG, mkexpr(res) ); 24331 *uses_vvvv = True; 24332 goto decode_success; 24333 } 24334 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */ 24335 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */ 24336 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24337 Bool hi = opc == 0x15; 24338 UChar modrm = getUChar(delta); 24339 UInt rG = gregOfRexRM(pfx,modrm); 24340 UInt rV = getVexNvvvv(pfx); 24341 IRTemp eV = newTemp(Ity_V256); 24342 IRTemp vV = newTemp(Ity_V256); 24343 assign( vV, getYMMReg(rV) ); 24344 if (epartIsReg(modrm)) { 24345 UInt rE = eregOfRexRM(pfx,modrm); 24346 assign( eV, getYMMReg(rE) ); 24347 delta += 1; 24348 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l", 24349 nameYMMReg(rE), nameYMMReg(rG)); 24350 } else { 24351 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24352 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 24353 delta += alen; 24354 DIP("vunpck%spd %s,%s\n", hi ? 
"h" : "l", 24355 dis_buf, nameYMMReg(rG)); 24356 } 24357 IRTemp res = math_UNPCKxPD_256( eV, vV, hi ); 24358 putYMMReg( rG, mkexpr(res) ); 24359 *uses_vvvv = True; 24360 goto decode_success; 24361 } 24362 break; 24363 24364 case 0x16: 24365 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */ 24366 /* Insn only exists in reg form */ 24367 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 24368 && epartIsReg(getUChar(delta))) { 24369 UChar modrm = getUChar(delta); 24370 UInt rG = gregOfRexRM(pfx, modrm); 24371 UInt rE = eregOfRexRM(pfx, modrm); 24372 UInt rV = getVexNvvvv(pfx); 24373 delta++; 24374 DIP("vmovlhps %s,%s,%s\n", 24375 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 24376 IRTemp res = newTemp(Ity_V128); 24377 assign(res, binop(Iop_64HLtoV128, 24378 getXMMRegLane64(rE, 0), 24379 getXMMRegLane64(rV, 0))); 24380 putYMMRegLoAndZU(rG, mkexpr(res)); 24381 *uses_vvvv = True; 24382 goto decode_success; 24383 } 24384 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */ 24385 /* Insn exists only in mem form, it appears. */ 24386 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */ 24387 /* Insn exists only in mem form, it appears. */ 24388 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24389 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24390 UChar modrm = getUChar(delta); 24391 UInt rG = gregOfRexRM(pfx, modrm); 24392 UInt rV = getVexNvvvv(pfx); 24393 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24394 delta += alen; 24395 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 
'd' : 's', 24396 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 24397 IRTemp res = newTemp(Ity_V128); 24398 assign(res, binop(Iop_64HLtoV128, 24399 loadLE(Ity_I64, mkexpr(addr)), 24400 getXMMRegLane64(rV, 0))); 24401 putYMMRegLoAndZU(rG, mkexpr(res)); 24402 *uses_vvvv = True; 24403 goto decode_success; 24404 } 24405 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */ 24406 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 24407 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/, 24408 False/*!isL*/ ); 24409 goto decode_success; 24410 } 24411 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */ 24412 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 24413 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ ); 24414 goto decode_success; 24415 } 24416 break; 24417 24418 case 0x17: 24419 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */ 24420 /* Insn exists only in mem form, it appears. */ 24421 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */ 24422 /* Insn exists only in mem form, it appears. */ 24423 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24424 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24425 UChar modrm = getUChar(delta); 24426 UInt rG = gregOfRexRM(pfx, modrm); 24427 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24428 delta += alen; 24429 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1)); 24430 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 24431 nameXMMReg(rG), dis_buf); 24432 goto decode_success; 24433 } 24434 break; 24435 24436 case 0x28: 24437 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */ 24438 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24439 UChar modrm = getUChar(delta); 24440 UInt rG = gregOfRexRM(pfx, modrm); 24441 if (epartIsReg(modrm)) { 24442 UInt rE = eregOfRexRM(pfx,modrm); 24443 putYMMRegLoAndZU( rG, getXMMReg( rE )); 24444 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 24445 delta += 1; 24446 } else { 24447 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24448 gen_SEGV_if_not_16_aligned( addr ); 24449 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 24450 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG)); 24451 delta += alen; 24452 } 24453 goto decode_success; 24454 } 24455 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */ 24456 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24457 UChar modrm = getUChar(delta); 24458 UInt rG = gregOfRexRM(pfx, modrm); 24459 if (epartIsReg(modrm)) { 24460 UInt rE = eregOfRexRM(pfx,modrm); 24461 putYMMReg( rG, getYMMReg( rE )); 24462 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 24463 delta += 1; 24464 } else { 24465 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24466 gen_SEGV_if_not_32_aligned( addr ); 24467 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 24468 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG)); 24469 delta += alen; 24470 } 24471 goto decode_success; 24472 } 24473 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */ 24474 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24475 UChar modrm = getUChar(delta); 24476 UInt rG = gregOfRexRM(pfx, modrm); 24477 if (epartIsReg(modrm)) { 24478 UInt rE = eregOfRexRM(pfx,modrm); 24479 putYMMRegLoAndZU( rG, getXMMReg( rE )); 24480 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 24481 delta += 1; 24482 } else { 24483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24484 gen_SEGV_if_not_16_aligned( 
addr ); 24485 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) ); 24486 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG)); 24487 delta += alen; 24488 } 24489 goto decode_success; 24490 } 24491 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */ 24492 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24493 UChar modrm = getUChar(delta); 24494 UInt rG = gregOfRexRM(pfx, modrm); 24495 if (epartIsReg(modrm)) { 24496 UInt rE = eregOfRexRM(pfx,modrm); 24497 putYMMReg( rG, getYMMReg( rE )); 24498 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG)); 24499 delta += 1; 24500 } else { 24501 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24502 gen_SEGV_if_not_32_aligned( addr ); 24503 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) ); 24504 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG)); 24505 delta += alen; 24506 } 24507 goto decode_success; 24508 } 24509 break; 24510 24511 case 0x29: 24512 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */ 24513 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24514 UChar modrm = getUChar(delta); 24515 UInt rG = gregOfRexRM(pfx,modrm); 24516 if (epartIsReg(modrm)) { 24517 UInt rE = eregOfRexRM(pfx,modrm); 24518 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24519 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24520 delta += 1; 24521 } else { 24522 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24523 gen_SEGV_if_not_16_aligned( addr ); 24524 storeLE( mkexpr(addr), getXMMReg(rG) ); 24525 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf ); 24526 delta += alen; 24527 } 24528 goto decode_success; 24529 } 24530 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */ 24531 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24532 UChar modrm = getUChar(delta); 24533 UInt rG = gregOfRexRM(pfx,modrm); 24534 if (epartIsReg(modrm)) { 24535 UInt rE = eregOfRexRM(pfx,modrm); 24536 putYMMReg( rE, getYMMReg(rG) ); 24537 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24538 delta += 1; 24539 } else { 24540 addr = 
disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24541 gen_SEGV_if_not_32_aligned( addr ); 24542 storeLE( mkexpr(addr), getYMMReg(rG) ); 24543 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf ); 24544 delta += alen; 24545 } 24546 goto decode_success; 24547 } 24548 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */ 24549 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24550 UChar modrm = getUChar(delta); 24551 UInt rG = gregOfRexRM(pfx,modrm); 24552 if (epartIsReg(modrm)) { 24553 UInt rE = eregOfRexRM(pfx,modrm); 24554 putYMMRegLoAndZU( rE, getXMMReg(rG) ); 24555 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE)); 24556 delta += 1; 24557 goto decode_success; 24558 } else { 24559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24560 gen_SEGV_if_not_16_aligned( addr ); 24561 storeLE( mkexpr(addr), getXMMReg(rG) ); 24562 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf ); 24563 delta += alen; 24564 goto decode_success; 24565 } 24566 } 24567 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */ 24568 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24569 UChar modrm = getUChar(delta); 24570 UInt rG = gregOfRexRM(pfx,modrm); 24571 if (epartIsReg(modrm)) { 24572 UInt rE = eregOfRexRM(pfx,modrm); 24573 putYMMReg( rE, getYMMReg(rG) ); 24574 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE)); 24575 delta += 1; 24576 goto decode_success; 24577 } else { 24578 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24579 gen_SEGV_if_not_32_aligned( addr ); 24580 storeLE( mkexpr(addr), getYMMReg(rG) ); 24581 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf ); 24582 delta += alen; 24583 goto decode_success; 24584 } 24585 } 24586 break; 24587 24588 case 0x2A: { 24589 IRTemp rmode = newTemp(Ity_I32); 24590 assign( rmode, get_sse_roundingmode() ); 24591 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */ 24592 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24593 UChar modrm = getUChar(delta); 24594 UInt rV = getVexNvvvv(pfx); 24595 UInt rD = 
gregOfRexRM(pfx, modrm); 24596 IRTemp arg32 = newTemp(Ity_I32); 24597 if (epartIsReg(modrm)) { 24598 UInt rS = eregOfRexRM(pfx,modrm); 24599 assign( arg32, getIReg32(rS) ); 24600 delta += 1; 24601 DIP("vcvtsi2sdl %s,%s,%s\n", 24602 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24603 } else { 24604 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24605 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24606 delta += alen; 24607 DIP("vcvtsi2sdl %s,%s,%s\n", 24608 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24609 } 24610 putXMMRegLane64F( rD, 0, 24611 unop(Iop_I32StoF64, mkexpr(arg32))); 24612 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24613 putYMMRegLane128( rD, 1, mkV128(0) ); 24614 *uses_vvvv = True; 24615 goto decode_success; 24616 } 24617 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */ 24618 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24619 UChar modrm = getUChar(delta); 24620 UInt rV = getVexNvvvv(pfx); 24621 UInt rD = gregOfRexRM(pfx, modrm); 24622 IRTemp arg64 = newTemp(Ity_I64); 24623 if (epartIsReg(modrm)) { 24624 UInt rS = eregOfRexRM(pfx,modrm); 24625 assign( arg64, getIReg64(rS) ); 24626 delta += 1; 24627 DIP("vcvtsi2sdq %s,%s,%s\n", 24628 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24629 } else { 24630 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24631 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24632 delta += alen; 24633 DIP("vcvtsi2sdq %s,%s,%s\n", 24634 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24635 } 24636 putXMMRegLane64F( rD, 0, 24637 binop( Iop_I64StoF64, 24638 get_sse_roundingmode(), 24639 mkexpr(arg64)) ); 24640 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24641 putYMMRegLane128( rD, 1, mkV128(0) ); 24642 *uses_vvvv = True; 24643 goto decode_success; 24644 } 24645 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */ 24646 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24647 UChar modrm = getUChar(delta); 24648 UInt rV = getVexNvvvv(pfx); 24649 UInt rD = 
gregOfRexRM(pfx, modrm); 24650 IRTemp arg64 = newTemp(Ity_I64); 24651 if (epartIsReg(modrm)) { 24652 UInt rS = eregOfRexRM(pfx,modrm); 24653 assign( arg64, getIReg64(rS) ); 24654 delta += 1; 24655 DIP("vcvtsi2ssq %s,%s,%s\n", 24656 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD)); 24657 } else { 24658 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24659 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) ); 24660 delta += alen; 24661 DIP("vcvtsi2ssq %s,%s,%s\n", 24662 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24663 } 24664 putXMMRegLane32F( rD, 0, 24665 binop(Iop_F64toF32, 24666 mkexpr(rmode), 24667 binop(Iop_I64StoF64, mkexpr(rmode), 24668 mkexpr(arg64)) ) ); 24669 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24670 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24671 putYMMRegLane128( rD, 1, mkV128(0) ); 24672 *uses_vvvv = True; 24673 goto decode_success; 24674 } 24675 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */ 24676 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24677 UChar modrm = getUChar(delta); 24678 UInt rV = getVexNvvvv(pfx); 24679 UInt rD = gregOfRexRM(pfx, modrm); 24680 IRTemp arg32 = newTemp(Ity_I32); 24681 if (epartIsReg(modrm)) { 24682 UInt rS = eregOfRexRM(pfx,modrm); 24683 assign( arg32, getIReg32(rS) ); 24684 delta += 1; 24685 DIP("vcvtsi2ssl %s,%s,%s\n", 24686 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD)); 24687 } else { 24688 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 24689 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) ); 24690 delta += alen; 24691 DIP("vcvtsi2ssl %s,%s,%s\n", 24692 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 24693 } 24694 putXMMRegLane32F( rD, 0, 24695 binop(Iop_F64toF32, 24696 mkexpr(rmode), 24697 unop(Iop_I32StoF64, mkexpr(arg32)) ) ); 24698 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 24699 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 24700 putYMMRegLane128( rD, 1, mkV128(0) ); 24701 *uses_vvvv = True; 24702 goto decode_success; 24703 } 24704 break; 24705 } 24706 
24707 case 0x2B: 24708 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */ 24709 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */ 24710 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24711 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) { 24712 UChar modrm = getUChar(delta); 24713 UInt rS = gregOfRexRM(pfx, modrm); 24714 IRTemp tS = newTemp(Ity_V128); 24715 assign(tS, getXMMReg(rS)); 24716 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24717 delta += alen; 24718 gen_SEGV_if_not_16_aligned(addr); 24719 storeLE(mkexpr(addr), mkexpr(tS)); 24720 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's', 24721 nameXMMReg(rS), dis_buf); 24722 goto decode_success; 24723 } 24724 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */ 24725 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */ 24726 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx)) 24727 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) { 24728 UChar modrm = getUChar(delta); 24729 UInt rS = gregOfRexRM(pfx, modrm); 24730 IRTemp tS = newTemp(Ity_V256); 24731 assign(tS, getYMMReg(rS)); 24732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 24733 delta += alen; 24734 gen_SEGV_if_not_32_aligned(addr); 24735 storeLE(mkexpr(addr), mkexpr(tS)); 24736 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 
'd' : 's', 24737 nameYMMReg(rS), dis_buf); 24738 goto decode_success; 24739 } 24740 break; 24741 24742 case 0x2C: 24743 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */ 24744 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24745 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24746 goto decode_success; 24747 } 24748 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */ 24749 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24750 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24751 goto decode_success; 24752 } 24753 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */ 24754 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24755 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24756 goto decode_success; 24757 } 24758 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */ 24759 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24760 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24761 goto decode_success; 24762 } 24763 break; 24764 24765 case 0x2D: 24766 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */ 24767 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 24768 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24769 goto decode_success; 24770 } 24771 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */ 24772 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 24773 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24774 goto decode_success; 24775 } 24776 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */ 24777 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) { 24778 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4); 24779 goto decode_success; 24780 } 24781 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */ 24782 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) { 24783 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8); 24784 goto decode_success; 24785 } 24786 break; 24787 24788 case 0x2E: 24789 case 0x2F: 
24790 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */ 24791 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */ 24792 if (have66noF2noF3(pfx)) { 24793 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc ); 24794 goto decode_success; 24795 } 24796 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */ 24797 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */ 24798 if (haveNo66noF2noF3(pfx)) { 24799 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc ); 24800 goto decode_success; 24801 } 24802 break; 24803 24804 case 0x50: 24805 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */ 24806 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24807 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ ); 24808 goto decode_success; 24809 } 24810 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */ 24811 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24812 delta = dis_MOVMSKPD_256( vbi, pfx, delta ); 24813 goto decode_success; 24814 } 24815 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */ 24816 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24817 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ ); 24818 goto decode_success; 24819 } 24820 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */ 24821 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24822 delta = dis_MOVMSKPS_256( vbi, pfx, delta ); 24823 goto decode_success; 24824 } 24825 break; 24826 24827 case 0x51: 24828 /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */ 24829 if (haveF3no66noF2(pfx)) { 24830 delta = dis_AVX128_E_V_to_G_lo32_unary( 24831 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 ); 24832 goto decode_success; 24833 } 24834 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */ 24835 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24836 delta = dis_AVX128_E_to_G_unary_all( 24837 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 ); 24838 goto decode_success; 24839 } 24840 /* VSQRTPS ymm2/m256(E), ymm1(G) = 
VEX.NDS.256.0F.WIG 51 /r */ 24841 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24842 delta = dis_AVX256_E_to_G_unary_all( 24843 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 ); 24844 goto decode_success; 24845 } 24846 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */ 24847 if (haveF2no66noF3(pfx)) { 24848 delta = dis_AVX128_E_V_to_G_lo64_unary( 24849 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 ); 24850 goto decode_success; 24851 } 24852 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */ 24853 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24854 delta = dis_AVX128_E_to_G_unary_all( 24855 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 ); 24856 goto decode_success; 24857 } 24858 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */ 24859 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24860 delta = dis_AVX256_E_to_G_unary_all( 24861 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 ); 24862 goto decode_success; 24863 } 24864 break; 24865 24866 case 0x52: 24867 /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */ 24868 if (haveF3no66noF2(pfx)) { 24869 delta = dis_AVX128_E_V_to_G_lo32_unary( 24870 uses_vvvv, vbi, pfx, delta, "vrsqrtss", 24871 Iop_RSqrtEst32F0x4 ); 24872 goto decode_success; 24873 } 24874 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */ 24875 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24876 delta = dis_AVX128_E_to_G_unary_all( 24877 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 ); 24878 goto decode_success; 24879 } 24880 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */ 24881 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24882 delta = dis_AVX256_E_to_G_unary_all( 24883 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 ); 24884 goto decode_success; 24885 } 24886 break; 24887 24888 case 0x53: 24889 /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */ 
24890 if (haveF3no66noF2(pfx)) { 24891 delta = dis_AVX128_E_V_to_G_lo32_unary( 24892 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 ); 24893 goto decode_success; 24894 } 24895 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */ 24896 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24897 delta = dis_AVX128_E_to_G_unary_all( 24898 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 ); 24899 goto decode_success; 24900 } 24901 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */ 24902 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24903 delta = dis_AVX256_E_to_G_unary_all( 24904 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 ); 24905 goto decode_success; 24906 } 24907 break; 24908 24909 case 0x54: 24910 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24911 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */ 24912 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24913 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24914 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 ); 24915 goto decode_success; 24916 } 24917 /* VANDPD r/m, rV, r ::: r = rV & r/m */ 24918 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */ 24919 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24920 delta = dis_AVX256_E_V_to_G( 24921 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 ); 24922 goto decode_success; 24923 } 24924 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */ 24925 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24926 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24927 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 ); 24928 goto decode_success; 24929 } 24930 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */ 24931 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24932 delta = dis_AVX256_E_V_to_G( 24933 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 ); 24934 goto decode_success; 24935 } 24936 break; 24937 24938 case 0x55: 24939 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */ 24940 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */ 24941 if 
(have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24942 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24943 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128, 24944 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24945 goto decode_success; 24946 } 24947 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */ 24948 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24949 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24950 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256, 24951 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24952 goto decode_success; 24953 } 24954 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */ 24955 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24956 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 24957 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128, 24958 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24959 goto decode_success; 24960 } 24961 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */ 24962 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24963 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 24964 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256, 24965 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 24966 goto decode_success; 24967 } 24968 break; 24969 24970 case 0x56: 24971 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24972 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */ 24973 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24974 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24975 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 ); 24976 goto decode_success; 24977 } 24978 /* VORPD r/m, rV, r ::: r = rV | r/m */ 24979 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */ 24980 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24981 delta = dis_AVX256_E_V_to_G( 24982 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 ); 24983 goto decode_success; 24984 } 24985 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24986 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */ 24987 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 24988 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 24989 
uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 ); 24990 goto decode_success; 24991 } 24992 /* VORPS r/m, rV, r ::: r = rV | r/m */ 24993 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */ 24994 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 24995 delta = dis_AVX256_E_V_to_G( 24996 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 ); 24997 goto decode_success; 24998 } 24999 break; 25000 25001 case 0x57: 25002 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 25003 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */ 25004 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25005 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25006 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 ); 25007 goto decode_success; 25008 } 25009 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */ 25010 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */ 25011 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25012 delta = dis_AVX256_E_V_to_G( 25013 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 ); 25014 goto decode_success; 25015 } 25016 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 25017 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */ 25018 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25019 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25020 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 ); 25021 goto decode_success; 25022 } 25023 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */ 25024 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */ 25025 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25026 delta = dis_AVX256_E_V_to_G( 25027 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 ); 25028 goto decode_success; 25029 } 25030 break; 25031 25032 case 0x58: 25033 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */ 25034 if (haveF2no66noF3(pfx)) { 25035 delta = dis_AVX128_E_V_to_G_lo64( 25036 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 ); 25037 goto decode_success; 25038 } 25039 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */ 25040 if (haveF3no66noF2(pfx)) { 25041 delta = dis_AVX128_E_V_to_G_lo32( 
25042 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 ); 25043 goto decode_success; 25044 } 25045 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */ 25046 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25047 delta = dis_AVX128_E_V_to_G( 25048 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 ); 25049 goto decode_success; 25050 } 25051 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */ 25052 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25053 delta = dis_AVX256_E_V_to_G( 25054 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 ); 25055 goto decode_success; 25056 } 25057 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */ 25058 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25059 delta = dis_AVX128_E_V_to_G( 25060 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 ); 25061 goto decode_success; 25062 } 25063 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */ 25064 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25065 delta = dis_AVX256_E_V_to_G( 25066 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 ); 25067 goto decode_success; 25068 } 25069 break; 25070 25071 case 0x59: 25072 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */ 25073 if (haveF2no66noF3(pfx)) { 25074 delta = dis_AVX128_E_V_to_G_lo64( 25075 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 ); 25076 goto decode_success; 25077 } 25078 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */ 25079 if (haveF3no66noF2(pfx)) { 25080 delta = dis_AVX128_E_V_to_G_lo32( 25081 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 ); 25082 goto decode_success; 25083 } 25084 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */ 25085 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25086 delta = dis_AVX128_E_V_to_G( 25087 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 ); 25088 goto decode_success; 25089 } 25090 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */ 25091 if 
(haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25092 delta = dis_AVX256_E_V_to_G( 25093 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 ); 25094 goto decode_success; 25095 } 25096 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */ 25097 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25098 delta = dis_AVX128_E_V_to_G( 25099 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 ); 25100 goto decode_success; 25101 } 25102 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */ 25103 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25104 delta = dis_AVX256_E_V_to_G( 25105 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 ); 25106 goto decode_success; 25107 } 25108 break; 25109 25110 case 0x5A: 25111 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */ 25112 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25113 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ ); 25114 goto decode_success; 25115 } 25116 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */ 25117 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25118 delta = dis_CVTPS2PD_256( vbi, pfx, delta ); 25119 goto decode_success; 25120 } 25121 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */ 25122 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25123 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ ); 25124 goto decode_success; 25125 } 25126 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */ 25127 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25128 delta = dis_CVTPD2PS_256( vbi, pfx, delta ); 25129 goto decode_success; 25130 } 25131 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */ 25132 if (haveF2no66noF3(pfx)) { 25133 UChar modrm = getUChar(delta); 25134 UInt rV = getVexNvvvv(pfx); 25135 UInt rD = gregOfRexRM(pfx, modrm); 25136 IRTemp f64lo = newTemp(Ity_F64); 25137 IRTemp rmode = newTemp(Ity_I32); 25138 assign( rmode, get_sse_roundingmode() ); 25139 if (epartIsReg(modrm)) { 25140 UInt rS = 
eregOfRexRM(pfx,modrm); 25141 assign(f64lo, getXMMRegLane64F(rS, 0)); 25142 delta += 1; 25143 DIP("vcvtsd2ss %s,%s,%s\n", 25144 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 25145 } else { 25146 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25147 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) ); 25148 delta += alen; 25149 DIP("vcvtsd2ss %s,%s,%s\n", 25150 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 25151 } 25152 putXMMRegLane32F( rD, 0, 25153 binop( Iop_F64toF32, mkexpr(rmode), 25154 mkexpr(f64lo)) ); 25155 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 )); 25156 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 25157 putYMMRegLane128( rD, 1, mkV128(0) ); 25158 *uses_vvvv = True; 25159 goto decode_success; 25160 } 25161 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */ 25162 if (haveF3no66noF2(pfx)) { 25163 UChar modrm = getUChar(delta); 25164 UInt rV = getVexNvvvv(pfx); 25165 UInt rD = gregOfRexRM(pfx, modrm); 25166 IRTemp f32lo = newTemp(Ity_F32); 25167 if (epartIsReg(modrm)) { 25168 UInt rS = eregOfRexRM(pfx,modrm); 25169 assign(f32lo, getXMMRegLane32F(rS, 0)); 25170 delta += 1; 25171 DIP("vcvtss2sd %s,%s,%s\n", 25172 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD)); 25173 } else { 25174 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 25175 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) ); 25176 delta += alen; 25177 DIP("vcvtss2sd %s,%s,%s\n", 25178 dis_buf, nameXMMReg(rV), nameXMMReg(rD)); 25179 } 25180 putXMMRegLane64F( rD, 0, 25181 unop( Iop_F32toF64, mkexpr(f32lo)) ); 25182 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 )); 25183 putYMMRegLane128( rD, 1, mkV128(0) ); 25184 *uses_vvvv = True; 25185 goto decode_success; 25186 } 25187 break; 25188 25189 case 0x5B: 25190 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */ 25191 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25192 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 25193 True/*isAvx*/, False/*!r2zero*/ ); 25194 goto decode_success; 25195 } 25196 /* VCVTPS2DQ 
ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */ 25197 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25198 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 25199 False/*!r2zero*/ ); 25200 goto decode_success; 25201 } 25202 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */ 25203 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 25204 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, 25205 True/*isAvx*/, True/*r2zero*/ ); 25206 goto decode_success; 25207 } 25208 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */ 25209 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 25210 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta, 25211 True/*r2zero*/ ); 25212 goto decode_success; 25213 } 25214 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */ 25215 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25216 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ ); 25217 goto decode_success; 25218 } 25219 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */ 25220 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25221 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta ); 25222 goto decode_success; 25223 } 25224 break; 25225 25226 case 0x5C: 25227 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */ 25228 if (haveF2no66noF3(pfx)) { 25229 delta = dis_AVX128_E_V_to_G_lo64( 25230 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 ); 25231 goto decode_success; 25232 } 25233 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */ 25234 if (haveF3no66noF2(pfx)) { 25235 delta = dis_AVX128_E_V_to_G_lo32( 25236 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 ); 25237 goto decode_success; 25238 } 25239 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */ 25240 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25241 delta = dis_AVX128_E_V_to_G( 25242 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 ); 25243 goto decode_success; 25244 } 25245 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */ 25246 if 
(haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25247 delta = dis_AVX256_E_V_to_G( 25248 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 ); 25249 goto decode_success; 25250 } 25251 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */ 25252 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25253 delta = dis_AVX128_E_V_to_G( 25254 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 ); 25255 goto decode_success; 25256 } 25257 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */ 25258 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25259 delta = dis_AVX256_E_V_to_G( 25260 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 ); 25261 goto decode_success; 25262 } 25263 break; 25264 25265 case 0x5D: 25266 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */ 25267 if (haveF2no66noF3(pfx)) { 25268 delta = dis_AVX128_E_V_to_G_lo64( 25269 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 ); 25270 goto decode_success; 25271 } 25272 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */ 25273 if (haveF3no66noF2(pfx)) { 25274 delta = dis_AVX128_E_V_to_G_lo32( 25275 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 ); 25276 goto decode_success; 25277 } 25278 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */ 25279 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25280 delta = dis_AVX128_E_V_to_G( 25281 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 ); 25282 goto decode_success; 25283 } 25284 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */ 25285 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25286 delta = dis_AVX256_E_V_to_G( 25287 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 ); 25288 goto decode_success; 25289 } 25290 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */ 25291 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25292 delta = dis_AVX128_E_V_to_G( 25293 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 ); 25294 goto decode_success; 25295 } 25296 
/* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */ 25297 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25298 delta = dis_AVX256_E_V_to_G( 25299 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 ); 25300 goto decode_success; 25301 } 25302 break; 25303 25304 case 0x5E: 25305 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */ 25306 if (haveF2no66noF3(pfx)) { 25307 delta = dis_AVX128_E_V_to_G_lo64( 25308 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 ); 25309 goto decode_success; 25310 } 25311 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */ 25312 if (haveF3no66noF2(pfx)) { 25313 delta = dis_AVX128_E_V_to_G_lo32( 25314 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 ); 25315 goto decode_success; 25316 } 25317 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */ 25318 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25319 delta = dis_AVX128_E_V_to_G( 25320 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 ); 25321 goto decode_success; 25322 } 25323 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */ 25324 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25325 delta = dis_AVX256_E_V_to_G( 25326 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 ); 25327 goto decode_success; 25328 } 25329 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */ 25330 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25331 delta = dis_AVX128_E_V_to_G( 25332 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 ); 25333 goto decode_success; 25334 } 25335 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */ 25336 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25337 delta = dis_AVX256_E_V_to_G( 25338 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 ); 25339 goto decode_success; 25340 } 25341 break; 25342 25343 case 0x5F: 25344 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */ 25345 if (haveF2no66noF3(pfx)) { 25346 delta = dis_AVX128_E_V_to_G_lo64( 25347 
uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 ); 25348 goto decode_success; 25349 } 25350 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */ 25351 if (haveF3no66noF2(pfx)) { 25352 delta = dis_AVX128_E_V_to_G_lo32( 25353 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 ); 25354 goto decode_success; 25355 } 25356 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */ 25357 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25358 delta = dis_AVX128_E_V_to_G( 25359 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 ); 25360 goto decode_success; 25361 } 25362 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */ 25363 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25364 delta = dis_AVX256_E_V_to_G( 25365 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 ); 25366 goto decode_success; 25367 } 25368 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */ 25369 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25370 delta = dis_AVX128_E_V_to_G( 25371 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 ); 25372 goto decode_success; 25373 } 25374 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */ 25375 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25376 delta = dis_AVX256_E_V_to_G( 25377 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 ); 25378 goto decode_success; 25379 } 25380 break; 25381 25382 case 0x60: 25383 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 25384 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */ 25385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25386 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25387 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 25388 Iop_InterleaveLO8x16, NULL, 25389 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25390 goto decode_success; 25391 } 25392 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */ 25393 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */ 25394 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25395 delta = 
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25396 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", 25397 math_VPUNPCKLBW_YMM ); 25398 goto decode_success; 25399 } 25400 break; 25401 25402 case 0x61: 25403 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 25404 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */ 25405 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25406 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25407 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 25408 Iop_InterleaveLO16x8, NULL, 25409 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25410 goto decode_success; 25411 } 25412 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */ 25413 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */ 25414 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25415 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25416 uses_vvvv, vbi, pfx, delta, "vpunpcklwd", 25417 math_VPUNPCKLWD_YMM ); 25418 goto decode_success; 25419 } 25420 break; 25421 25422 case 0x62: 25423 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 25424 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */ 25425 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25426 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25427 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 25428 Iop_InterleaveLO32x4, NULL, 25429 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25430 goto decode_success; 25431 } 25432 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */ 25433 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */ 25434 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25435 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25436 uses_vvvv, vbi, pfx, delta, "vpunpckldq", 25437 math_VPUNPCKLDQ_YMM ); 25438 goto decode_success; 25439 } 25440 break; 25441 25442 case 0x63: 25443 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 25444 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */ 25445 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25446 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 
25447 uses_vvvv, vbi, pfx, delta, "vpacksswb", 25448 Iop_QNarrowBin16Sto8Sx16, NULL, 25449 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25450 goto decode_success; 25451 } 25452 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */ 25453 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */ 25454 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25455 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25456 uses_vvvv, vbi, pfx, delta, "vpacksswb", 25457 math_VPACKSSWB_YMM ); 25458 goto decode_success; 25459 } 25460 break; 25461 25462 case 0x64: 25463 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 25464 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */ 25465 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25466 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25467 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 ); 25468 goto decode_success; 25469 } 25470 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */ 25471 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */ 25472 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25473 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25474 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 ); 25475 goto decode_success; 25476 } 25477 break; 25478 25479 case 0x65: 25480 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 25481 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */ 25482 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25483 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25484 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 ); 25485 goto decode_success; 25486 } 25487 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */ 25488 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */ 25489 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25490 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25491 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 ); 25492 goto decode_success; 25493 } 25494 break; 25495 25496 case 0x66: 25497 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 25498 /* VPCMPGTD 
= VEX.NDS.128.66.0F.WIG 66 /r */ 25499 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25500 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 25501 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 ); 25502 goto decode_success; 25503 } 25504 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */ 25505 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */ 25506 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25507 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 25508 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 ); 25509 goto decode_success; 25510 } 25511 break; 25512 25513 case 0x67: 25514 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25515 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */ 25516 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25517 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25518 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25519 Iop_QNarrowBin16Sto8Ux16, NULL, 25520 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25521 goto decode_success; 25522 } 25523 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */ 25524 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */ 25525 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25526 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25527 uses_vvvv, vbi, pfx, delta, "vpackuswb", 25528 math_VPACKUSWB_YMM ); 25529 goto decode_success; 25530 } 25531 break; 25532 25533 case 0x68: 25534 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25535 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */ 25536 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25537 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25538 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25539 Iop_InterleaveHI8x16, NULL, 25540 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25541 goto decode_success; 25542 } 25543 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */ 25544 /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */ 25545 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25546 delta = 
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25547 uses_vvvv, vbi, pfx, delta, "vpunpckhbw", 25548 math_VPUNPCKHBW_YMM ); 25549 goto decode_success; 25550 } 25551 break; 25552 25553 case 0x69: 25554 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25555 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */ 25556 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25557 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25558 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25559 Iop_InterleaveHI16x8, NULL, 25560 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25561 goto decode_success; 25562 } 25563 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */ 25564 /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */ 25565 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25566 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25567 uses_vvvv, vbi, pfx, delta, "vpunpckhwd", 25568 math_VPUNPCKHWD_YMM ); 25569 goto decode_success; 25570 } 25571 break; 25572 25573 case 0x6A: 25574 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25575 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */ 25576 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25577 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25578 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25579 Iop_InterleaveHI32x4, NULL, 25580 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25581 goto decode_success; 25582 } 25583 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */ 25584 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */ 25585 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25586 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25587 uses_vvvv, vbi, pfx, delta, "vpunpckhdq", 25588 math_VPUNPCKHDQ_YMM ); 25589 goto decode_success; 25590 } 25591 break; 25592 25593 case 0x6B: 25594 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25595 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */ 25596 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25597 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25598 
uses_vvvv, vbi, pfx, delta, "vpackssdw", 25599 Iop_QNarrowBin32Sto16Sx8, NULL, 25600 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25601 goto decode_success; 25602 } 25603 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */ 25604 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */ 25605 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25606 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25607 uses_vvvv, vbi, pfx, delta, "vpackssdw", 25608 math_VPACKSSDW_YMM ); 25609 goto decode_success; 25610 } 25611 break; 25612 25613 case 0x6C: 25614 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 25615 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */ 25616 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25617 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25618 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25619 Iop_InterleaveLO64x2, NULL, 25620 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25621 goto decode_success; 25622 } 25623 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */ 25624 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */ 25625 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25626 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25627 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq", 25628 math_VPUNPCKLQDQ_YMM ); 25629 goto decode_success; 25630 } 25631 break; 25632 25633 case 0x6D: 25634 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25635 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */ 25636 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25637 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 25638 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25639 Iop_InterleaveHI64x2, NULL, 25640 False/*!invertLeftArg*/, True/*swapArgs*/ ); 25641 goto decode_success; 25642 } 25643 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */ 25644 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */ 25645 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25646 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 25647 
uses_vvvv, vbi, pfx, delta, "vpunpckhqdq", 25648 math_VPUNPCKHQDQ_YMM ); 25649 goto decode_success; 25650 } 25651 break; 25652 25653 case 0x6E: 25654 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */ 25655 if (have66noF2noF3(pfx) 25656 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 25657 vassert(sz == 2); /* even tho we are transferring 4, not 2. */ 25658 UChar modrm = getUChar(delta); 25659 if (epartIsReg(modrm)) { 25660 delta += 1; 25661 putYMMRegLoAndZU( 25662 gregOfRexRM(pfx,modrm), 25663 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) 25664 ); 25665 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)), 25666 nameXMMReg(gregOfRexRM(pfx,modrm))); 25667 } else { 25668 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25669 delta += alen; 25670 putYMMRegLoAndZU( 25671 gregOfRexRM(pfx,modrm), 25672 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr))) 25673 ); 25674 DIP("vmovd %s, %s\n", dis_buf, 25675 nameXMMReg(gregOfRexRM(pfx,modrm))); 25676 } 25677 goto decode_success; 25678 } 25679 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */ 25680 if (have66noF2noF3(pfx) 25681 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 25682 vassert(sz == 2); /* even tho we are transferring 8, not 2. 
*/ 25683 UChar modrm = getUChar(delta); 25684 if (epartIsReg(modrm)) { 25685 delta += 1; 25686 putYMMRegLoAndZU( 25687 gregOfRexRM(pfx,modrm), 25688 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) 25689 ); 25690 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)), 25691 nameXMMReg(gregOfRexRM(pfx,modrm))); 25692 } else { 25693 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25694 delta += alen; 25695 putYMMRegLoAndZU( 25696 gregOfRexRM(pfx,modrm), 25697 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr))) 25698 ); 25699 DIP("vmovq %s, %s\n", dis_buf, 25700 nameXMMReg(gregOfRexRM(pfx,modrm))); 25701 } 25702 goto decode_success; 25703 } 25704 break; 25705 25706 case 0x6F: 25707 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */ 25708 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */ 25709 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25710 && 1==getVexL(pfx)/*256*/) { 25711 UChar modrm = getUChar(delta); 25712 UInt rD = gregOfRexRM(pfx, modrm); 25713 IRTemp tD = newTemp(Ity_V256); 25714 Bool isA = have66noF2noF3(pfx); 25715 HChar ch = isA ? 
'a' : 'u'; 25716 if (epartIsReg(modrm)) { 25717 UInt rS = eregOfRexRM(pfx, modrm); 25718 delta += 1; 25719 assign(tD, getYMMReg(rS)); 25720 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 25721 } else { 25722 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25723 delta += alen; 25724 if (isA) 25725 gen_SEGV_if_not_32_aligned(addr); 25726 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 25727 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD)); 25728 } 25729 putYMMReg(rD, mkexpr(tD)); 25730 goto decode_success; 25731 } 25732 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */ 25733 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */ 25734 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 25735 && 0==getVexL(pfx)/*128*/) { 25736 UChar modrm = getUChar(delta); 25737 UInt rD = gregOfRexRM(pfx, modrm); 25738 IRTemp tD = newTemp(Ity_V128); 25739 Bool isA = have66noF2noF3(pfx); 25740 HChar ch = isA ? 'a' : 'u'; 25741 if (epartIsReg(modrm)) { 25742 UInt rS = eregOfRexRM(pfx, modrm); 25743 delta += 1; 25744 assign(tD, getXMMReg(rS)); 25745 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 25746 } else { 25747 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 25748 delta += alen; 25749 if (isA) 25750 gen_SEGV_if_not_16_aligned(addr); 25751 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 25752 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD)); 25753 } 25754 putYMMRegLoAndZU(rD, mkexpr(tD)); 25755 goto decode_success; 25756 } 25757 break; 25758 25759 case 0x70: 25760 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */ 25761 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25762 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/); 25763 goto decode_success; 25764 } 25765 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */ 25766 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25767 delta = dis_PSHUFD_32x8( vbi, pfx, delta); 25768 goto decode_success; 25769 } 25770 /* VPSHUFLW imm8, xmm2/m128, xmm1 = 
VEX.128.F2.0F.WIG 70 /r ib */ 25771 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 25772 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25773 True/*isAvx*/, False/*!xIsH*/ ); 25774 goto decode_success; 25775 } 25776 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */ 25777 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 25778 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ ); 25779 goto decode_success; 25780 } 25781 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */ 25782 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 25783 delta = dis_PSHUFxW_128( vbi, pfx, delta, 25784 True/*isAvx*/, True/*xIsH*/ ); 25785 goto decode_success; 25786 } 25787 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */ 25788 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 25789 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ ); 25790 goto decode_success; 25791 } 25792 break; 25793 25794 case 0x71: 25795 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */ 25796 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */ 25797 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */ 25798 if (have66noF2noF3(pfx) 25799 && 0==getVexL(pfx)/*128*/ 25800 && epartIsReg(getUChar(delta))) { 25801 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25802 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25803 "vpsrlw", Iop_ShrN16x8 ); 25804 *uses_vvvv = True; 25805 goto decode_success; 25806 } 25807 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25808 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25809 "vpsraw", Iop_SarN16x8 ); 25810 *uses_vvvv = True; 25811 goto decode_success; 25812 } 25813 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25814 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25815 "vpsllw", Iop_ShlN16x8 ); 25816 *uses_vvvv = True; 25817 goto decode_success; 25818 } 25819 /* else fall through */ 25820 } 25821 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */ 25822 /* VPSRAW imm8, 
ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */ 25823 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */ 25824 if (have66noF2noF3(pfx) 25825 && 1==getVexL(pfx)/*256*/ 25826 && epartIsReg(getUChar(delta))) { 25827 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25828 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25829 "vpsrlw", Iop_ShrN16x16 ); 25830 *uses_vvvv = True; 25831 goto decode_success; 25832 } 25833 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25834 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25835 "vpsraw", Iop_SarN16x16 ); 25836 *uses_vvvv = True; 25837 goto decode_success; 25838 } 25839 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25840 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25841 "vpsllw", Iop_ShlN16x16 ); 25842 *uses_vvvv = True; 25843 goto decode_success; 25844 } 25845 /* else fall through */ 25846 } 25847 break; 25848 25849 case 0x72: 25850 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */ 25851 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */ 25852 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */ 25853 if (have66noF2noF3(pfx) 25854 && 0==getVexL(pfx)/*128*/ 25855 && epartIsReg(getUChar(delta))) { 25856 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25857 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25858 "vpsrld", Iop_ShrN32x4 ); 25859 *uses_vvvv = True; 25860 goto decode_success; 25861 } 25862 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25863 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25864 "vpsrad", Iop_SarN32x4 ); 25865 *uses_vvvv = True; 25866 goto decode_success; 25867 } 25868 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25869 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25870 "vpslld", Iop_ShlN32x4 ); 25871 *uses_vvvv = True; 25872 goto decode_success; 25873 } 25874 /* else fall through */ 25875 } 25876 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */ 25877 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */ 25878 /* VPSLLD 
imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */ 25879 if (have66noF2noF3(pfx) 25880 && 1==getVexL(pfx)/*256*/ 25881 && epartIsReg(getUChar(delta))) { 25882 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) { 25883 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25884 "vpsrld", Iop_ShrN32x8 ); 25885 *uses_vvvv = True; 25886 goto decode_success; 25887 } 25888 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) { 25889 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25890 "vpsrad", Iop_SarN32x8 ); 25891 *uses_vvvv = True; 25892 goto decode_success; 25893 } 25894 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) { 25895 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25896 "vpslld", Iop_ShlN32x8 ); 25897 *uses_vvvv = True; 25898 goto decode_success; 25899 } 25900 /* else fall through */ 25901 } 25902 break; 25903 25904 case 0x73: 25905 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */ 25906 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */ 25907 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */ 25908 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */ 25909 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 25910 && epartIsReg(getUChar(delta))) { 25911 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25912 Int rD = getVexNvvvv(pfx); 25913 IRTemp vecS = newTemp(Ity_V128); 25914 if (gregLO3ofRM(getUChar(delta)) == 3) { 25915 Int imm = (Int)getUChar(delta+1); 25916 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25917 delta += 2; 25918 assign( vecS, getXMMReg(rS) ); 25919 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm ))); 25920 *uses_vvvv = True; 25921 goto decode_success; 25922 } 25923 if (gregLO3ofRM(getUChar(delta)) == 7) { 25924 Int imm = (Int)getUChar(delta+1); 25925 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD)); 25926 delta += 2; 25927 assign( vecS, getXMMReg(rS) ); 25928 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm ))); 25929 *uses_vvvv = True; 25930 goto decode_success; 25931 } 25932 
if (gregLO3ofRM(getUChar(delta)) == 2) { 25933 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25934 "vpsrlq", Iop_ShrN64x2 ); 25935 *uses_vvvv = True; 25936 goto decode_success; 25937 } 25938 if (gregLO3ofRM(getUChar(delta)) == 6) { 25939 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta, 25940 "vpsllq", Iop_ShlN64x2 ); 25941 *uses_vvvv = True; 25942 goto decode_success; 25943 } 25944 /* else fall through */ 25945 } 25946 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */ 25947 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */ 25948 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */ 25949 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */ 25950 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 25951 && epartIsReg(getUChar(delta))) { 25952 Int rS = eregOfRexRM(pfx,getUChar(delta)); 25953 Int rD = getVexNvvvv(pfx); 25954 if (gregLO3ofRM(getUChar(delta)) == 3) { 25955 IRTemp vecS0 = newTemp(Ity_V128); 25956 IRTemp vecS1 = newTemp(Ity_V128); 25957 Int imm = (Int)getUChar(delta+1); 25958 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25959 delta += 2; 25960 assign( vecS0, getYMMRegLane128(rS, 0)); 25961 assign( vecS1, getYMMRegLane128(rS, 1)); 25962 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm ))); 25963 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm ))); 25964 *uses_vvvv = True; 25965 goto decode_success; 25966 } 25967 if (gregLO3ofRM(getUChar(delta)) == 7) { 25968 IRTemp vecS0 = newTemp(Ity_V128); 25969 IRTemp vecS1 = newTemp(Ity_V128); 25970 Int imm = (Int)getUChar(delta+1); 25971 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD)); 25972 delta += 2; 25973 assign( vecS0, getYMMRegLane128(rS, 0)); 25974 assign( vecS1, getYMMRegLane128(rS, 1)); 25975 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm ))); 25976 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm ))); 25977 *uses_vvvv = True; 25978 goto decode_success; 25979 } 25980 if (gregLO3ofRM(getUChar(delta)) == 
2) { 25981 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25982 "vpsrlq", Iop_ShrN64x4 ); 25983 *uses_vvvv = True; 25984 goto decode_success; 25985 } 25986 if (gregLO3ofRM(getUChar(delta)) == 6) { 25987 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta, 25988 "vpsllq", Iop_ShlN64x4 ); 25989 *uses_vvvv = True; 25990 goto decode_success; 25991 } 25992 /* else fall through */ 25993 } 25994 break; 25995 25996 case 0x74: 25997 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 25998 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */ 25999 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26000 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26001 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 ); 26002 goto decode_success; 26003 } 26004 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */ 26005 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */ 26006 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26007 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26008 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 ); 26009 goto decode_success; 26010 } 26011 break; 26012 26013 case 0x75: 26014 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 26015 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */ 26016 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26017 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26018 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 ); 26019 goto decode_success; 26020 } 26021 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */ 26022 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */ 26023 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26024 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26025 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 ); 26026 goto decode_success; 26027 } 26028 break; 26029 26030 case 0x76: 26031 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 26032 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */ 26033 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26034 delta = 
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26035 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 ); 26036 goto decode_success; 26037 } 26038 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */ 26039 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */ 26040 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26041 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26042 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 ); 26043 goto decode_success; 26044 } 26045 break; 26046 26047 case 0x77: 26048 /* VZEROUPPER = VEX.128.0F.WIG 77 */ 26049 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26050 Int i; 26051 IRTemp zero128 = newTemp(Ity_V128); 26052 assign(zero128, mkV128(0)); 26053 for (i = 0; i < 16; i++) { 26054 putYMMRegLane128(i, 1, mkexpr(zero128)); 26055 } 26056 DIP("vzeroupper\n"); 26057 goto decode_success; 26058 } 26059 /* VZEROALL = VEX.256.0F.WIG 77 */ 26060 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26061 Int i; 26062 IRTemp zero128 = newTemp(Ity_V128); 26063 assign(zero128, mkV128(0)); 26064 for (i = 0; i < 16; i++) { 26065 putYMMRegLoAndZU(i, mkexpr(zero128)); 26066 } 26067 DIP("vzeroall\n"); 26068 goto decode_success; 26069 } 26070 break; 26071 26072 case 0x7C: 26073 case 0x7D: 26074 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */ 26075 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */ 26076 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26077 IRTemp sV = newTemp(Ity_V128); 26078 IRTemp dV = newTemp(Ity_V128); 26079 Bool isAdd = opc == 0x7C; 26080 const HChar* str = isAdd ? 
"add" : "sub"; 26081 UChar modrm = getUChar(delta); 26082 UInt rG = gregOfRexRM(pfx,modrm); 26083 UInt rV = getVexNvvvv(pfx); 26084 if (epartIsReg(modrm)) { 26085 UInt rE = eregOfRexRM(pfx,modrm); 26086 assign( sV, getXMMReg(rE) ); 26087 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 26088 nameXMMReg(rV), nameXMMReg(rG)); 26089 delta += 1; 26090 } else { 26091 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26092 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 26093 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 26094 nameXMMReg(rV), nameXMMReg(rG)); 26095 delta += alen; 26096 } 26097 assign( dV, getXMMReg(rV) ); 26098 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) ); 26099 *uses_vvvv = True; 26100 goto decode_success; 26101 } 26102 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */ 26103 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */ 26104 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26105 IRTemp sV = newTemp(Ity_V256); 26106 IRTemp dV = newTemp(Ity_V256); 26107 IRTemp s1, s0, d1, d0; 26108 Bool isAdd = opc == 0x7C; 26109 const HChar* str = isAdd ? 
"add" : "sub"; 26110 UChar modrm = getUChar(delta); 26111 UInt rG = gregOfRexRM(pfx,modrm); 26112 UInt rV = getVexNvvvv(pfx); 26113 s1 = s0 = d1 = d0 = IRTemp_INVALID; 26114 if (epartIsReg(modrm)) { 26115 UInt rE = eregOfRexRM(pfx,modrm); 26116 assign( sV, getYMMReg(rE) ); 26117 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 26118 nameYMMReg(rV), nameYMMReg(rG)); 26119 delta += 1; 26120 } else { 26121 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26122 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 26123 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 26124 nameYMMReg(rV), nameYMMReg(rG)); 26125 delta += alen; 26126 } 26127 assign( dV, getYMMReg(rV) ); 26128 breakupV256toV128s( dV, &d1, &d0 ); 26129 breakupV256toV128s( sV, &s1, &s0 ); 26130 putYMMReg( rG, binop(Iop_V128HLtoV256, 26131 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ), 26132 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) ); 26133 *uses_vvvv = True; 26134 goto decode_success; 26135 } 26136 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */ 26137 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */ 26138 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26139 IRTemp sV = newTemp(Ity_V128); 26140 IRTemp dV = newTemp(Ity_V128); 26141 Bool isAdd = opc == 0x7C; 26142 const HChar* str = isAdd ? 
"add" : "sub"; 26143 UChar modrm = getUChar(delta); 26144 UInt rG = gregOfRexRM(pfx,modrm); 26145 UInt rV = getVexNvvvv(pfx); 26146 if (epartIsReg(modrm)) { 26147 UInt rE = eregOfRexRM(pfx,modrm); 26148 assign( sV, getXMMReg(rE) ); 26149 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE), 26150 nameXMMReg(rV), nameXMMReg(rG)); 26151 delta += 1; 26152 } else { 26153 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26154 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 26155 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 26156 nameXMMReg(rV), nameXMMReg(rG)); 26157 delta += alen; 26158 } 26159 assign( dV, getXMMReg(rV) ); 26160 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) ); 26161 *uses_vvvv = True; 26162 goto decode_success; 26163 } 26164 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */ 26165 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */ 26166 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26167 IRTemp sV = newTemp(Ity_V256); 26168 IRTemp dV = newTemp(Ity_V256); 26169 IRTemp s1, s0, d1, d0; 26170 Bool isAdd = opc == 0x7C; 26171 const HChar* str = isAdd ? 
"add" : "sub"; 26172 UChar modrm = getUChar(delta); 26173 UInt rG = gregOfRexRM(pfx,modrm); 26174 UInt rV = getVexNvvvv(pfx); 26175 s1 = s0 = d1 = d0 = IRTemp_INVALID; 26176 if (epartIsReg(modrm)) { 26177 UInt rE = eregOfRexRM(pfx,modrm); 26178 assign( sV, getYMMReg(rE) ); 26179 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE), 26180 nameYMMReg(rV), nameYMMReg(rG)); 26181 delta += 1; 26182 } else { 26183 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26184 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 26185 DIP("vh%spd %s,%s,%s\n", str, dis_buf, 26186 nameYMMReg(rV), nameYMMReg(rG)); 26187 delta += alen; 26188 } 26189 assign( dV, getYMMReg(rV) ); 26190 breakupV256toV128s( dV, &d1, &d0 ); 26191 breakupV256toV128s( sV, &s1, &s0 ); 26192 putYMMReg( rG, binop(Iop_V128HLtoV256, 26193 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ), 26194 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) ); 26195 *uses_vvvv = True; 26196 goto decode_success; 26197 } 26198 break; 26199 26200 case 0x7E: 26201 /* Note the Intel docs don't make sense for this. I think they 26202 are wrong. They seem to imply it is a store when in fact I 26203 think it is a load. Also it's unclear whether this is W0, W1 26204 or WIG. */ 26205 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */ 26206 if (haveF3no66noF2(pfx) 26207 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 26208 vassert(sz == 4); /* even tho we are transferring 8, not 4. 
*/ 26209 UChar modrm = getUChar(delta); 26210 UInt rG = gregOfRexRM(pfx,modrm); 26211 if (epartIsReg(modrm)) { 26212 UInt rE = eregOfRexRM(pfx,modrm); 26213 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 )); 26214 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 26215 delta += 1; 26216 } else { 26217 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26218 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) ); 26219 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 26220 delta += alen; 26221 } 26222 /* zero bits 255:64 */ 26223 putXMMRegLane64( rG, 1, mkU64(0) ); 26224 putYMMRegLane128( rG, 1, mkV128(0) ); 26225 goto decode_success; 26226 } 26227 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */ 26228 /* Moves from G to E, so is a store-form insn */ 26229 /* Intel docs list this in the VMOVD entry for some reason. */ 26230 if (have66noF2noF3(pfx) 26231 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 26232 UChar modrm = getUChar(delta); 26233 UInt rG = gregOfRexRM(pfx,modrm); 26234 if (epartIsReg(modrm)) { 26235 UInt rE = eregOfRexRM(pfx,modrm); 26236 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE)); 26237 putIReg64(rE, getXMMRegLane64(rG, 0)); 26238 delta += 1; 26239 } else { 26240 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26241 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) ); 26242 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG)); 26243 delta += alen; 26244 } 26245 goto decode_success; 26246 } 26247 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */ 26248 /* Moves from G to E, so is a store-form insn */ 26249 if (have66noF2noF3(pfx) 26250 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 26251 UChar modrm = getUChar(delta); 26252 UInt rG = gregOfRexRM(pfx,modrm); 26253 if (epartIsReg(modrm)) { 26254 UInt rE = eregOfRexRM(pfx,modrm); 26255 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE)); 26256 putIReg32(rE, getXMMRegLane32(rG, 0)); 26257 delta += 1; 26258 } else { 26259 addr = disAMode ( &alen, 
vbi, pfx, delta, dis_buf, 0 ); 26260 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) ); 26261 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG)); 26262 delta += alen; 26263 } 26264 goto decode_success; 26265 } 26266 break; 26267 26268 case 0x7F: 26269 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */ 26270 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */ 26271 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 26272 && 1==getVexL(pfx)/*256*/) { 26273 UChar modrm = getUChar(delta); 26274 UInt rS = gregOfRexRM(pfx, modrm); 26275 IRTemp tS = newTemp(Ity_V256); 26276 Bool isA = have66noF2noF3(pfx); 26277 HChar ch = isA ? 'a' : 'u'; 26278 assign(tS, getYMMReg(rS)); 26279 if (epartIsReg(modrm)) { 26280 UInt rD = eregOfRexRM(pfx, modrm); 26281 delta += 1; 26282 putYMMReg(rD, mkexpr(tS)); 26283 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD)); 26284 } else { 26285 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26286 delta += alen; 26287 if (isA) 26288 gen_SEGV_if_not_32_aligned(addr); 26289 storeLE(mkexpr(addr), mkexpr(tS)); 26290 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf); 26291 } 26292 goto decode_success; 26293 } 26294 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */ 26295 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */ 26296 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx)) 26297 && 0==getVexL(pfx)/*128*/) { 26298 UChar modrm = getUChar(delta); 26299 UInt rS = gregOfRexRM(pfx, modrm); 26300 IRTemp tS = newTemp(Ity_V128); 26301 Bool isA = have66noF2noF3(pfx); 26302 HChar ch = isA ? 
'a' : 'u'; 26303 assign(tS, getXMMReg(rS)); 26304 if (epartIsReg(modrm)) { 26305 UInt rD = eregOfRexRM(pfx, modrm); 26306 delta += 1; 26307 putYMMRegLoAndZU(rD, mkexpr(tS)); 26308 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD)); 26309 } else { 26310 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 26311 delta += alen; 26312 if (isA) 26313 gen_SEGV_if_not_16_aligned(addr); 26314 storeLE(mkexpr(addr), mkexpr(tS)); 26315 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf); 26316 } 26317 goto decode_success; 26318 } 26319 break; 26320 26321 case 0xAE: 26322 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */ 26323 if (haveNo66noF2noF3(pfx) 26324 && 0==getVexL(pfx)/*LZ*/ 26325 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 26326 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3 26327 && sz == 4) { 26328 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/); 26329 goto decode_success; 26330 } 26331 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */ 26332 if (haveNo66noF2noF3(pfx) 26333 && 0==getVexL(pfx)/*LZ*/ 26334 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */ 26335 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2 26336 && sz == 4) { 26337 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/); 26338 goto decode_success; 26339 } 26340 break; 26341 26342 case 0xC2: 26343 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */ 26344 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */ 26345 if (haveF2no66noF3(pfx)) { 26346 Long delta0 = delta; 26347 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26348 "vcmpsd", False/*!all_lanes*/, 26349 8/*sz*/); 26350 if (delta > delta0) goto decode_success; 26351 /* else fall through -- decoding has failed */ 26352 } 26353 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */ 26354 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */ 26355 if (haveF3no66noF2(pfx)) { 26356 Long delta0 = delta; 26357 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26358 "vcmpss", 
False/*!all_lanes*/, 26359 4/*sz*/); 26360 if (delta > delta0) goto decode_success; 26361 /* else fall through -- decoding has failed */ 26362 } 26363 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 26364 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */ 26365 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26366 Long delta0 = delta; 26367 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26368 "vcmppd", True/*all_lanes*/, 26369 8/*sz*/); 26370 if (delta > delta0) goto decode_success; 26371 /* else fall through -- decoding has failed */ 26372 } 26373 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 26374 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */ 26375 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26376 Long delta0 = delta; 26377 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26378 "vcmppd", 8/*sz*/); 26379 if (delta > delta0) goto decode_success; 26380 /* else fall through -- decoding has failed */ 26381 } 26382 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */ 26383 /* = VEX.NDS.128.0F.WIG C2 /r ib */ 26384 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26385 Long delta0 = delta; 26386 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26387 "vcmpps", True/*all_lanes*/, 26388 4/*sz*/); 26389 if (delta > delta0) goto decode_success; 26390 /* else fall through -- decoding has failed */ 26391 } 26392 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */ 26393 /* = VEX.NDS.256.0F.WIG C2 /r ib */ 26394 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26395 Long delta0 = delta; 26396 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta, 26397 "vcmpps", 4/*sz*/); 26398 if (delta > delta0) goto decode_success; 26399 /* else fall through -- decoding has failed */ 26400 } 26401 break; 26402 26403 case 0xC4: 26404 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */ 26405 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26406 UChar modrm = getUChar(delta); 26407 UInt rG = gregOfRexRM(pfx, 
modrm); 26408 UInt rV = getVexNvvvv(pfx); 26409 Int imm8; 26410 IRTemp new16 = newTemp(Ity_I16); 26411 26412 if ( epartIsReg( modrm ) ) { 26413 imm8 = (Int)(getUChar(delta+1) & 7); 26414 assign( new16, unop(Iop_32to16, 26415 getIReg32(eregOfRexRM(pfx,modrm))) ); 26416 delta += 1+1; 26417 DIP( "vpinsrw $%d,%s,%s\n", imm8, 26418 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) ); 26419 } else { 26420 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 26421 imm8 = (Int)(getUChar(delta+alen) & 7); 26422 assign( new16, loadLE( Ity_I16, mkexpr(addr) )); 26423 delta += alen+1; 26424 DIP( "vpinsrw $%d,%s,%s\n", 26425 imm8, dis_buf, nameXMMReg(rG) ); 26426 } 26427 26428 IRTemp src_vec = newTemp(Ity_V128); 26429 assign(src_vec, getXMMReg( rV )); 26430 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 ); 26431 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 26432 *uses_vvvv = True; 26433 goto decode_success; 26434 } 26435 break; 26436 26437 case 0xC5: 26438 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */ 26439 if (have66noF2noF3(pfx) 26440 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 26441 Long delta0 = delta; 26442 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta, 26443 True/*isAvx*/ ); 26444 if (delta > delta0) goto decode_success; 26445 /* else fall through -- decoding has failed */ 26446 } 26447 break; 26448 26449 case 0xC6: 26450 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 26451 /* = VEX.NDS.128.0F.WIG C6 /r ib */ 26452 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26453 Int imm8 = 0; 26454 IRTemp eV = newTemp(Ity_V128); 26455 IRTemp vV = newTemp(Ity_V128); 26456 UInt modrm = getUChar(delta); 26457 UInt rG = gregOfRexRM(pfx,modrm); 26458 UInt rV = getVexNvvvv(pfx); 26459 assign( vV, getXMMReg(rV) ); 26460 if (epartIsReg(modrm)) { 26461 UInt rE = eregOfRexRM(pfx,modrm); 26462 assign( eV, getXMMReg(rE) ); 26463 imm8 = (Int)getUChar(delta+1); 26464 delta += 1+1; 26465 DIP("vshufps $%d,%s,%s,%s\n", 26466 imm8, nameXMMReg(rE), 
nameXMMReg(rV), nameXMMReg(rG)); 26467 } else { 26468 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26469 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 26470 imm8 = (Int)getUChar(delta+alen); 26471 delta += 1+alen; 26472 DIP("vshufps $%d,%s,%s,%s\n", 26473 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 26474 } 26475 IRTemp res = math_SHUFPS_128( eV, vV, imm8 ); 26476 putYMMRegLoAndZU( rG, mkexpr(res) ); 26477 *uses_vvvv = True; 26478 goto decode_success; 26479 } 26480 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 26481 /* = VEX.NDS.256.0F.WIG C6 /r ib */ 26482 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26483 Int imm8 = 0; 26484 IRTemp eV = newTemp(Ity_V256); 26485 IRTemp vV = newTemp(Ity_V256); 26486 UInt modrm = getUChar(delta); 26487 UInt rG = gregOfRexRM(pfx,modrm); 26488 UInt rV = getVexNvvvv(pfx); 26489 assign( vV, getYMMReg(rV) ); 26490 if (epartIsReg(modrm)) { 26491 UInt rE = eregOfRexRM(pfx,modrm); 26492 assign( eV, getYMMReg(rE) ); 26493 imm8 = (Int)getUChar(delta+1); 26494 delta += 1+1; 26495 DIP("vshufps $%d,%s,%s,%s\n", 26496 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 26497 } else { 26498 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26499 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 26500 imm8 = (Int)getUChar(delta+alen); 26501 delta += 1+alen; 26502 DIP("vshufps $%d,%s,%s,%s\n", 26503 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 26504 } 26505 IRTemp res = math_SHUFPS_256( eV, vV, imm8 ); 26506 putYMMReg( rG, mkexpr(res) ); 26507 *uses_vvvv = True; 26508 goto decode_success; 26509 } 26510 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */ 26511 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */ 26512 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26513 Int imm8 = 0; 26514 IRTemp eV = newTemp(Ity_V128); 26515 IRTemp vV = newTemp(Ity_V128); 26516 UInt modrm = getUChar(delta); 26517 UInt rG = gregOfRexRM(pfx,modrm); 26518 UInt rV = getVexNvvvv(pfx); 26519 assign( vV, getXMMReg(rV) ); 26520 if (epartIsReg(modrm)) { 
26521 UInt rE = eregOfRexRM(pfx,modrm); 26522 assign( eV, getXMMReg(rE) ); 26523 imm8 = (Int)getUChar(delta+1); 26524 delta += 1+1; 26525 DIP("vshufpd $%d,%s,%s,%s\n", 26526 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 26527 } else { 26528 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26529 assign( eV, loadLE(Ity_V128, mkexpr(addr)) ); 26530 imm8 = (Int)getUChar(delta+alen); 26531 delta += 1+alen; 26532 DIP("vshufpd $%d,%s,%s,%s\n", 26533 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 26534 } 26535 IRTemp res = math_SHUFPD_128( eV, vV, imm8 ); 26536 putYMMRegLoAndZU( rG, mkexpr(res) ); 26537 *uses_vvvv = True; 26538 goto decode_success; 26539 } 26540 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */ 26541 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */ 26542 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26543 Int imm8 = 0; 26544 IRTemp eV = newTemp(Ity_V256); 26545 IRTemp vV = newTemp(Ity_V256); 26546 UInt modrm = getUChar(delta); 26547 UInt rG = gregOfRexRM(pfx,modrm); 26548 UInt rV = getVexNvvvv(pfx); 26549 assign( vV, getYMMReg(rV) ); 26550 if (epartIsReg(modrm)) { 26551 UInt rE = eregOfRexRM(pfx,modrm); 26552 assign( eV, getYMMReg(rE) ); 26553 imm8 = (Int)getUChar(delta+1); 26554 delta += 1+1; 26555 DIP("vshufpd $%d,%s,%s,%s\n", 26556 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 26557 } else { 26558 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 ); 26559 assign( eV, loadLE(Ity_V256, mkexpr(addr)) ); 26560 imm8 = (Int)getUChar(delta+alen); 26561 delta += 1+alen; 26562 DIP("vshufpd $%d,%s,%s,%s\n", 26563 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 26564 } 26565 IRTemp res = math_SHUFPD_256( eV, vV, imm8 ); 26566 putYMMReg( rG, mkexpr(res) ); 26567 *uses_vvvv = True; 26568 goto decode_success; 26569 } 26570 break; 26571 26572 case 0xD0: 26573 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */ 26574 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26575 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 
26576 uses_vvvv, vbi, pfx, delta, 26577 "vaddsubpd", math_ADDSUBPD_128 ); 26578 goto decode_success; 26579 } 26580 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */ 26581 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26582 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26583 uses_vvvv, vbi, pfx, delta, 26584 "vaddsubpd", math_ADDSUBPD_256 ); 26585 goto decode_success; 26586 } 26587 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */ 26588 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26589 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 26590 uses_vvvv, vbi, pfx, delta, 26591 "vaddsubps", math_ADDSUBPS_128 ); 26592 goto decode_success; 26593 } 26594 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */ 26595 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26596 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 26597 uses_vvvv, vbi, pfx, delta, 26598 "vaddsubps", math_ADDSUBPS_256 ); 26599 goto decode_success; 26600 } 26601 break; 26602 26603 case 0xD1: 26604 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */ 26605 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26606 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26607 "vpsrlw", Iop_ShrN16x8 ); 26608 *uses_vvvv = True; 26609 goto decode_success; 26610 26611 } 26612 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */ 26613 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26614 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26615 "vpsrlw", Iop_ShrN16x16 ); 26616 *uses_vvvv = True; 26617 goto decode_success; 26618 26619 } 26620 break; 26621 26622 case 0xD2: 26623 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */ 26624 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26625 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26626 "vpsrld", Iop_ShrN32x4 ); 26627 *uses_vvvv = True; 26628 goto decode_success; 26629 } 26630 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */ 26631 if 
(have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26632 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26633 "vpsrld", Iop_ShrN32x8 ); 26634 *uses_vvvv = True; 26635 goto decode_success; 26636 } 26637 break; 26638 26639 case 0xD3: 26640 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */ 26641 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26642 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26643 "vpsrlq", Iop_ShrN64x2 ); 26644 *uses_vvvv = True; 26645 goto decode_success; 26646 } 26647 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */ 26648 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26649 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26650 "vpsrlq", Iop_ShrN64x4 ); 26651 *uses_vvvv = True; 26652 goto decode_success; 26653 } 26654 break; 26655 26656 case 0xD4: 26657 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26658 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */ 26659 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26660 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26661 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 ); 26662 goto decode_success; 26663 } 26664 /* VPADDQ r/m, rV, r ::: r = rV + r/m */ 26665 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */ 26666 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26667 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26668 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 ); 26669 goto decode_success; 26670 } 26671 break; 26672 26673 case 0xD5: 26674 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */ 26675 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26676 delta = dis_AVX128_E_V_to_G( 26677 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 ); 26678 goto decode_success; 26679 } 26680 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */ 26681 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26682 delta = dis_AVX256_E_V_to_G( 26683 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 ); 26684 goto decode_success; 26685 } 26686 
break; 26687 26688 case 0xD6: 26689 /* I can't even find any Intel docs for this one. */ 26690 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half 26691 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0 26692 (WIG, maybe?) */ 26693 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 26694 && 0==getRexW(pfx)/*this might be redundant, dunno*/) { 26695 UChar modrm = getUChar(delta); 26696 UInt rG = gregOfRexRM(pfx,modrm); 26697 if (epartIsReg(modrm)) { 26698 /* fall through, awaiting test case */ 26699 /* dst: lo half copied, hi half zeroed */ 26700 } else { 26701 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26702 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 )); 26703 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf ); 26704 delta += alen; 26705 goto decode_success; 26706 } 26707 } 26708 break; 26709 26710 case 0xD7: 26711 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */ 26712 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26713 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ ); 26714 goto decode_success; 26715 } 26716 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */ 26717 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26718 delta = dis_PMOVMSKB_256( vbi, pfx, delta ); 26719 goto decode_success; 26720 } 26721 break; 26722 26723 case 0xD8: 26724 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */ 26725 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26726 delta = dis_AVX128_E_V_to_G( 26727 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 ); 26728 goto decode_success; 26729 } 26730 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */ 26731 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26732 delta = dis_AVX256_E_V_to_G( 26733 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 ); 26734 goto decode_success; 26735 } 26736 break; 26737 26738 case 0xD9: 26739 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */ 26740 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 26741 delta = dis_AVX128_E_V_to_G( 26742 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 ); 26743 goto decode_success; 26744 } 26745 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */ 26746 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26747 delta = dis_AVX256_E_V_to_G( 26748 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 ); 26749 goto decode_success; 26750 } 26751 break; 26752 26753 case 0xDA: 26754 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */ 26755 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26756 delta = dis_AVX128_E_V_to_G( 26757 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 ); 26758 goto decode_success; 26759 } 26760 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */ 26761 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26762 delta = dis_AVX256_E_V_to_G( 26763 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 ); 26764 goto decode_success; 26765 } 26766 break; 26767 26768 case 0xDB: 26769 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26770 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */ 26771 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26772 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 26773 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 ); 26774 goto decode_success; 26775 } 26776 /* VPAND r/m, rV, r ::: r = rV & r/m */ 26777 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */ 26778 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26779 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 26780 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 ); 26781 goto decode_success; 26782 } 26783 break; 26784 26785 case 0xDC: 26786 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */ 26787 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26788 delta = dis_AVX128_E_V_to_G( 26789 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 ); 26790 goto decode_success; 26791 } 26792 /* VPADDUSB ymm3/m256, 
ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */ 26793 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26794 delta = dis_AVX256_E_V_to_G( 26795 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 ); 26796 goto decode_success; 26797 } 26798 break; 26799 26800 case 0xDD: 26801 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */ 26802 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26803 delta = dis_AVX128_E_V_to_G( 26804 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 ); 26805 goto decode_success; 26806 } 26807 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */ 26808 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26809 delta = dis_AVX256_E_V_to_G( 26810 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 ); 26811 goto decode_success; 26812 } 26813 break; 26814 26815 case 0xDE: 26816 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */ 26817 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26818 delta = dis_AVX128_E_V_to_G( 26819 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 ); 26820 goto decode_success; 26821 } 26822 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */ 26823 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26824 delta = dis_AVX256_E_V_to_G( 26825 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 ); 26826 goto decode_success; 26827 } 26828 break; 26829 26830 case 0xDF: 26831 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */ 26832 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */ 26833 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26834 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 26835 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128, 26836 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26837 goto decode_success; 26838 } 26839 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) 
*/ 26840 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */ 26841 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26842 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG( 26843 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256, 26844 NULL, True/*invertLeftArg*/, False/*swapArgs*/ ); 26845 goto decode_success; 26846 } 26847 break; 26848 26849 case 0xE0: 26850 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */ 26851 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26852 delta = dis_AVX128_E_V_to_G( 26853 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 ); 26854 goto decode_success; 26855 } 26856 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */ 26857 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26858 delta = dis_AVX256_E_V_to_G( 26859 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 ); 26860 goto decode_success; 26861 } 26862 break; 26863 26864 case 0xE1: 26865 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */ 26866 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26867 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26868 "vpsraw", Iop_SarN16x8 ); 26869 *uses_vvvv = True; 26870 goto decode_success; 26871 } 26872 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */ 26873 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26874 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26875 "vpsraw", Iop_SarN16x16 ); 26876 *uses_vvvv = True; 26877 goto decode_success; 26878 } 26879 break; 26880 26881 case 0xE2: 26882 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */ 26883 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26884 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 26885 "vpsrad", Iop_SarN32x4 ); 26886 *uses_vvvv = True; 26887 goto decode_success; 26888 } 26889 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */ 26890 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26891 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 26892 "vpsrad", Iop_SarN32x8 
); 26893 *uses_vvvv = True; 26894 goto decode_success; 26895 } 26896 break; 26897 26898 case 0xE3: 26899 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */ 26900 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26901 delta = dis_AVX128_E_V_to_G( 26902 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 ); 26903 goto decode_success; 26904 } 26905 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */ 26906 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26907 delta = dis_AVX256_E_V_to_G( 26908 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 ); 26909 goto decode_success; 26910 } 26911 break; 26912 26913 case 0xE4: 26914 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */ 26915 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26916 delta = dis_AVX128_E_V_to_G( 26917 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 ); 26918 goto decode_success; 26919 } 26920 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */ 26921 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26922 delta = dis_AVX256_E_V_to_G( 26923 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 ); 26924 goto decode_success; 26925 } 26926 break; 26927 26928 case 0xE5: 26929 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */ 26930 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26931 delta = dis_AVX128_E_V_to_G( 26932 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 ); 26933 goto decode_success; 26934 } 26935 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */ 26936 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26937 delta = dis_AVX256_E_V_to_G( 26938 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 ); 26939 goto decode_success; 26940 } 26941 break; 26942 26943 case 0xE6: 26944 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */ 26945 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) { 26946 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/); 26947 goto 
decode_success; 26948 } 26949 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */ 26950 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) { 26951 delta = dis_CVTDQ2PD_256(vbi, pfx, delta); 26952 goto decode_success; 26953 } 26954 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */ 26955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26956 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26957 True/*r2zero*/); 26958 goto decode_success; 26959 } 26960 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */ 26961 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26962 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/); 26963 goto decode_success; 26964 } 26965 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */ 26966 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26967 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/, 26968 False/*!r2zero*/); 26969 goto decode_success; 26970 } 26971 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */ 26972 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26973 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/); 26974 goto decode_success; 26975 } 26976 break; 26977 26978 case 0xE7: 26979 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */ 26980 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 26981 UChar modrm = getUChar(delta); 26982 UInt rG = gregOfRexRM(pfx,modrm); 26983 if (!epartIsReg(modrm)) { 26984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 26985 gen_SEGV_if_not_16_aligned( addr ); 26986 storeLE( mkexpr(addr), getXMMReg(rG) ); 26987 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG)); 26988 delta += alen; 26989 goto decode_success; 26990 } 26991 /* else fall through */ 26992 } 26993 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */ 26994 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 26995 UChar modrm = getUChar(delta); 26996 UInt rG = gregOfRexRM(pfx,modrm); 26997 if (!epartIsReg(modrm)) { 26998 addr = disAMode ( &alen, vbi, 
pfx, delta, dis_buf, 0 ); 26999 gen_SEGV_if_not_32_aligned( addr ); 27000 storeLE( mkexpr(addr), getYMMReg(rG) ); 27001 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG)); 27002 delta += alen; 27003 goto decode_success; 27004 } 27005 /* else fall through */ 27006 } 27007 break; 27008 27009 case 0xE8: 27010 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */ 27011 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27012 delta = dis_AVX128_E_V_to_G( 27013 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 ); 27014 goto decode_success; 27015 } 27016 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */ 27017 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27018 delta = dis_AVX256_E_V_to_G( 27019 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 ); 27020 goto decode_success; 27021 } 27022 break; 27023 27024 case 0xE9: 27025 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */ 27026 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27027 delta = dis_AVX128_E_V_to_G( 27028 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 ); 27029 goto decode_success; 27030 } 27031 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */ 27032 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27033 delta = dis_AVX256_E_V_to_G( 27034 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 ); 27035 goto decode_success; 27036 } 27037 break; 27038 27039 case 0xEA: 27040 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 27041 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */ 27042 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27043 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27044 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 ); 27045 goto decode_success; 27046 } 27047 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */ 27048 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */ 27049 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27050 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27051 uses_vvvv, vbi, 
pfx, delta, "vpminsw", Iop_Min16Sx16 ); 27052 goto decode_success; 27053 } 27054 break; 27055 27056 case 0xEB: 27057 /* VPOR r/m, rV, r ::: r = rV | r/m */ 27058 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */ 27059 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27060 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27061 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 ); 27062 goto decode_success; 27063 } 27064 /* VPOR r/m, rV, r ::: r = rV | r/m */ 27065 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */ 27066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27067 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27068 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 ); 27069 goto decode_success; 27070 } 27071 break; 27072 27073 case 0xEC: 27074 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */ 27075 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27076 delta = dis_AVX128_E_V_to_G( 27077 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 ); 27078 goto decode_success; 27079 } 27080 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */ 27081 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27082 delta = dis_AVX256_E_V_to_G( 27083 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 ); 27084 goto decode_success; 27085 } 27086 break; 27087 27088 case 0xED: 27089 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */ 27090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27091 delta = dis_AVX128_E_V_to_G( 27092 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 ); 27093 goto decode_success; 27094 } 27095 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */ 27096 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27097 delta = dis_AVX256_E_V_to_G( 27098 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 ); 27099 goto decode_success; 27100 } 27101 break; 27102 27103 case 0xEE: 27104 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 27105 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */ 27106 if 
(have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27107 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27108 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 ); 27109 goto decode_success; 27110 } 27111 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */ 27112 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */ 27113 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27114 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27115 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 ); 27116 goto decode_success; 27117 } 27118 break; 27119 27120 case 0xEF: 27121 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 27122 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */ 27123 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27124 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27125 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 ); 27126 goto decode_success; 27127 } 27128 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */ 27129 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */ 27130 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27131 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27132 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 ); 27133 goto decode_success; 27134 } 27135 break; 27136 27137 case 0xF0: 27138 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */ 27139 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27140 UChar modrm = getUChar(delta); 27141 UInt rD = gregOfRexRM(pfx, modrm); 27142 IRTemp tD = newTemp(Ity_V256); 27143 if (epartIsReg(modrm)) break; 27144 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27145 delta += alen; 27146 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 27147 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD)); 27148 putYMMReg(rD, mkexpr(tD)); 27149 goto decode_success; 27150 } 27151 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */ 27152 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27153 UChar modrm = getUChar(delta); 27154 UInt rD = gregOfRexRM(pfx, modrm); 27155 IRTemp tD = newTemp(Ity_V128); 27156 if (epartIsReg(modrm)) break; 27157 addr 
= disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 27158 delta += alen; 27159 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 27160 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD)); 27161 putYMMRegLoAndZU(rD, mkexpr(tD)); 27162 goto decode_success; 27163 } 27164 break; 27165 27166 case 0xF1: 27167 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */ 27168 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27169 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 27170 "vpsllw", Iop_ShlN16x8 ); 27171 *uses_vvvv = True; 27172 goto decode_success; 27173 27174 } 27175 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */ 27176 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27177 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 27178 "vpsllw", Iop_ShlN16x16 ); 27179 *uses_vvvv = True; 27180 goto decode_success; 27181 27182 } 27183 break; 27184 27185 case 0xF2: 27186 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */ 27187 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27188 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 27189 "vpslld", Iop_ShlN32x4 ); 27190 *uses_vvvv = True; 27191 goto decode_success; 27192 } 27193 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */ 27194 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27195 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 27196 "vpslld", Iop_ShlN32x8 ); 27197 *uses_vvvv = True; 27198 goto decode_success; 27199 } 27200 break; 27201 27202 case 0xF3: 27203 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */ 27204 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27205 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta, 27206 "vpsllq", Iop_ShlN64x2 ); 27207 *uses_vvvv = True; 27208 goto decode_success; 27209 } 27210 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */ 27211 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27212 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta, 27213 "vpsllq", Iop_ShlN64x4 ); 27214 *uses_vvvv = True; 27215 
goto decode_success; 27216 } 27217 break; 27218 27219 case 0xF4: 27220 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */ 27221 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27222 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27223 uses_vvvv, vbi, pfx, delta, 27224 "vpmuludq", math_PMULUDQ_128 ); 27225 goto decode_success; 27226 } 27227 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */ 27228 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27229 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27230 uses_vvvv, vbi, pfx, delta, 27231 "vpmuludq", math_PMULUDQ_256 ); 27232 goto decode_success; 27233 } 27234 break; 27235 27236 case 0xF5: 27237 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */ 27238 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27239 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27240 uses_vvvv, vbi, pfx, delta, 27241 "vpmaddwd", math_PMADDWD_128 ); 27242 goto decode_success; 27243 } 27244 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */ 27245 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27246 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27247 uses_vvvv, vbi, pfx, delta, 27248 "vpmaddwd", math_PMADDWD_256 ); 27249 goto decode_success; 27250 } 27251 break; 27252 27253 case 0xF6: 27254 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */ 27255 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27256 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27257 uses_vvvv, vbi, pfx, delta, 27258 "vpsadbw", math_PSADBW_128 ); 27259 goto decode_success; 27260 } 27261 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */ 27262 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27263 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27264 uses_vvvv, vbi, pfx, delta, 27265 "vpsadbw", math_PSADBW_256 ); 27266 goto decode_success; 27267 } 27268 break; 27269 27270 case 0xF7: 27271 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 
/r */ 27272 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 27273 && epartIsReg(getUChar(delta))) { 27274 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ ); 27275 goto decode_success; 27276 } 27277 break; 27278 27279 case 0xF8: 27280 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 27281 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */ 27282 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27283 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27284 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 ); 27285 goto decode_success; 27286 } 27287 /* VPSUBB r/m, rV, r ::: r = rV - r/m */ 27288 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */ 27289 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27290 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27291 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 ); 27292 goto decode_success; 27293 } 27294 break; 27295 27296 case 0xF9: 27297 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 27298 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */ 27299 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27300 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27301 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 ); 27302 goto decode_success; 27303 } 27304 /* VPSUBW r/m, rV, r ::: r = rV - r/m */ 27305 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */ 27306 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27307 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27308 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 ); 27309 goto decode_success; 27310 } 27311 break; 27312 27313 case 0xFA: 27314 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 27315 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */ 27316 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27317 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27318 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 ); 27319 goto decode_success; 27320 } 27321 /* VPSUBD r/m, rV, r ::: r = rV - r/m */ 27322 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */ 27323 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27324 
delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27325 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 ); 27326 goto decode_success; 27327 } 27328 break; 27329 27330 case 0xFB: 27331 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 27332 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */ 27333 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27334 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27335 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 ); 27336 goto decode_success; 27337 } 27338 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */ 27339 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */ 27340 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27341 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27342 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 ); 27343 goto decode_success; 27344 } 27345 break; 27346 27347 case 0xFC: 27348 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 27349 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */ 27350 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27351 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27352 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 ); 27353 goto decode_success; 27354 } 27355 /* VPADDB r/m, rV, r ::: r = rV + r/m */ 27356 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */ 27357 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27358 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27359 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 ); 27360 goto decode_success; 27361 } 27362 break; 27363 27364 case 0xFD: 27365 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 27366 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */ 27367 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27368 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27369 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 ); 27370 goto decode_success; 27371 } 27372 /* VPADDW r/m, rV, r ::: r = rV + r/m */ 27373 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */ 27374 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27375 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27376 
uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 ); 27377 goto decode_success; 27378 } 27379 break; 27380 27381 case 0xFE: 27382 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 27383 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */ 27384 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27385 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 27386 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 ); 27387 goto decode_success; 27388 } 27389 /* VPADDD r/m, rV, r ::: r = rV + r/m */ 27390 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */ 27391 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27392 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 27393 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 ); 27394 goto decode_success; 27395 } 27396 break; 27397 27398 default: 27399 break; 27400 27401 } 27402 27403 //decode_failure: 27404 return deltaIN; 27405 27406 decode_success: 27407 return delta; 27408 } 27409 27410 27411 /*------------------------------------------------------------*/ 27412 /*--- ---*/ 27413 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/ 27414 /*--- ---*/ 27415 /*------------------------------------------------------------*/ 27416 27417 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 27418 { 27419 /* In the control vector, zero out all but the bottom two bits of 27420 each 32-bit lane. */ 27421 IRExpr* cv1 = binop(Iop_ShrN32x4, 27422 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)), 27423 mkU8(30)); 27424 /* And use the resulting cleaned-up control vector as steering 27425 in a Perm operation. 
*/ 27426 IRTemp res = newTemp(Ity_V128); 27427 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1)); 27428 return res; 27429 } 27430 27431 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV ) 27432 { 27433 IRTemp dHi, dLo, cHi, cLo; 27434 dHi = dLo = cHi = cLo = IRTemp_INVALID; 27435 breakupV256toV128s( dataV, &dHi, &dLo ); 27436 breakupV256toV128s( ctrlV, &cHi, &cLo ); 27437 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi ); 27438 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo ); 27439 IRTemp res = newTemp(Ity_V256); 27440 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo))); 27441 return res; 27442 } 27443 27444 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV ) 27445 { 27446 /* No cleverness here .. */ 27447 IRTemp dHi, dLo, cHi, cLo; 27448 dHi = dLo = cHi = cLo = IRTemp_INVALID; 27449 breakupV128to64s( dataV, &dHi, &dLo ); 27450 breakupV128to64s( ctrlV, &cHi, &cLo ); 27451 IRExpr* rHi 27452 = IRExpr_ITE( unop(Iop_64to1, 27453 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))), 27454 mkexpr(dHi), mkexpr(dLo) ); 27455 IRExpr* rLo 27456 = IRExpr_ITE( unop(Iop_64to1, 27457 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))), 27458 mkexpr(dHi), mkexpr(dLo) ); 27459 IRTemp res = newTemp(Ity_V128); 27460 assign(res, binop(Iop_64HLtoV128, rHi, rLo)); 27461 return res; 27462 } 27463 27464 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV ) 27465 { 27466 IRTemp dHi, dLo, cHi, cLo; 27467 dHi = dLo = cHi = cLo = IRTemp_INVALID; 27468 breakupV256toV128s( dataV, &dHi, &dLo ); 27469 breakupV256toV128s( ctrlV, &cHi, &cLo ); 27470 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi ); 27471 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo ); 27472 IRTemp res = newTemp(Ity_V256); 27473 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo))); 27474 return res; 27475 } 27476 27477 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV ) 27478 { 27479 /* In the control vector, zero out all but the bottom three bits of 27480 each 32-bit lane. 
/* VPERMD: full-width 32-bit-lane permute of |dataV|, steered by the
   low three bits of each 32-bit lane of |ctrlV|. */
static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
{
   /* In the control vector, zero out all but the bottom three bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x8,
                       binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
                       mkU8(29));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
   return res;
}

/* Common decoder for the VEX-encoded flagless shifts (|opname| names
   the instruction, |op8| is the 8-bit base IROp, widened below to the
   operand size).  The shift amount comes from the vvvv register and
   is masked to the operand width, per the masking on the E operand
   below.  Returns the updated instruction offset. */
static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
                         const VexAbiInfo* vbi, Prefix pfx, Long delta,
                         const HChar* opname, IROp op8 )
{
   HChar  dis_buf[50];
   Int    alen;
   Int    size = getRexW(pfx) ? 8 : 4;   /* REX.W picks 64- vs 32-bit */
   IRType ty   = szToITy(size);
   IRTemp src  = newTemp(ty);
   IRTemp amt  = newTemp(ty);
   UChar  rm   = getUChar(delta);

   /* Shift amount is taken from the vvvv register. */
   assign( amt, getIRegV(size,pfx) );
   if (epartIsReg(rm)) {
      assign( src, getIRegE(size,pfx,rm) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
          nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
      delta++;
   } else {
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( src, loadLE(ty, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
          nameIRegG(size,pfx,rm));
      delta += alen;
   }

   /* Mask the amount to 0 .. width-1, then apply the size-widened op. */
   putIRegG( size, pfx, rm,
             binop(mkSizedOp(ty,op8), mkexpr(src),
                   narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
                                          mkU(ty,8*size-1)))) );
   /* Flags aren't modified. */
   *uses_vvvv = True;
   return delta;
}


/* Decode the VEX FMA family (the vf{n,}m{add,sub,addsub,subadd}
   132/213/231 {ss,sd,ps,pd} forms).  The low nibble of |opc| selects
   the add/sub/negation pattern and scalar-ness; the high nibble
   selects the 132/213/231 operand ordering.  NOTE: the multiply-adds
   are built with get_FAKE_roundingmode(), i.e. the guest rounding
   mode is not honoured here.  Returns the updated instruction
   offset. */
static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
{
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   UInt   rV    = getVexNvvvv(pfx);
   /* Scalar forms are the odd opcodes with low nibble > 7. */
   Bool   scalar = (opc & 0xF) > 7 && (opc & 1);
   IRType ty    = getRexW(pfx) ? Ity_F64 : Ity_F32;
   IRType vty   = scalar ? ty : getVexL(pfx) ? Ity_V256 : Ity_V128;
   IRTemp vX = newTemp(vty);
   IRTemp vY = newTemp(vty);
   IRTemp vZ = newTemp(vty);
   IRExpr *x[8], *y[8], *z[8];   /* per-lane expressions; max 8 lanes */
   IRTemp addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen = 0;
   const HChar *name;
   const HChar *suffix;
   const HChar *order;
   Bool   negateRes   = False;   /* negate the whole result (vfnm...) */
   Bool   negateZeven = False;   /* negate Z in even lanes */
   Bool   negateZodd  = False;   /* negate Z in odd lanes */
   Int    i, j;
   Int    count;
   /* 64-bit extraction ops: [0..3] for V256 lanes, [4..5] for V128. */
   static IROp ops[] = { Iop_V256to64_0, Iop_V256to64_1,
                         Iop_V256to64_2, Iop_V256to64_3,
                         Iop_V128to64, Iop_V128HIto64 };

   /* Low nibble: which add/sub/negate combination this is. */
   switch (opc & 0xF) {
   case 0x6:
      name = "addsub";
      negateZeven = True;
      break;
   case 0x7:
      name = "subadd";
      negateZodd = True;
      break;
   case 0x8:
   case 0x9:
      name = "add";
      break;
   case 0xA:
   case 0xB:
      name = "sub";
      negateZeven = True;
      negateZodd = True;
      break;
   case 0xC:
   case 0xD:
      name = "add";
      negateRes = True;
      negateZeven = True;
      negateZodd = True;
      break;
   case 0xE:
   case 0xF:
      name = "sub";
      negateRes = True;
      break;
   default:
      vpanic("dis_FMA(amd64)");
      break;
   }
   /* High nibble: operand ordering variant. */
   switch (opc & 0xF0) {
   case 0x90: order = "132"; break;
   case 0xA0: order = "213"; break;
   case 0xB0: order = "231"; break;
   default: vpanic("dis_FMA(amd64)"); break;
   }
   if (scalar)
      suffix = ty == Ity_F64 ? "sd" : "ss";
   else
      suffix = ty == Ity_F64 ? "pd" : "ps";

   /* Fetch the G (dest) and vvvv operands. */
   if (scalar) {
      assign( vX, ty == Ity_F64
              ? getXMMRegLane64F(rG, 0) : getXMMRegLane32F(rG, 0) );
      assign( vZ, ty == Ity_F64
              ? getXMMRegLane64F(rV, 0) : getXMMRegLane32F(rV, 0) );
   } else {
      assign( vX, vty == Ity_V256 ? getYMMReg(rG) : getXMMReg(rG) );
      assign( vZ, vty == Ity_V256 ? getYMMReg(rV) : getXMMReg(rV) );
   }

   /* Fetch the E (reg-or-mem) operand. */
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      delta += 1;
      if (scalar)
         assign( vY, ty == Ity_F64
                 ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
      else
         assign( vY, vty == Ity_V256 ? getYMMReg(rE) : getXMMReg(rE) );
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
             nameXMMReg(rG));
      }
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      assign(vY, loadLE(vty, mkexpr(addr)));
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG));
      }
   }

   /* vX/vY/vZ now in 132 order.  If it is different order, swap the
      arguments. */
   if ((opc & 0xF0) != 0x90) {
      IRTemp tem = vX;
      if ((opc & 0xF0) == 0xA0) {
         vX = vZ;
         vZ = vY;
         vY = tem;
      } else {
         vX = vZ;
         vZ = tem;
      }
   }

   /* Break the operands into per-lane expressions x[]/y[]/z[]. */
   if (scalar) {
      count = 1;
      x[0] = mkexpr(vX);
      y[0] = mkexpr(vY);
      z[0] = mkexpr(vZ);
   } else if (ty == Ity_F32) {
      /* 32-bit lanes: pull out 64-bit chunks, then split each chunk. */
      count = vty == Ity_V256 ? 8 : 4;
      j = vty == Ity_V256 ? 0 : 4;   /* index into ops[] table */
      for (i = 0; i < count; i += 2) {
         IRTemp tem = newTemp(Ity_I64);
         assign(tem, unop(ops[i / 2 + j], mkexpr(vX)));
         x[i] = unop(Iop_64to32, mkexpr(tem));
         x[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
         tem = newTemp(Ity_I64);
         assign(tem, unop(ops[i / 2 + j], mkexpr(vY)));
         y[i] = unop(Iop_64to32, mkexpr(tem));
         y[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
         tem = newTemp(Ity_I64);
         assign(tem, unop(ops[i / 2 + j], mkexpr(vZ)));
         z[i] = unop(Iop_64to32, mkexpr(tem));
         z[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
      }
   } else {
      /* 64-bit lanes: extract directly. */
      count = vty == Ity_V256 ? 4 : 2;
      j = vty == Ity_V256 ? 0 : 4;
      for (i = 0; i < count; i++) {
         x[i] = unop(ops[i + j], mkexpr(vX));
         y[i] = unop(ops[i + j], mkexpr(vY));
         z[i] = unop(ops[i + j], mkexpr(vZ));
      }
   }
   /* Reinterpret the integer lanes as FP (scalar lanes already are). */
   if (!scalar)
      for (i = 0; i < count; i++) {
         IROp op = ty == Ity_F64
            ? Iop_ReinterpI64asF64 : Iop_ReinterpI32asF32;
         x[i] = unop(op, x[i]);
         y[i] = unop(op, y[i]);
         z[i] = unop(op, z[i]);
      }
   /* Per lane: optionally negate z, fused multiply-add, optionally
      negate the result, write back to the dest register lane. */
   for (i = 0; i < count; i++) {
      if ((i & 1) ? negateZodd : negateZeven)
         z[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, z[i]);
      x[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
                        get_FAKE_roundingmode(), x[i], y[i], z[i]);
      if (negateRes)
         x[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, x[i]);
      if (ty == Ity_F64)
         putYMMRegLane64F( rG, i, x[i] );
      else
         putYMMRegLane32F( rG, i, x[i] );
   }
   /* For 128-bit (and scalar) forms, zero the upper half of the YMM
      destination. */
   if (vty != Ity_V256)
      putYMMRegLane128( rG, 1, mkV128(0) );

   return delta;
}
/* Masked load or masked store.  |rV| supplies the per-lane mask
   (bit 31/63 of each lane), |rG| is the register operand and the
   modrm-addressed memory is the other operand.  Lanes whose mask bit
   is clear are left untouched (stores are suppressed; load
   destinations keep, per LoadG below, the supplied alt value 0).
   Returns the updated instruction offset. */
static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                            Prefix pfx, Long delta,
                            const HChar* opname, Bool isYMM, IRType ty,
                            Bool isLoad )
{
   HChar   dis_buf[50];
   Int     alen, i;
   IRTemp  addr;
   UChar   modrm = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,modrm);
   UInt    rV    = getVexNvvvv(pfx);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /* Four print variants: {load,store} x {XMM,YMM}. */
   /**/ if (isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
   }
   else if (isLoad && !isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
   }
   else if (!isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
   }
   else {
      vassert(!isLoad && !isYMM);
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
   }

   vassert(ty == Ity_I32 || ty == Ity_I64);
   Bool laneIs32 = ty == Ity_I32;

   /* XMM: 4x32 or 2x64 lanes; YMM: twice that. */
   Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);

   for (i = 0; i < nLanes; i++) {
      IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
      IRExpr* one   = laneIs32 ? mkU32(1) : mkU64(1);
      IROp    opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
      IROp    opEQ  = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
      IRExpr* lane  = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );

      /* cond <- top bit of the mask lane. */
      IRTemp  cond = newTemp(Ity_I1);
      assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));

      IRTemp  data = newTemp(ty);
      IRExpr* ea   = binop(Iop_Add64, mkexpr(addr),
                           mkU64(i * (laneIs32 ? 4 : 8)));
      if (isLoad) {
         /* Guarded load: when cond is false the lane gets the alt
            value 0, so disabled lanes are zeroed. */
         stmt(
            IRStmt_LoadG(
               Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
               data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
         ));
         (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
      } else {
         /* Guarded store: suppressed entirely when cond is false. */
         assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
         stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
      }
   }

   /* A 128-bit load zeroes the upper half of the YMM destination. */
   if (isLoad && !isYMM)
      putYMMRegLane128( rG, 1, mkV128(0) );

   *uses_vvvv = True;
   return delta;
}


/* Gather.  |rG| is the destination, |rV| the mask register, and the
   AVSIB-addressed operand supplies base/index (|rI|)/scale.  Decode
   fails (returns the unadvanced delta) on an invalid AVSIB mode or
   when any two of rG/rV/rI coincide.  Returns the updated instruction
   offset on success. */
static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                           Prefix pfx, Long delta,
                           const HChar* opname, Bool isYMM,
                           Bool isVM64x, IRType ty )
{
   HChar  dis_buf[50];
   Int    alen, i, vscale, count1, count2;
   IRTemp addr;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   UInt   rI;
   /* Destination/index widths depend on element size and VM64x-ness. */
   IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
   IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
   IRTemp cond;
   addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
                         idxTy, &vscale );
   if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
      return delta;
   if (dstTy == Ity_V256) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
   } else {
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
   }
   delta += alen;

   /* count1 = number of mask lanes; count2 = number of gathered
      elements (halved for 32-bit elements with 64-bit indices). */
   if (ty == Ity_I32) {
      count1 = isYMM ? 8 : 4;
      count2 = isVM64x ? count1 / 2 : count1;
   } else {
      count1 = count2 = isYMM ? 4 : 2;
   }

   /* First update the mask register to copies of the sign bit. */
   if (ty == Ity_I32) {
      if (isYMM)
         putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
      else
         putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
   } else {
      for (i = 0; i < count1; i++) {
         putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
                                       mkU8(63)) );
      }
   }

   /* Next gather the individual elements.  If any fault occurs, the
      corresponding mask element will be set and the loop stops. */
   for (i = 0; i < count2; i++) {
      IRExpr *expr, *addr_expr;
      /* cond: is this lane's mask negative (i.e. enabled)? */
      cond = newTemp(Ity_I1);
      assign( cond,
              binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
                    ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                  : getYMMRegLane64( rV, i ),
                    mkU(ty, 0)) );
      /* Old destination lane, kept when the mask bit is clear. */
      expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
                           : getYMMRegLane64( rG, i );
      /* Scaled index; 32-bit indices are sign-extended. */
      addr_expr = isVM64x ? getYMMRegLane64( rI, i )
                          : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
      switch (vscale) {
         case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
         case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
         case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
         default: break;
      }
      addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
      addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
      /* For a disabled lane, load from RSP instead, which is
         presumably always mapped, so the dummy load cannot fault. */
      addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
      expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
      /* Write the gathered element and clear its mask lane. */
      if (ty == Ity_I32) {
         putYMMRegLane32( rG, i, expr );
         putYMMRegLane32( rV, i, mkU32(0) );
      } else {
         putYMMRegLane64( rG, i, expr );
         putYMMRegLane64( rV, i, mkU64(0) );
      }
   }

   /* Zero whatever upper parts of rV/rG the operation leaves
      undefined. */
   if (!isYMM || (ty == Ity_I32 && isVM64x)) {
      if (ty == Ity_I64 || isYMM)
         putYMMRegLane128( rV, 1, mkV128(0) );
      else if (ty == Ity_I32 && count2 == 2) {
         putYMMRegLane64( rV, 1, mkU64(0) );
         putYMMRegLane64( rG, 1, mkU64(0) );
      }
      putYMMRegLane128( rG, 1, mkV128(0) );
   }

   *uses_vvvv = True;
   return delta;
}
const VexAbiInfo* vbi, 27889 Prefix pfx, Int sz, Long deltaIN 27890 ) 27891 { 27892 IRTemp addr = IRTemp_INVALID; 27893 Int alen = 0; 27894 HChar dis_buf[50]; 27895 Long delta = deltaIN; 27896 UChar opc = getUChar(delta); 27897 delta++; 27898 *uses_vvvv = False; 27899 27900 switch (opc) { 27901 27902 case 0x00: 27903 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27904 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */ 27905 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27906 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27907 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM ); 27908 goto decode_success; 27909 } 27910 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */ 27911 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */ 27912 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27913 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27914 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM ); 27915 goto decode_success; 27916 } 27917 break; 27918 27919 case 0x01: 27920 case 0x02: 27921 case 0x03: 27922 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */ 27923 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */ 27924 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */ 27925 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27926 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27927 *uses_vvvv = True; 27928 goto decode_success; 27929 } 27930 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */ 27931 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */ 27932 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */ 27933 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27934 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27935 *uses_vvvv = True; 27936 goto decode_success; 27937 } 27938 break; 27939 27940 case 0x04: 27941 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */ 27942 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 
27943 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 27944 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27945 math_PMADDUBSW_128 ); 27946 goto decode_success; 27947 } 27948 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */ 27949 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27950 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 27951 uses_vvvv, vbi, pfx, delta, "vpmaddubsw", 27952 math_PMADDUBSW_256 ); 27953 goto decode_success; 27954 } 27955 break; 27956 27957 case 0x05: 27958 case 0x06: 27959 case 0x07: 27960 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */ 27961 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */ 27962 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */ 27963 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27964 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc ); 27965 *uses_vvvv = True; 27966 goto decode_success; 27967 } 27968 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */ 27969 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */ 27970 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */ 27971 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 27972 delta = dis_PHADD_256( vbi, pfx, delta, opc ); 27973 *uses_vvvv = True; 27974 goto decode_success; 27975 } 27976 break; 27977 27978 case 0x08: 27979 case 0x09: 27980 case 0x0A: 27981 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */ 27982 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */ 27983 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */ 27984 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 27985 IRTemp sV = newTemp(Ity_V128); 27986 IRTemp dV = newTemp(Ity_V128); 27987 IRTemp sHi, sLo, dHi, dLo; 27988 sHi = sLo = dHi = dLo = IRTemp_INVALID; 27989 HChar ch = '?'; 27990 Int laneszB = 0; 27991 UChar modrm = getUChar(delta); 27992 UInt rG = gregOfRexRM(pfx,modrm); 27993 UInt rV = 
getVexNvvvv(pfx); 27994 27995 switch (opc) { 27996 case 0x08: laneszB = 1; ch = 'b'; break; 27997 case 0x09: laneszB = 2; ch = 'w'; break; 27998 case 0x0A: laneszB = 4; ch = 'd'; break; 27999 default: vassert(0); 28000 } 28001 28002 assign( dV, getXMMReg(rV) ); 28003 28004 if (epartIsReg(modrm)) { 28005 UInt rE = eregOfRexRM(pfx,modrm); 28006 assign( sV, getXMMReg(rE) ); 28007 delta += 1; 28008 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE), 28009 nameXMMReg(rV), nameXMMReg(rG)); 28010 } else { 28011 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 28012 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 28013 delta += alen; 28014 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 28015 nameXMMReg(rV), nameXMMReg(rG)); 28016 } 28017 28018 breakupV128to64s( dV, &dHi, &dLo ); 28019 breakupV128to64s( sV, &sHi, &sLo ); 28020 28021 putYMMRegLoAndZU( 28022 rG, 28023 binop(Iop_64HLtoV128, 28024 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), 28025 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) 28026 ) 28027 ); 28028 *uses_vvvv = True; 28029 goto decode_success; 28030 } 28031 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */ 28032 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */ 28033 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */ 28034 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28035 IRTemp sV = newTemp(Ity_V256); 28036 IRTemp dV = newTemp(Ity_V256); 28037 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 28038 s3 = s2 = s1 = s0 = IRTemp_INVALID; 28039 d3 = d2 = d1 = d0 = IRTemp_INVALID; 28040 UChar ch = '?'; 28041 Int laneszB = 0; 28042 UChar modrm = getUChar(delta); 28043 UInt rG = gregOfRexRM(pfx,modrm); 28044 UInt rV = getVexNvvvv(pfx); 28045 28046 switch (opc) { 28047 case 0x08: laneszB = 1; ch = 'b'; break; 28048 case 0x09: laneszB = 2; ch = 'w'; break; 28049 case 0x0A: laneszB = 4; ch = 'd'; break; 28050 default: vassert(0); 28051 } 28052 28053 assign( dV, getYMMReg(rV) ); 28054 28055 if 
(epartIsReg(modrm)) { 28056 UInt rE = eregOfRexRM(pfx,modrm); 28057 assign( sV, getYMMReg(rE) ); 28058 delta += 1; 28059 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE), 28060 nameYMMReg(rV), nameYMMReg(rG)); 28061 } else { 28062 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 28063 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 28064 delta += alen; 28065 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf, 28066 nameYMMReg(rV), nameYMMReg(rG)); 28067 } 28068 28069 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 28070 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 28071 28072 putYMMReg( 28073 rG, 28074 binop( Iop_V128HLtoV256, 28075 binop(Iop_64HLtoV128, 28076 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ), 28077 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB ) 28078 ), 28079 binop(Iop_64HLtoV128, 28080 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ), 28081 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB ) 28082 ) 28083 ) 28084 ); 28085 *uses_vvvv = True; 28086 goto decode_success; 28087 } 28088 break; 28089 28090 case 0x0B: 28091 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */ 28092 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28093 IRTemp sV = newTemp(Ity_V128); 28094 IRTemp dV = newTemp(Ity_V128); 28095 IRTemp sHi, sLo, dHi, dLo; 28096 sHi = sLo = dHi = dLo = IRTemp_INVALID; 28097 UChar modrm = getUChar(delta); 28098 UInt rG = gregOfRexRM(pfx,modrm); 28099 UInt rV = getVexNvvvv(pfx); 28100 28101 assign( dV, getXMMReg(rV) ); 28102 28103 if (epartIsReg(modrm)) { 28104 UInt rE = eregOfRexRM(pfx,modrm); 28105 assign( sV, getXMMReg(rE) ); 28106 delta += 1; 28107 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE), 28108 nameXMMReg(rV), nameXMMReg(rG)); 28109 } else { 28110 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 28111 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 28112 delta += alen; 28113 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 28114 nameXMMReg(rV), nameXMMReg(rG)); 28115 } 28116 28117 breakupV128to64s( dV, &dHi, &dLo ); 28118 
breakupV128to64s( sV, &sHi, &sLo ); 28119 28120 putYMMRegLoAndZU( 28121 rG, 28122 binop(Iop_64HLtoV128, 28123 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), 28124 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) 28125 ) 28126 ); 28127 *uses_vvvv = True; 28128 goto decode_success; 28129 } 28130 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */ 28131 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28132 IRTemp sV = newTemp(Ity_V256); 28133 IRTemp dV = newTemp(Ity_V256); 28134 IRTemp s3, s2, s1, s0, d3, d2, d1, d0; 28135 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; 28136 UChar modrm = getUChar(delta); 28137 UInt rG = gregOfRexRM(pfx,modrm); 28138 UInt rV = getVexNvvvv(pfx); 28139 28140 assign( dV, getYMMReg(rV) ); 28141 28142 if (epartIsReg(modrm)) { 28143 UInt rE = eregOfRexRM(pfx,modrm); 28144 assign( sV, getYMMReg(rE) ); 28145 delta += 1; 28146 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE), 28147 nameYMMReg(rV), nameYMMReg(rG)); 28148 } else { 28149 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 28150 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 28151 delta += alen; 28152 DIP("vpmulhrsw %s,%s,%s\n", dis_buf, 28153 nameYMMReg(rV), nameYMMReg(rG)); 28154 } 28155 28156 breakupV256to64s( dV, &d3, &d2, &d1, &d0 ); 28157 breakupV256to64s( sV, &s3, &s2, &s1, &s0 ); 28158 28159 putYMMReg( 28160 rG, 28161 binop(Iop_V128HLtoV256, 28162 binop(Iop_64HLtoV128, 28163 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ), 28164 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ), 28165 binop(Iop_64HLtoV128, 28166 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ), 28167 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) ) 28168 ) 28169 ); 28170 *uses_vvvv = True; 28171 goto decode_success; 28172 } 28173 break; 28174 28175 case 0x0C: 28176 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */ 28177 if (have66noF2noF3(pfx) 28178 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 28179 UChar modrm = getUChar(delta); 28180 UInt rG = 
gregOfRexRM(pfx, modrm); 28181 UInt rV = getVexNvvvv(pfx); 28182 IRTemp ctrlV = newTemp(Ity_V128); 28183 if (epartIsReg(modrm)) { 28184 UInt rE = eregOfRexRM(pfx, modrm); 28185 delta += 1; 28186 DIP("vpermilps %s,%s,%s\n", 28187 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 28188 assign(ctrlV, getXMMReg(rE)); 28189 } else { 28190 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28191 delta += alen; 28192 DIP("vpermilps %s,%s,%s\n", 28193 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 28194 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 28195 } 28196 IRTemp dataV = newTemp(Ity_V128); 28197 assign(dataV, getXMMReg(rV)); 28198 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV); 28199 putYMMRegLoAndZU(rG, mkexpr(resV)); 28200 *uses_vvvv = True; 28201 goto decode_success; 28202 } 28203 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */ 28204 if (have66noF2noF3(pfx) 28205 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28206 UChar modrm = getUChar(delta); 28207 UInt rG = gregOfRexRM(pfx, modrm); 28208 UInt rV = getVexNvvvv(pfx); 28209 IRTemp ctrlV = newTemp(Ity_V256); 28210 if (epartIsReg(modrm)) { 28211 UInt rE = eregOfRexRM(pfx, modrm); 28212 delta += 1; 28213 DIP("vpermilps %s,%s,%s\n", 28214 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 28215 assign(ctrlV, getYMMReg(rE)); 28216 } else { 28217 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28218 delta += alen; 28219 DIP("vpermilps %s,%s,%s\n", 28220 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 28221 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 28222 } 28223 IRTemp dataV = newTemp(Ity_V256); 28224 assign(dataV, getYMMReg(rV)); 28225 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV); 28226 putYMMReg(rG, mkexpr(resV)); 28227 *uses_vvvv = True; 28228 goto decode_success; 28229 } 28230 break; 28231 28232 case 0x0D: 28233 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */ 28234 if (have66noF2noF3(pfx) 28235 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 28236 
UChar modrm = getUChar(delta); 28237 UInt rG = gregOfRexRM(pfx, modrm); 28238 UInt rV = getVexNvvvv(pfx); 28239 IRTemp ctrlV = newTemp(Ity_V128); 28240 if (epartIsReg(modrm)) { 28241 UInt rE = eregOfRexRM(pfx, modrm); 28242 delta += 1; 28243 DIP("vpermilpd %s,%s,%s\n", 28244 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 28245 assign(ctrlV, getXMMReg(rE)); 28246 } else { 28247 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28248 delta += alen; 28249 DIP("vpermilpd %s,%s,%s\n", 28250 dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 28251 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr))); 28252 } 28253 IRTemp dataV = newTemp(Ity_V128); 28254 assign(dataV, getXMMReg(rV)); 28255 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV); 28256 putYMMRegLoAndZU(rG, mkexpr(resV)); 28257 *uses_vvvv = True; 28258 goto decode_success; 28259 } 28260 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */ 28261 if (have66noF2noF3(pfx) 28262 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28263 UChar modrm = getUChar(delta); 28264 UInt rG = gregOfRexRM(pfx, modrm); 28265 UInt rV = getVexNvvvv(pfx); 28266 IRTemp ctrlV = newTemp(Ity_V256); 28267 if (epartIsReg(modrm)) { 28268 UInt rE = eregOfRexRM(pfx, modrm); 28269 delta += 1; 28270 DIP("vpermilpd %s,%s,%s\n", 28271 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 28272 assign(ctrlV, getYMMReg(rE)); 28273 } else { 28274 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28275 delta += alen; 28276 DIP("vpermilpd %s,%s,%s\n", 28277 dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 28278 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr))); 28279 } 28280 IRTemp dataV = newTemp(Ity_V256); 28281 assign(dataV, getYMMReg(rV)); 28282 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV); 28283 putYMMReg(rG, mkexpr(resV)); 28284 *uses_vvvv = True; 28285 goto decode_success; 28286 } 28287 break; 28288 28289 case 0x0E: 28290 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */ 28291 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 
28292 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 ); 28293 goto decode_success; 28294 } 28295 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */ 28296 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28297 delta = dis_xTESTy_256( vbi, pfx, delta, 32 ); 28298 goto decode_success; 28299 } 28300 break; 28301 28302 case 0x0F: 28303 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */ 28304 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28305 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 ); 28306 goto decode_success; 28307 } 28308 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */ 28309 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28310 delta = dis_xTESTy_256( vbi, pfx, delta, 64 ); 28311 goto decode_success; 28312 } 28313 break; 28314 28315 case 0x16: 28316 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */ 28317 if (have66noF2noF3(pfx) 28318 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28319 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28320 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD ); 28321 goto decode_success; 28322 } 28323 break; 28324 28325 case 0x17: 28326 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */ 28327 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28328 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 ); 28329 goto decode_success; 28330 } 28331 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */ 28332 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28333 delta = dis_xTESTy_256( vbi, pfx, delta, 0 ); 28334 goto decode_success; 28335 } 28336 break; 28337 28338 case 0x18: 28339 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 28340 if (have66noF2noF3(pfx) 28341 && 0==getVexL(pfx)/*128*/ 28342 && !epartIsReg(getUChar(delta))) { 28343 UChar modrm = getUChar(delta); 28344 UInt rG = gregOfRexRM(pfx, modrm); 28345 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28346 delta += alen; 28347 DIP("vbroadcastss 
%s,%s\n", dis_buf, nameXMMReg(rG)); 28348 IRTemp t32 = newTemp(Ity_I32); 28349 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28350 IRTemp t64 = newTemp(Ity_I64); 28351 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28352 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28353 putYMMRegLoAndZU(rG, res); 28354 goto decode_success; 28355 } 28356 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 28357 if (have66noF2noF3(pfx) 28358 && 1==getVexL(pfx)/*256*/ 28359 && !epartIsReg(getUChar(delta))) { 28360 UChar modrm = getUChar(delta); 28361 UInt rG = gregOfRexRM(pfx, modrm); 28362 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28363 delta += alen; 28364 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG)); 28365 IRTemp t32 = newTemp(Ity_I32); 28366 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 28367 IRTemp t64 = newTemp(Ity_I64); 28368 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28369 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28370 mkexpr(t64), mkexpr(t64)); 28371 putYMMReg(rG, res); 28372 goto decode_success; 28373 } 28374 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */ 28375 if (have66noF2noF3(pfx) 28376 && 0==getVexL(pfx)/*128*/ 28377 && epartIsReg(getUChar(delta))) { 28378 UChar modrm = getUChar(delta); 28379 UInt rG = gregOfRexRM(pfx, modrm); 28380 UInt rE = eregOfRexRM(pfx, modrm); 28381 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 28382 IRTemp t32 = newTemp(Ity_I32); 28383 assign(t32, getXMMRegLane32(rE, 0)); 28384 IRTemp t64 = newTemp(Ity_I64); 28385 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28386 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 28387 putYMMRegLoAndZU(rG, res); 28388 delta++; 28389 goto decode_success; 28390 } 28391 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */ 28392 if (have66noF2noF3(pfx) 28393 && 1==getVexL(pfx)/*256*/ 28394 && epartIsReg(getUChar(delta))) { 28395 UChar modrm = getUChar(delta); 
28396 UInt rG = gregOfRexRM(pfx, modrm); 28397 UInt rE = eregOfRexRM(pfx, modrm); 28398 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28399 IRTemp t32 = newTemp(Ity_I32); 28400 assign(t32, getXMMRegLane32(rE, 0)); 28401 IRTemp t64 = newTemp(Ity_I64); 28402 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 28403 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28404 mkexpr(t64), mkexpr(t64)); 28405 putYMMReg(rG, res); 28406 delta++; 28407 goto decode_success; 28408 } 28409 break; 28410 28411 case 0x19: 28412 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 28413 if (have66noF2noF3(pfx) 28414 && 1==getVexL(pfx)/*256*/ 28415 && !epartIsReg(getUChar(delta))) { 28416 UChar modrm = getUChar(delta); 28417 UInt rG = gregOfRexRM(pfx, modrm); 28418 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28419 delta += alen; 28420 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG)); 28421 IRTemp t64 = newTemp(Ity_I64); 28422 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 28423 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28424 mkexpr(t64), mkexpr(t64)); 28425 putYMMReg(rG, res); 28426 goto decode_success; 28427 } 28428 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */ 28429 if (have66noF2noF3(pfx) 28430 && 1==getVexL(pfx)/*256*/ 28431 && epartIsReg(getUChar(delta))) { 28432 UChar modrm = getUChar(delta); 28433 UInt rG = gregOfRexRM(pfx, modrm); 28434 UInt rE = eregOfRexRM(pfx, modrm); 28435 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 28436 IRTemp t64 = newTemp(Ity_I64); 28437 assign(t64, getXMMRegLane64(rE, 0)); 28438 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 28439 mkexpr(t64), mkexpr(t64)); 28440 putYMMReg(rG, res); 28441 delta++; 28442 goto decode_success; 28443 } 28444 break; 28445 28446 case 0x1A: 28447 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */ 28448 if (have66noF2noF3(pfx) 28449 && 1==getVexL(pfx)/*256*/ 28450 && 
!epartIsReg(getUChar(delta))) { 28451 UChar modrm = getUChar(delta); 28452 UInt rG = gregOfRexRM(pfx, modrm); 28453 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28454 delta += alen; 28455 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG)); 28456 IRTemp t128 = newTemp(Ity_V128); 28457 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 28458 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 28459 goto decode_success; 28460 } 28461 break; 28462 28463 case 0x1C: 28464 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */ 28465 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28466 delta = dis_AVX128_E_to_G_unary( 28467 uses_vvvv, vbi, pfx, delta, 28468 "vpabsb", math_PABS_XMM_pap1 ); 28469 goto decode_success; 28470 } 28471 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */ 28472 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28473 delta = dis_AVX256_E_to_G_unary( 28474 uses_vvvv, vbi, pfx, delta, 28475 "vpabsb", math_PABS_YMM_pap1 ); 28476 goto decode_success; 28477 } 28478 break; 28479 28480 case 0x1D: 28481 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */ 28482 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28483 delta = dis_AVX128_E_to_G_unary( 28484 uses_vvvv, vbi, pfx, delta, 28485 "vpabsw", math_PABS_XMM_pap2 ); 28486 goto decode_success; 28487 } 28488 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */ 28489 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28490 delta = dis_AVX256_E_to_G_unary( 28491 uses_vvvv, vbi, pfx, delta, 28492 "vpabsw", math_PABS_YMM_pap2 ); 28493 goto decode_success; 28494 } 28495 break; 28496 28497 case 0x1E: 28498 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */ 28499 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28500 delta = dis_AVX128_E_to_G_unary( 28501 uses_vvvv, vbi, pfx, delta, 28502 "vpabsd", math_PABS_XMM_pap4 ); 28503 goto decode_success; 28504 } 28505 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */ 28506 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 28507 delta = dis_AVX256_E_to_G_unary( 28508 uses_vvvv, vbi, pfx, delta, 28509 "vpabsd", math_PABS_YMM_pap4 ); 28510 goto decode_success; 28511 } 28512 break; 28513 28514 case 0x20: 28515 /* VPMOVSXBW xmm2/m64, xmm1 */ 28516 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */ 28517 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28518 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 28519 True/*isAvx*/, False/*!xIsZ*/ ); 28520 goto decode_success; 28521 } 28522 /* VPMOVSXBW xmm2/m128, ymm1 */ 28523 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */ 28524 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28525 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28526 goto decode_success; 28527 } 28528 break; 28529 28530 case 0x21: 28531 /* VPMOVSXBD xmm2/m32, xmm1 */ 28532 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */ 28533 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28534 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28535 True/*isAvx*/, False/*!xIsZ*/ ); 28536 goto decode_success; 28537 } 28538 /* VPMOVSXBD xmm2/m64, ymm1 */ 28539 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */ 28540 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28541 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28542 goto decode_success; 28543 } 28544 break; 28545 28546 case 0x22: 28547 /* VPMOVSXBQ xmm2/m16, xmm1 */ 28548 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */ 28549 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28550 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28551 goto decode_success; 28552 } 28553 /* VPMOVSXBQ xmm2/m32, ymm1 */ 28554 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */ 28555 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28556 delta = dis_PMOVSXBQ_256( vbi, pfx, delta ); 28557 goto decode_success; 28558 } 28559 break; 28560 28561 case 0x23: 28562 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */ 28563 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28564 delta = dis_PMOVxXWD_128( vbi, pfx, 
delta, 28565 True/*isAvx*/, False/*!xIsZ*/ ); 28566 goto decode_success; 28567 } 28568 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */ 28569 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28570 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28571 goto decode_success; 28572 } 28573 break; 28574 28575 case 0x24: 28576 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */ 28577 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28578 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28579 goto decode_success; 28580 } 28581 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */ 28582 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28583 delta = dis_PMOVSXWQ_256( vbi, pfx, delta ); 28584 goto decode_success; 28585 } 28586 break; 28587 28588 case 0x25: 28589 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */ 28590 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28591 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28592 True/*isAvx*/, False/*!xIsZ*/ ); 28593 goto decode_success; 28594 } 28595 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */ 28596 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28597 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ ); 28598 goto decode_success; 28599 } 28600 break; 28601 28602 case 0x28: 28603 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */ 28604 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28605 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex( 28606 uses_vvvv, vbi, pfx, delta, 28607 "vpmuldq", math_PMULDQ_128 ); 28608 goto decode_success; 28609 } 28610 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */ 28611 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28612 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28613 uses_vvvv, vbi, pfx, delta, 28614 "vpmuldq", math_PMULDQ_256 ); 28615 goto decode_success; 28616 } 28617 break; 28618 28619 case 0x29: 28620 /* VPCMPEQQ r/m, rV, r ::: r = rV 
`eq-by-64s` r/m */ 28621 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */ 28622 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28623 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28624 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 ); 28625 goto decode_success; 28626 } 28627 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */ 28628 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */ 28629 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28630 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28631 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 ); 28632 goto decode_success; 28633 } 28634 break; 28635 28636 case 0x2A: 28637 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */ 28638 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28639 && !epartIsReg(getUChar(delta))) { 28640 UChar modrm = getUChar(delta); 28641 UInt rD = gregOfRexRM(pfx, modrm); 28642 IRTemp tD = newTemp(Ity_V128); 28643 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28644 delta += alen; 28645 gen_SEGV_if_not_16_aligned(addr); 28646 assign(tD, loadLE(Ity_V128, mkexpr(addr))); 28647 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD)); 28648 putYMMRegLoAndZU(rD, mkexpr(tD)); 28649 goto decode_success; 28650 } 28651 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */ 28652 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28653 && !epartIsReg(getUChar(delta))) { 28654 UChar modrm = getUChar(delta); 28655 UInt rD = gregOfRexRM(pfx, modrm); 28656 IRTemp tD = newTemp(Ity_V256); 28657 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 28658 delta += alen; 28659 gen_SEGV_if_not_32_aligned(addr); 28660 assign(tD, loadLE(Ity_V256, mkexpr(addr))); 28661 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD)); 28662 putYMMReg(rD, mkexpr(tD)); 28663 goto decode_success; 28664 } 28665 break; 28666 28667 case 0x2B: 28668 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28669 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */ 28670 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/) { 28671 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG( 28672 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28673 Iop_QNarrowBin32Sto16Ux8, NULL, 28674 False/*!invertLeftArg*/, True/*swapArgs*/ ); 28675 goto decode_success; 28676 } 28677 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */ 28678 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */ 28679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28680 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28681 uses_vvvv, vbi, pfx, delta, "vpackusdw", 28682 math_VPACKUSDW_YMM ); 28683 goto decode_success; 28684 } 28685 break; 28686 28687 case 0x2C: 28688 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */ 28689 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28690 && 0==getRexW(pfx)/*W0*/ 28691 && !epartIsReg(getUChar(delta))) { 28692 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28693 /*!isYMM*/False, Ity_I32, /*isLoad*/True ); 28694 goto decode_success; 28695 } 28696 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */ 28697 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28698 && 0==getRexW(pfx)/*W0*/ 28699 && !epartIsReg(getUChar(delta))) { 28700 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28701 /*isYMM*/True, Ity_I32, /*isLoad*/True ); 28702 goto decode_success; 28703 } 28704 break; 28705 28706 case 0x2D: 28707 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */ 28708 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28709 && 0==getRexW(pfx)/*W0*/ 28710 && !epartIsReg(getUChar(delta))) { 28711 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28712 /*!isYMM*/False, Ity_I64, /*isLoad*/True ); 28713 goto decode_success; 28714 } 28715 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */ 28716 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28717 && 0==getRexW(pfx)/*W0*/ 28718 && !epartIsReg(getUChar(delta))) { 28719 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 
28720 /*isYMM*/True, Ity_I64, /*isLoad*/True ); 28721 goto decode_success; 28722 } 28723 break; 28724 28725 case 0x2E: 28726 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */ 28727 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28728 && 0==getRexW(pfx)/*W0*/ 28729 && !epartIsReg(getUChar(delta))) { 28730 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28731 /*!isYMM*/False, Ity_I32, /*!isLoad*/False ); 28732 goto decode_success; 28733 } 28734 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */ 28735 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28736 && 0==getRexW(pfx)/*W0*/ 28737 && !epartIsReg(getUChar(delta))) { 28738 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps", 28739 /*isYMM*/True, Ity_I32, /*!isLoad*/False ); 28740 goto decode_success; 28741 } 28742 break; 28743 28744 case 0x2F: 28745 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */ 28746 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 28747 && 0==getRexW(pfx)/*W0*/ 28748 && !epartIsReg(getUChar(delta))) { 28749 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28750 /*!isYMM*/False, Ity_I64, /*!isLoad*/False ); 28751 goto decode_success; 28752 } 28753 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */ 28754 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 28755 && 0==getRexW(pfx)/*W0*/ 28756 && !epartIsReg(getUChar(delta))) { 28757 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd", 28758 /*isYMM*/True, Ity_I64, /*!isLoad*/False ); 28759 goto decode_success; 28760 } 28761 break; 28762 28763 case 0x30: 28764 /* VPMOVZXBW xmm2/m64, xmm1 */ 28765 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */ 28766 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28767 delta = dis_PMOVxXBW_128( vbi, pfx, delta, 28768 True/*isAvx*/, True/*xIsZ*/ ); 28769 goto decode_success; 28770 } 28771 /* VPMOVZXBW xmm2/m128, ymm1 */ 28772 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */ 28773 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 28774 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ ); 28775 goto decode_success; 28776 } 28777 break; 28778 28779 case 0x31: 28780 /* VPMOVZXBD xmm2/m32, xmm1 */ 28781 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */ 28782 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28783 delta = dis_PMOVxXBD_128( vbi, pfx, delta, 28784 True/*isAvx*/, True/*xIsZ*/ ); 28785 goto decode_success; 28786 } 28787 /* VPMOVZXBD xmm2/m64, ymm1 */ 28788 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */ 28789 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28790 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28791 goto decode_success; 28792 } 28793 break; 28794 28795 case 0x32: 28796 /* VPMOVZXBQ xmm2/m16, xmm1 */ 28797 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */ 28798 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28799 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28800 goto decode_success; 28801 } 28802 /* VPMOVZXBQ xmm2/m32, ymm1 */ 28803 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */ 28804 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28805 delta = dis_PMOVZXBQ_256( vbi, pfx, delta ); 28806 goto decode_success; 28807 } 28808 break; 28809 28810 case 0x33: 28811 /* VPMOVZXWD xmm2/m64, xmm1 */ 28812 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */ 28813 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28814 delta = dis_PMOVxXWD_128( vbi, pfx, delta, 28815 True/*isAvx*/, True/*xIsZ*/ ); 28816 goto decode_success; 28817 } 28818 /* VPMOVZXWD xmm2/m128, ymm1 */ 28819 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */ 28820 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28821 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ ); 28822 goto decode_success; 28823 } 28824 break; 28825 28826 case 0x34: 28827 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */ 28828 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28829 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ ); 28830 goto decode_success; 28831 
} 28832 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */ 28833 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28834 delta = dis_PMOVZXWQ_256( vbi, pfx, delta ); 28835 goto decode_success; 28836 } 28837 break; 28838 28839 case 0x35: 28840 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */ 28841 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28842 delta = dis_PMOVxXDQ_128( vbi, pfx, delta, 28843 True/*isAvx*/, True/*xIsZ*/ ); 28844 goto decode_success; 28845 } 28846 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */ 28847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28848 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ ); 28849 goto decode_success; 28850 } 28851 break; 28852 28853 case 0x36: 28854 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */ 28855 if (have66noF2noF3(pfx) 28856 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 28857 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex( 28858 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD ); 28859 goto decode_success; 28860 } 28861 break; 28862 28863 case 0x37: 28864 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28865 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */ 28866 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28867 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28868 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 ); 28869 goto decode_success; 28870 } 28871 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */ 28872 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */ 28873 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28874 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28875 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 ); 28876 goto decode_success; 28877 } 28878 break; 28879 28880 case 0x38: 28881 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28882 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */ 28883 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28884 delta = 
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28885 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 ); 28886 goto decode_success; 28887 } 28888 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */ 28889 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */ 28890 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28891 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28892 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 ); 28893 goto decode_success; 28894 } 28895 break; 28896 28897 case 0x39: 28898 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28899 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */ 28900 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28901 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28902 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 ); 28903 goto decode_success; 28904 } 28905 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */ 28906 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */ 28907 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28908 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28909 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 ); 28910 goto decode_success; 28911 } 28912 break; 28913 28914 case 0x3A: 28915 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28916 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */ 28917 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28918 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28919 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 ); 28920 goto decode_success; 28921 } 28922 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */ 28923 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */ 28924 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28925 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28926 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 ); 28927 goto decode_success; 28928 } 28929 break; 28930 28931 case 0x3B: 28932 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28933 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */ 
28934 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28935 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28936 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 ); 28937 goto decode_success; 28938 } 28939 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */ 28940 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */ 28941 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28942 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28943 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 ); 28944 goto decode_success; 28945 } 28946 break; 28947 28948 case 0x3C: 28949 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28950 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */ 28951 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28952 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28953 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 ); 28954 goto decode_success; 28955 } 28956 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */ 28957 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */ 28958 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28959 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28960 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 ); 28961 goto decode_success; 28962 } 28963 break; 28964 28965 case 0x3D: 28966 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28967 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */ 28968 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28969 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28970 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 ); 28971 goto decode_success; 28972 } 28973 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */ 28974 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */ 28975 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28976 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28977 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 ); 28978 goto decode_success; 28979 } 28980 break; 28981 28982 case 0x3E: 28983 /* VPMAXUW r/m, rV, r ::: r = 
max-unsigned-16s(rV, r/m) */ 28984 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */ 28985 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 28986 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 28987 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 ); 28988 goto decode_success; 28989 } 28990 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */ 28991 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */ 28992 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 28993 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 28994 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 ); 28995 goto decode_success; 28996 } 28997 break; 28998 28999 case 0x3F: 29000 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 29001 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */ 29002 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29003 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 29004 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 ); 29005 goto decode_success; 29006 } 29007 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */ 29008 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */ 29009 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29010 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 29011 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 ); 29012 goto decode_success; 29013 } 29014 break; 29015 29016 case 0x40: 29017 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 29018 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */ 29019 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29020 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple( 29021 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 ); 29022 goto decode_success; 29023 } 29024 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */ 29025 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */ 29026 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 29027 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple( 29028 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 ); 29029 goto decode_success; 29030 } 29031 break; 
29032 29033 case 0x41: 29034 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */ 29035 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29036 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ ); 29037 goto decode_success; 29038 } 29039 break; 29040 29041 case 0x45: 29042 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */ 29043 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */ 29044 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 29045 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd", 29046 Iop_Shr32, 1==getVexL(pfx) ); 29047 *uses_vvvv = True; 29048 goto decode_success; 29049 } 29050 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */ 29051 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */ 29052 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 29053 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq", 29054 Iop_Shr64, 1==getVexL(pfx) ); 29055 *uses_vvvv = True; 29056 goto decode_success; 29057 } 29058 break; 29059 29060 case 0x46: 29061 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */ 29062 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */ 29063 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 29064 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd", 29065 Iop_Sar32, 1==getVexL(pfx) ); 29066 *uses_vvvv = True; 29067 goto decode_success; 29068 } 29069 break; 29070 29071 case 0x47: 29072 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */ 29073 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */ 29074 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) { 29075 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd", 29076 Iop_Shl32, 1==getVexL(pfx) ); 29077 *uses_vvvv = True; 29078 goto decode_success; 29079 } 29080 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */ 29081 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */ 29082 if 
(have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) { 29083 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq", 29084 Iop_Shl64, 1==getVexL(pfx) ); 29085 *uses_vvvv = True; 29086 goto decode_success; 29087 } 29088 break; 29089 29090 case 0x58: 29091 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */ 29092 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29093 && 0==getRexW(pfx)/*W0*/) { 29094 UChar modrm = getUChar(delta); 29095 UInt rG = gregOfRexRM(pfx, modrm); 29096 IRTemp t32 = newTemp(Ity_I32); 29097 if (epartIsReg(modrm)) { 29098 UInt rE = eregOfRexRM(pfx, modrm); 29099 delta++; 29100 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 29101 assign(t32, getXMMRegLane32(rE, 0)); 29102 } else { 29103 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29104 delta += alen; 29105 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG)); 29106 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 29107 } 29108 IRTemp t64 = newTemp(Ity_I64); 29109 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29110 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 29111 putYMMRegLoAndZU(rG, res); 29112 goto decode_success; 29113 } 29114 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */ 29115 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29116 && 0==getRexW(pfx)/*W0*/) { 29117 UChar modrm = getUChar(delta); 29118 UInt rG = gregOfRexRM(pfx, modrm); 29119 IRTemp t32 = newTemp(Ity_I32); 29120 if (epartIsReg(modrm)) { 29121 UInt rE = eregOfRexRM(pfx, modrm); 29122 delta++; 29123 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 29124 assign(t32, getXMMRegLane32(rE, 0)); 29125 } else { 29126 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29127 delta += alen; 29128 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG)); 29129 assign(t32, loadLE(Ity_I32, mkexpr(addr))); 29130 } 29131 IRTemp t64 = newTemp(Ity_I64); 29132 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29133 IRExpr* res = 
IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 29134 mkexpr(t64), mkexpr(t64)); 29135 putYMMReg(rG, res); 29136 goto decode_success; 29137 } 29138 break; 29139 29140 case 0x59: 29141 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */ 29142 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29143 && 0==getRexW(pfx)/*W0*/) { 29144 UChar modrm = getUChar(delta); 29145 UInt rG = gregOfRexRM(pfx, modrm); 29146 IRTemp t64 = newTemp(Ity_I64); 29147 if (epartIsReg(modrm)) { 29148 UInt rE = eregOfRexRM(pfx, modrm); 29149 delta++; 29150 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 29151 assign(t64, getXMMRegLane64(rE, 0)); 29152 } else { 29153 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29154 delta += alen; 29155 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG)); 29156 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 29157 } 29158 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 29159 putYMMRegLoAndZU(rG, res); 29160 goto decode_success; 29161 } 29162 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */ 29163 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29164 && 0==getRexW(pfx)/*W0*/) { 29165 UChar modrm = getUChar(delta); 29166 UInt rG = gregOfRexRM(pfx, modrm); 29167 IRTemp t64 = newTemp(Ity_I64); 29168 if (epartIsReg(modrm)) { 29169 UInt rE = eregOfRexRM(pfx, modrm); 29170 delta++; 29171 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 29172 assign(t64, getXMMRegLane64(rE, 0)); 29173 } else { 29174 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29175 delta += alen; 29176 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG)); 29177 assign(t64, loadLE(Ity_I64, mkexpr(addr))); 29178 } 29179 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 29180 mkexpr(t64), mkexpr(t64)); 29181 putYMMReg(rG, res); 29182 goto decode_success; 29183 } 29184 break; 29185 29186 case 0x5A: 29187 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */ 29188 if (have66noF2noF3(pfx) 29189 && 
1==getVexL(pfx)/*256*/ 29190 && !epartIsReg(getUChar(delta))) { 29191 UChar modrm = getUChar(delta); 29192 UInt rG = gregOfRexRM(pfx, modrm); 29193 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29194 delta += alen; 29195 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG)); 29196 IRTemp t128 = newTemp(Ity_V128); 29197 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 29198 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) ); 29199 goto decode_success; 29200 } 29201 break; 29202 29203 case 0x78: 29204 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */ 29205 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29206 && 0==getRexW(pfx)/*W0*/) { 29207 UChar modrm = getUChar(delta); 29208 UInt rG = gregOfRexRM(pfx, modrm); 29209 IRTemp t8 = newTemp(Ity_I8); 29210 if (epartIsReg(modrm)) { 29211 UInt rE = eregOfRexRM(pfx, modrm); 29212 delta++; 29213 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 29214 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0))); 29215 } else { 29216 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29217 delta += alen; 29218 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG)); 29219 assign(t8, loadLE(Ity_I8, mkexpr(addr))); 29220 } 29221 IRTemp t16 = newTemp(Ity_I16); 29222 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8))); 29223 IRTemp t32 = newTemp(Ity_I32); 29224 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 29225 IRTemp t64 = newTemp(Ity_I64); 29226 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29227 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 29228 putYMMRegLoAndZU(rG, res); 29229 goto decode_success; 29230 } 29231 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */ 29232 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29233 && 0==getRexW(pfx)/*W0*/) { 29234 UChar modrm = getUChar(delta); 29235 UInt rG = gregOfRexRM(pfx, modrm); 29236 IRTemp t8 = newTemp(Ity_I8); 29237 if (epartIsReg(modrm)) { 29238 UInt rE = eregOfRexRM(pfx, 
modrm); 29239 delta++; 29240 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 29241 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0))); 29242 } else { 29243 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29244 delta += alen; 29245 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG)); 29246 assign(t8, loadLE(Ity_I8, mkexpr(addr))); 29247 } 29248 IRTemp t16 = newTemp(Ity_I16); 29249 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8))); 29250 IRTemp t32 = newTemp(Ity_I32); 29251 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 29252 IRTemp t64 = newTemp(Ity_I64); 29253 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29254 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 29255 mkexpr(t64), mkexpr(t64)); 29256 putYMMReg(rG, res); 29257 goto decode_success; 29258 } 29259 break; 29260 29261 case 0x79: 29262 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */ 29263 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29264 && 0==getRexW(pfx)/*W0*/) { 29265 UChar modrm = getUChar(delta); 29266 UInt rG = gregOfRexRM(pfx, modrm); 29267 IRTemp t16 = newTemp(Ity_I16); 29268 if (epartIsReg(modrm)) { 29269 UInt rE = eregOfRexRM(pfx, modrm); 29270 delta++; 29271 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); 29272 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0))); 29273 } else { 29274 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29275 delta += alen; 29276 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG)); 29277 assign(t16, loadLE(Ity_I16, mkexpr(addr))); 29278 } 29279 IRTemp t32 = newTemp(Ity_I32); 29280 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 29281 IRTemp t64 = newTemp(Ity_I64); 29282 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29283 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64)); 29284 putYMMRegLoAndZU(rG, res); 29285 goto decode_success; 29286 } 29287 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */ 29288 if 
(have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29289 && 0==getRexW(pfx)/*W0*/) { 29290 UChar modrm = getUChar(delta); 29291 UInt rG = gregOfRexRM(pfx, modrm); 29292 IRTemp t16 = newTemp(Ity_I16); 29293 if (epartIsReg(modrm)) { 29294 UInt rE = eregOfRexRM(pfx, modrm); 29295 delta++; 29296 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG)); 29297 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0))); 29298 } else { 29299 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 ); 29300 delta += alen; 29301 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG)); 29302 assign(t16, loadLE(Ity_I16, mkexpr(addr))); 29303 } 29304 IRTemp t32 = newTemp(Ity_I32); 29305 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16))); 29306 IRTemp t64 = newTemp(Ity_I64); 29307 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32))); 29308 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64), 29309 mkexpr(t64), mkexpr(t64)); 29310 putYMMReg(rG, res); 29311 goto decode_success; 29312 } 29313 break; 29314 29315 case 0x8C: 29316 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */ 29317 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29318 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29319 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 29320 /*!isYMM*/False, Ity_I32, /*isLoad*/True ); 29321 goto decode_success; 29322 } 29323 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */ 29324 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29325 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29326 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 29327 /*isYMM*/True, Ity_I32, /*isLoad*/True ); 29328 goto decode_success; 29329 } 29330 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */ 29331 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29332 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29333 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 29334 
/*!isYMM*/False, Ity_I64, /*isLoad*/True ); 29335 goto decode_success; 29336 } 29337 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */ 29338 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29339 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29340 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 29341 /*isYMM*/True, Ity_I64, /*isLoad*/True ); 29342 goto decode_success; 29343 } 29344 break; 29345 29346 case 0x8E: 29347 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */ 29348 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29349 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29350 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 29351 /*!isYMM*/False, Ity_I32, /*!isLoad*/False ); 29352 goto decode_success; 29353 } 29354 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */ 29355 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29356 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29357 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd", 29358 /*isYMM*/True, Ity_I32, /*!isLoad*/False ); 29359 goto decode_success; 29360 } 29361 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */ 29362 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29363 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29364 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 29365 /*!isYMM*/False, Ity_I64, /*!isLoad*/False ); 29366 goto decode_success; 29367 } 29368 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */ 29369 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29370 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29371 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq", 29372 /*isYMM*/True, Ity_I64, /*!isLoad*/False ); 29373 goto decode_success; 29374 } 29375 break; 29376 29377 case 0x90: 29378 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */ 29379 if (have66noF2noF3(pfx) && 
0==getVexL(pfx)/*128*/ 29380 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29381 Long delta0 = delta; 29382 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 29383 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 29384 if (delta != delta0) 29385 goto decode_success; 29386 } 29387 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */ 29388 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29389 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29390 Long delta0 = delta; 29391 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd", 29392 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 29393 if (delta != delta0) 29394 goto decode_success; 29395 } 29396 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */ 29397 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29398 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29399 Long delta0 = delta; 29400 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 29401 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 29402 if (delta != delta0) 29403 goto decode_success; 29404 } 29405 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */ 29406 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29407 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29408 Long delta0 = delta; 29409 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq", 29410 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 29411 if (delta != delta0) 29412 goto decode_success; 29413 } 29414 break; 29415 29416 case 0x91: 29417 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */ 29418 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29419 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29420 Long delta0 = delta; 29421 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 29422 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 29423 if (delta != delta0) 29424 goto decode_success; 29425 } 29426 /* VPGATHERQD xmm2, vm64y, xmm1 = 
VEX.DDS.256.66.0F38.W0 91 /r */ 29427 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29428 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29429 Long delta0 = delta; 29430 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd", 29431 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 29432 if (delta != delta0) 29433 goto decode_success; 29434 } 29435 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */ 29436 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29437 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29438 Long delta0 = delta; 29439 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 29440 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 ); 29441 if (delta != delta0) 29442 goto decode_success; 29443 } 29444 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */ 29445 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29446 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29447 Long delta0 = delta; 29448 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq", 29449 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 29450 if (delta != delta0) 29451 goto decode_success; 29452 } 29453 break; 29454 29455 case 0x92: 29456 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */ 29457 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29458 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29459 Long delta0 = delta; 29460 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 29461 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 ); 29462 if (delta != delta0) 29463 goto decode_success; 29464 } 29465 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */ 29466 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29467 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29468 Long delta0 = delta; 29469 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps", 29470 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 ); 29471 if (delta != delta0) 29472 goto 
decode_success; 29473 } 29474 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */ 29475 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29476 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29477 Long delta0 = delta; 29478 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 29479 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 ); 29480 if (delta != delta0) 29481 goto decode_success; 29482 } 29483 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */ 29484 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29485 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29486 Long delta0 = delta; 29487 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd", 29488 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 ); 29489 if (delta != delta0) 29490 goto decode_success; 29491 } 29492 break; 29493 29494 case 0x93: 29495 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */ 29496 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29497 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29498 Long delta0 = delta; 29499 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 29500 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 ); 29501 if (delta != delta0) 29502 goto decode_success; 29503 } 29504 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */ 29505 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29506 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) { 29507 Long delta0 = delta; 29508 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps", 29509 /*isYMM*/True, /*isVM64x*/True, Ity_I32 ); 29510 if (delta != delta0) 29511 goto decode_success; 29512 } 29513 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */ 29514 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/ 29515 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29516 Long delta0 = delta; 29517 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 29518 /*!isYMM*/False, 
/*isVM64x*/True, Ity_I64 ); 29519 if (delta != delta0) 29520 goto decode_success; 29521 } 29522 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */ 29523 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 29524 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) { 29525 Long delta0 = delta; 29526 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd", 29527 /*isYMM*/True, /*isVM64x*/True, Ity_I64 ); 29528 if (delta != delta0) 29529 goto decode_success; 29530 } 29531 break; 29532 29533 case 0x96 ... 0x9F: 29534 case 0xA6 ... 0xAF: 29535 case 0xB6 ... 0xBF: 29536 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */ 29537 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */ 29538 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */ 29539 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */ 29540 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */ 29541 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */ 29542 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */ 29543 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */ 29544 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */ 29545 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */ 29546 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */ 29547 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */ 29548 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */ 29549 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */ 29550 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */ 29551 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */ 29552 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */ 29553 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */ 
29554 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */ 29555 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */ 29556 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */ 29557 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */ 29558 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */ 29559 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */ 29560 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */ 29561 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */ 29562 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */ 29563 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */ 29564 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */ 29565 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */ 29566 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */ 29567 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */ 29568 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */ 29569 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */ 29570 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */ 29571 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */ 29572 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */ 29573 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */ 29574 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */ 29575 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */ 29576 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */ 29577 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */ 29578 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */ 29579 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = 
VEX.DDS.256.66.0F38.W1 A8 /r */ 29580 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */ 29581 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */ 29582 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */ 29583 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */ 29584 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */ 29585 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */ 29586 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */ 29587 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */ 29588 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */ 29589 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */ 29590 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */ 29591 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */ 29592 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */ 29593 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */ 29594 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */ 29595 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */ 29596 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */ 29597 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */ 29598 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */ 29599 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */ 29600 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */ 29601 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */ 29602 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */ 29603 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */ 29604 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */ 29605 /* VFMSUBADD231PS 
ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */ 29606 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */ 29607 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */ 29608 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */ 29609 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */ 29610 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */ 29611 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */ 29612 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */ 29613 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */ 29614 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */ 29615 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */ 29616 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */ 29617 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */ 29618 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */ 29619 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */ 29620 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */ 29621 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */ 29622 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */ 29623 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */ 29624 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */ 29625 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */ 29626 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */ 29627 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */ 29628 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */ 29629 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */ 29630 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */ 29631 /* 
VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */ 29632 if (have66noF2noF3(pfx)) { 29633 delta = dis_FMA( vbi, pfx, delta, opc ); 29634 *uses_vvvv = True; 29635 goto decode_success; 29636 } 29637 break; 29638 29639 case 0xDB: 29640 case 0xDC: 29641 case 0xDD: 29642 case 0xDE: 29643 case 0xDF: 29644 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */ 29645 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */ 29646 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */ 29647 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */ 29648 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */ 29649 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 29650 delta = dis_AESx( vbi, pfx, delta, True/*!isAvx*/, opc ); 29651 if (opc != 0xDB) *uses_vvvv = True; 29652 goto decode_success; 29653 } 29654 break; 29655 29656 case 0xF2: 29657 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */ 29658 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */ 29659 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29660 Int size = getRexW(pfx) ? 
8 : 4; 29661 IRType ty = szToITy(size); 29662 IRTemp dst = newTemp(ty); 29663 IRTemp src1 = newTemp(ty); 29664 IRTemp src2 = newTemp(ty); 29665 UChar rm = getUChar(delta); 29666 29667 assign( src1, getIRegV(size,pfx) ); 29668 if (epartIsReg(rm)) { 29669 assign( src2, getIRegE(size,pfx,rm) ); 29670 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm), 29671 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29672 delta++; 29673 } else { 29674 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29675 assign( src2, loadLE(ty, mkexpr(addr)) ); 29676 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29677 nameIRegG(size,pfx,rm)); 29678 delta += alen; 29679 } 29680 29681 assign( dst, binop( mkSizedOp(ty,Iop_And8), 29682 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ), 29683 mkexpr(src2) ) ); 29684 putIRegG( size, pfx, rm, mkexpr(dst) ); 29685 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29686 ? AMD64G_CC_OP_ANDN64 29687 : AMD64G_CC_OP_ANDN32)) ); 29688 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29689 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 29690 *uses_vvvv = True; 29691 goto decode_success; 29692 } 29693 break; 29694 29695 case 0xF3: 29696 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */ 29697 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */ 29698 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29699 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) { 29700 Int size = getRexW(pfx) ? 
8 : 4; 29701 IRType ty = szToITy(size); 29702 IRTemp src = newTemp(ty); 29703 IRTemp dst = newTemp(ty); 29704 UChar rm = getUChar(delta); 29705 29706 if (epartIsReg(rm)) { 29707 assign( src, getIRegE(size,pfx,rm) ); 29708 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm), 29709 nameIRegV(size,pfx)); 29710 delta++; 29711 } else { 29712 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29713 assign( src, loadLE(ty, mkexpr(addr)) ); 29714 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29715 delta += alen; 29716 } 29717 29718 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29719 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0), 29720 mkexpr(src)), mkexpr(src)) ); 29721 putIRegV( size, pfx, mkexpr(dst) ); 29722 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29723 ? AMD64G_CC_OP_BLSI64 29724 : AMD64G_CC_OP_BLSI32)) ); 29725 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29726 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29727 *uses_vvvv = True; 29728 goto decode_success; 29729 } 29730 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */ 29731 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */ 29732 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29733 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) { 29734 Int size = getRexW(pfx) ? 
8 : 4; 29735 IRType ty = szToITy(size); 29736 IRTemp src = newTemp(ty); 29737 IRTemp dst = newTemp(ty); 29738 UChar rm = getUChar(delta); 29739 29740 if (epartIsReg(rm)) { 29741 assign( src, getIRegE(size,pfx,rm) ); 29742 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm), 29743 nameIRegV(size,pfx)); 29744 delta++; 29745 } else { 29746 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29747 assign( src, loadLE(ty, mkexpr(addr)) ); 29748 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29749 delta += alen; 29750 } 29751 29752 assign( dst, binop(mkSizedOp(ty,Iop_Xor8), 29753 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29754 mkU(ty, 1)), mkexpr(src)) ); 29755 putIRegV( size, pfx, mkexpr(dst) ); 29756 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29757 ? AMD64G_CC_OP_BLSMSK64 29758 : AMD64G_CC_OP_BLSMSK32)) ); 29759 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29760 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29761 *uses_vvvv = True; 29762 goto decode_success; 29763 } 29764 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */ 29765 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */ 29766 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ 29767 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) { 29768 Int size = getRexW(pfx) ? 
8 : 4; 29769 IRType ty = szToITy(size); 29770 IRTemp src = newTemp(ty); 29771 IRTemp dst = newTemp(ty); 29772 UChar rm = getUChar(delta); 29773 29774 if (epartIsReg(rm)) { 29775 assign( src, getIRegE(size,pfx,rm) ); 29776 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm), 29777 nameIRegV(size,pfx)); 29778 delta++; 29779 } else { 29780 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29781 assign( src, loadLE(ty, mkexpr(addr)) ); 29782 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx)); 29783 delta += alen; 29784 } 29785 29786 assign( dst, binop(mkSizedOp(ty,Iop_And8), 29787 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src), 29788 mkU(ty, 1)), mkexpr(src)) ); 29789 putIRegV( size, pfx, mkexpr(dst) ); 29790 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29791 ? AMD64G_CC_OP_BLSR64 29792 : AMD64G_CC_OP_BLSR32)) ); 29793 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29794 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) ); 29795 *uses_vvvv = True; 29796 goto decode_success; 29797 } 29798 break; 29799 29800 case 0xF5: 29801 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */ 29802 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */ 29803 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29804 Int size = getRexW(pfx) ? 
8 : 4; 29805 IRType ty = szToITy(size); 29806 IRTemp dst = newTemp(ty); 29807 IRTemp src1 = newTemp(ty); 29808 IRTemp src2 = newTemp(ty); 29809 IRTemp start = newTemp(Ity_I8); 29810 IRTemp cond = newTemp(Ity_I1); 29811 UChar rm = getUChar(delta); 29812 29813 assign( src2, getIRegV(size,pfx) ); 29814 if (epartIsReg(rm)) { 29815 assign( src1, getIRegE(size,pfx,rm) ); 29816 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), 29817 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 29818 delta++; 29819 } else { 29820 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29821 assign( src1, loadLE(ty, mkexpr(addr)) ); 29822 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 29823 nameIRegG(size,pfx,rm)); 29824 delta += alen; 29825 } 29826 29827 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) ); 29828 assign( cond, binop(Iop_CmpLT32U, 29829 unop(Iop_8Uto32, mkexpr(start)), 29830 mkU32(8*size)) ); 29831 /* if (start < opsize) { 29832 if (start == 0) 29833 dst = 0; 29834 else 29835 dst = (src1 << (opsize-start)) u>> (opsize-start); 29836 } else { 29837 dst = src1; 29838 } */ 29839 assign( dst, 29840 IRExpr_ITE( 29841 mkexpr(cond), 29842 IRExpr_ITE( 29843 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)), 29844 mkU(ty, 0), 29845 binop( 29846 mkSizedOp(ty,Iop_Shr8), 29847 binop( 29848 mkSizedOp(ty,Iop_Shl8), 29849 mkexpr(src1), 29850 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29851 ), 29852 binop(Iop_Sub8, mkU8(8*size), mkexpr(start)) 29853 ) 29854 ), 29855 mkexpr(src1) 29856 ) 29857 ); 29858 putIRegG( size, pfx, rm, mkexpr(dst) ); 29859 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 29860 ? 
AMD64G_CC_OP_BLSR64 29861 : AMD64G_CC_OP_BLSR32)) ); 29862 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 29863 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) ); 29864 *uses_vvvv = True; 29865 goto decode_success; 29866 } 29867 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */ 29868 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */ 29869 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29870 Int size = getRexW(pfx) ? 8 : 4; 29871 IRType ty = szToITy(size); 29872 IRTemp src = newTemp(ty); 29873 IRTemp mask = newTemp(ty); 29874 UChar rm = getUChar(delta); 29875 29876 assign( src, getIRegV(size,pfx) ); 29877 if (epartIsReg(rm)) { 29878 assign( mask, getIRegE(size,pfx,rm) ); 29879 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm), 29880 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29881 delta++; 29882 } else { 29883 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29884 assign( mask, loadLE(ty, mkexpr(addr)) ); 29885 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29886 nameIRegG(size,pfx,rm)); 29887 delta += alen; 29888 } 29889 29890 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)), 29891 widenUto64(mkexpr(mask)) ); 29892 putIRegG( size, pfx, rm, 29893 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29894 "amd64g_calculate_pdep", 29895 &amd64g_calculate_pdep, args)) ); 29896 *uses_vvvv = True; 29897 /* Flags aren't modified. */ 29898 goto decode_success; 29899 } 29900 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */ 29901 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */ 29902 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29903 Int size = getRexW(pfx) ? 
8 : 4; 29904 IRType ty = szToITy(size); 29905 IRTemp src = newTemp(ty); 29906 IRTemp mask = newTemp(ty); 29907 UChar rm = getUChar(delta); 29908 29909 assign( src, getIRegV(size,pfx) ); 29910 if (epartIsReg(rm)) { 29911 assign( mask, getIRegE(size,pfx,rm) ); 29912 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm), 29913 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29914 delta++; 29915 } else { 29916 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29917 assign( mask, loadLE(ty, mkexpr(addr)) ); 29918 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29919 nameIRegG(size,pfx,rm)); 29920 delta += alen; 29921 } 29922 29923 /* First mask off bits not set in mask, they are ignored 29924 and it should be fine if they contain undefined values. */ 29925 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8), 29926 mkexpr(src), mkexpr(mask)); 29927 IRExpr** args = mkIRExprVec_2( widenUto64(masked), 29928 widenUto64(mkexpr(mask)) ); 29929 putIRegG( size, pfx, rm, 29930 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/, 29931 "amd64g_calculate_pext", 29932 &amd64g_calculate_pext, args)) ); 29933 *uses_vvvv = True; 29934 /* Flags aren't modified. */ 29935 goto decode_success; 29936 } 29937 break; 29938 29939 case 0xF6: 29940 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */ 29941 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */ 29942 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29943 Int size = getRexW(pfx) ? 8 : 4; 29944 IRType ty = szToITy(size); 29945 IRTemp src1 = newTemp(ty); 29946 IRTemp src2 = newTemp(ty); 29947 IRTemp res = newTemp(size == 8 ? 
Ity_I128 : Ity_I64); 29948 UChar rm = getUChar(delta); 29949 29950 assign( src1, getIRegRDX(size) ); 29951 if (epartIsReg(rm)) { 29952 assign( src2, getIRegE(size,pfx,rm) ); 29953 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm), 29954 nameIRegV(size,pfx), nameIRegG(size,pfx,rm)); 29955 delta++; 29956 } else { 29957 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 29958 assign( src2, loadLE(ty, mkexpr(addr)) ); 29959 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx), 29960 nameIRegG(size,pfx,rm)); 29961 delta += alen; 29962 } 29963 29964 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32, 29965 mkexpr(src1), mkexpr(src2)) ); 29966 putIRegV( size, pfx, 29967 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) ); 29968 putIRegG( size, pfx, rm, 29969 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32, 29970 mkexpr(res)) ); 29971 *uses_vvvv = True; 29972 /* Flags aren't modified. */ 29973 goto decode_success; 29974 } 29975 break; 29976 29977 case 0xF7: 29978 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */ 29979 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */ 29980 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29981 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 ); 29982 goto decode_success; 29983 } 29984 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */ 29985 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */ 29986 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29987 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 ); 29988 goto decode_success; 29989 } 29990 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */ 29991 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */ 29992 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29993 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 ); 29994 goto decode_success; 29995 } 29996 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */ 29997 /* BEXTR r64b, 
r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */ 29998 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 29999 Int size = getRexW(pfx) ? 8 : 4; 30000 IRType ty = szToITy(size); 30001 IRTemp dst = newTemp(ty); 30002 IRTemp src1 = newTemp(ty); 30003 IRTemp src2 = newTemp(ty); 30004 IRTemp stle = newTemp(Ity_I16); 30005 IRTemp start = newTemp(Ity_I8); 30006 IRTemp len = newTemp(Ity_I8); 30007 UChar rm = getUChar(delta); 30008 30009 assign( src2, getIRegV(size,pfx) ); 30010 if (epartIsReg(rm)) { 30011 assign( src1, getIRegE(size,pfx,rm) ); 30012 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), 30013 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm)); 30014 delta++; 30015 } else { 30016 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 30017 assign( src1, loadLE(ty, mkexpr(addr)) ); 30018 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf, 30019 nameIRegG(size,pfx,rm)); 30020 delta += alen; 30021 } 30022 30023 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) ); 30024 assign( start, unop( Iop_16to8, mkexpr(stle) ) ); 30025 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) ); 30026 /* if (start+len < opsize) { 30027 if (len != 0) 30028 dst = (src1 << (opsize-start-len)) u>> (opsize-len); 30029 else 30030 dst = 0; 30031 } else { 30032 if (start < opsize) 30033 dst = src1 u>> start; 30034 else 30035 dst = 0; 30036 } */ 30037 assign( dst, 30038 IRExpr_ITE( 30039 binop(Iop_CmpLT32U, 30040 binop(Iop_Add32, 30041 unop(Iop_8Uto32, mkexpr(start)), 30042 unop(Iop_8Uto32, mkexpr(len))), 30043 mkU32(8*size)), 30044 IRExpr_ITE( 30045 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)), 30046 mkU(ty, 0), 30047 binop(mkSizedOp(ty,Iop_Shr8), 30048 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1), 30049 binop(Iop_Sub8, 30050 binop(Iop_Sub8, mkU8(8*size), 30051 mkexpr(start)), 30052 mkexpr(len))), 30053 binop(Iop_Sub8, mkU8(8*size), 30054 mkexpr(len))) 30055 ), 30056 IRExpr_ITE( 30057 binop(Iop_CmpLT32U, 30058 unop(Iop_8Uto32, mkexpr(start)), 30059 mkU32(8*size)), 30060 
binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1), 30061 mkexpr(start)), 30062 mkU(ty, 0) 30063 ) 30064 ) 30065 ); 30066 putIRegG( size, pfx, rm, mkexpr(dst) ); 30067 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8 30068 ? AMD64G_CC_OP_ANDN64 30069 : AMD64G_CC_OP_ANDN32)) ); 30070 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) ); 30071 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) ); 30072 *uses_vvvv = True; 30073 goto decode_success; 30074 } 30075 break; 30076 30077 default: 30078 break; 30079 30080 } 30081 30082 //decode_failure: 30083 return deltaIN; 30084 30085 decode_success: 30086 return delta; 30087 } 30088 30089 30090 /*------------------------------------------------------------*/ 30091 /*--- ---*/ 30092 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/ 30093 /*--- ---*/ 30094 /*------------------------------------------------------------*/ 30095 30096 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 ) 30097 { 30098 vassert(imm8 < 256); 30099 IRTemp s3, s2, s1, s0; 30100 s3 = s2 = s1 = s0 = IRTemp_INVALID; 30101 breakupV128to32s( sV, &s3, &s2, &s1, &s0 ); 30102 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \ 30103 : ((_nn)==2) ? 
s2 : s3) 30104 IRTemp res = newTemp(Ity_V128); 30105 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3), 30106 SEL((imm8 >> 4) & 3), 30107 SEL((imm8 >> 2) & 3), 30108 SEL((imm8 >> 0) & 3) )); 30109 # undef SEL 30110 return res; 30111 } 30112 30113 __attribute__((noinline)) 30114 static 30115 Long dis_ESC_0F3A__VEX ( 30116 /*MB_OUT*/DisResult* dres, 30117 /*OUT*/ Bool* uses_vvvv, 30118 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ), 30119 Bool resteerCisOk, 30120 void* callback_opaque, 30121 const VexArchInfo* archinfo, 30122 const VexAbiInfo* vbi, 30123 Prefix pfx, Int sz, Long deltaIN 30124 ) 30125 { 30126 IRTemp addr = IRTemp_INVALID; 30127 Int alen = 0; 30128 HChar dis_buf[50]; 30129 Long delta = deltaIN; 30130 UChar opc = getUChar(delta); 30131 delta++; 30132 *uses_vvvv = False; 30133 30134 switch (opc) { 30135 30136 case 0x00: 30137 case 0x01: 30138 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */ 30139 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */ 30140 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/ 30141 && 1==getRexW(pfx)/*W1*/) { 30142 UChar modrm = getUChar(delta); 30143 UInt imm8 = 0; 30144 UInt rG = gregOfRexRM(pfx, modrm); 30145 IRTemp sV = newTemp(Ity_V256); 30146 const HChar *name = opc == 0 ? 
"vpermq" : "vpermpd"; 30147 if (epartIsReg(modrm)) { 30148 UInt rE = eregOfRexRM(pfx, modrm); 30149 delta += 1; 30150 imm8 = getUChar(delta); 30151 DIP("%s $%u,%s,%s\n", 30152 name, imm8, nameYMMReg(rE), nameYMMReg(rG)); 30153 assign(sV, getYMMReg(rE)); 30154 } else { 30155 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30156 delta += alen; 30157 imm8 = getUChar(delta); 30158 DIP("%s $%u,%s,%s\n", 30159 name, imm8, dis_buf, nameYMMReg(rG)); 30160 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 30161 } 30162 delta++; 30163 IRTemp s[4]; 30164 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID; 30165 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]); 30166 IRTemp dV = newTemp(Ity_V256); 30167 assign(dV, IRExpr_Qop(Iop_64x4toV256, 30168 mkexpr(s[(imm8 >> 6) & 3]), 30169 mkexpr(s[(imm8 >> 4) & 3]), 30170 mkexpr(s[(imm8 >> 2) & 3]), 30171 mkexpr(s[(imm8 >> 0) & 3]))); 30172 putYMMReg(rG, mkexpr(dV)); 30173 goto decode_success; 30174 } 30175 break; 30176 30177 case 0x02: 30178 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */ 30179 if (have66noF2noF3(pfx) 30180 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30181 UChar modrm = getUChar(delta); 30182 UInt imm8 = 0; 30183 UInt rG = gregOfRexRM(pfx, modrm); 30184 UInt rV = getVexNvvvv(pfx); 30185 IRTemp sV = newTemp(Ity_V128); 30186 IRTemp dV = newTemp(Ity_V128); 30187 UInt i; 30188 IRTemp s[4], d[4]; 30189 assign(sV, getXMMReg(rV)); 30190 if (epartIsReg(modrm)) { 30191 UInt rE = eregOfRexRM(pfx, modrm); 30192 delta += 1; 30193 imm8 = getUChar(delta); 30194 DIP("vpblendd $%u,%s,%s,%s\n", 30195 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30196 assign(dV, getXMMReg(rE)); 30197 } else { 30198 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30199 delta += alen; 30200 imm8 = getUChar(delta); 30201 DIP("vpblendd $%u,%s,%s,%s\n", 30202 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30203 assign(dV, loadLE(Ity_V128, mkexpr(addr))); 30204 } 30205 delta++; 30206 for (i = 0; i < 4; i++) { 
30207 s[i] = IRTemp_INVALID; 30208 d[i] = IRTemp_INVALID; 30209 } 30210 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] ); 30211 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] ); 30212 for (i = 0; i < 4; i++) 30213 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i])); 30214 putYMMRegLane128(rG, 1, mkV128(0)); 30215 *uses_vvvv = True; 30216 goto decode_success; 30217 } 30218 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */ 30219 if (have66noF2noF3(pfx) 30220 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30221 UChar modrm = getUChar(delta); 30222 UInt imm8 = 0; 30223 UInt rG = gregOfRexRM(pfx, modrm); 30224 UInt rV = getVexNvvvv(pfx); 30225 IRTemp sV = newTemp(Ity_V256); 30226 IRTemp dV = newTemp(Ity_V256); 30227 UInt i; 30228 IRTemp s[8], d[8]; 30229 assign(sV, getYMMReg(rV)); 30230 if (epartIsReg(modrm)) { 30231 UInt rE = eregOfRexRM(pfx, modrm); 30232 delta += 1; 30233 imm8 = getUChar(delta); 30234 DIP("vpblendd $%u,%s,%s,%s\n", 30235 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30236 assign(dV, getYMMReg(rE)); 30237 } else { 30238 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30239 delta += alen; 30240 imm8 = getUChar(delta); 30241 DIP("vpblendd $%u,%s,%s,%s\n", 30242 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30243 assign(dV, loadLE(Ity_V256, mkexpr(addr))); 30244 } 30245 delta++; 30246 for (i = 0; i < 8; i++) { 30247 s[i] = IRTemp_INVALID; 30248 d[i] = IRTemp_INVALID; 30249 } 30250 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4], 30251 &s[3], &s[2], &s[1], &s[0] ); 30252 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4], 30253 &d[3], &d[2], &d[1], &d[0] ); 30254 for (i = 0; i < 8; i++) 30255 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? 
d[i] : s[i])); 30256 *uses_vvvv = True; 30257 goto decode_success; 30258 } 30259 break; 30260 30261 case 0x04: 30262 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */ 30263 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30264 UChar modrm = getUChar(delta); 30265 UInt imm8 = 0; 30266 UInt rG = gregOfRexRM(pfx, modrm); 30267 IRTemp sV = newTemp(Ity_V256); 30268 if (epartIsReg(modrm)) { 30269 UInt rE = eregOfRexRM(pfx, modrm); 30270 delta += 1; 30271 imm8 = getUChar(delta); 30272 DIP("vpermilps $%u,%s,%s\n", 30273 imm8, nameYMMReg(rE), nameYMMReg(rG)); 30274 assign(sV, getYMMReg(rE)); 30275 } else { 30276 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30277 delta += alen; 30278 imm8 = getUChar(delta); 30279 DIP("vpermilps $%u,%s,%s\n", 30280 imm8, dis_buf, nameYMMReg(rG)); 30281 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 30282 } 30283 delta++; 30284 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID; 30285 breakupV256toV128s( sV, &sVhi, &sVlo ); 30286 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 ); 30287 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 ); 30288 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo)); 30289 putYMMReg(rG, res); 30290 goto decode_success; 30291 } 30292 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */ 30293 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30294 UChar modrm = getUChar(delta); 30295 UInt imm8 = 0; 30296 UInt rG = gregOfRexRM(pfx, modrm); 30297 IRTemp sV = newTemp(Ity_V128); 30298 if (epartIsReg(modrm)) { 30299 UInt rE = eregOfRexRM(pfx, modrm); 30300 delta += 1; 30301 imm8 = getUChar(delta); 30302 DIP("vpermilps $%u,%s,%s\n", 30303 imm8, nameXMMReg(rE), nameXMMReg(rG)); 30304 assign(sV, getXMMReg(rE)); 30305 } else { 30306 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30307 delta += alen; 30308 imm8 = getUChar(delta); 30309 DIP("vpermilps $%u,%s,%s\n", 30310 imm8, dis_buf, nameXMMReg(rG)); 30311 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 30312 } 
30313 delta++; 30314 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) ); 30315 goto decode_success; 30316 } 30317 break; 30318 30319 case 0x05: 30320 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */ 30321 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30322 UChar modrm = getUChar(delta); 30323 UInt imm8 = 0; 30324 UInt rG = gregOfRexRM(pfx, modrm); 30325 IRTemp sV = newTemp(Ity_V128); 30326 if (epartIsReg(modrm)) { 30327 UInt rE = eregOfRexRM(pfx, modrm); 30328 delta += 1; 30329 imm8 = getUChar(delta); 30330 DIP("vpermilpd $%u,%s,%s\n", 30331 imm8, nameXMMReg(rE), nameXMMReg(rG)); 30332 assign(sV, getXMMReg(rE)); 30333 } else { 30334 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30335 delta += alen; 30336 imm8 = getUChar(delta); 30337 DIP("vpermilpd $%u,%s,%s\n", 30338 imm8, dis_buf, nameXMMReg(rG)); 30339 assign(sV, loadLE(Ity_V128, mkexpr(addr))); 30340 } 30341 delta++; 30342 IRTemp s1 = newTemp(Ity_I64); 30343 IRTemp s0 = newTemp(Ity_I64); 30344 assign(s1, unop(Iop_V128HIto64, mkexpr(sV))); 30345 assign(s0, unop(Iop_V128to64, mkexpr(sV))); 30346 IRTemp dV = newTemp(Ity_V128); 30347 assign(dV, binop(Iop_64HLtoV128, 30348 mkexpr((imm8 & (1<<1)) ? s1 : s0), 30349 mkexpr((imm8 & (1<<0)) ? 
s1 : s0))); 30350 putYMMRegLoAndZU(rG, mkexpr(dV)); 30351 goto decode_success; 30352 } 30353 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */ 30354 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30355 UChar modrm = getUChar(delta); 30356 UInt imm8 = 0; 30357 UInt rG = gregOfRexRM(pfx, modrm); 30358 IRTemp sV = newTemp(Ity_V256); 30359 if (epartIsReg(modrm)) { 30360 UInt rE = eregOfRexRM(pfx, modrm); 30361 delta += 1; 30362 imm8 = getUChar(delta); 30363 DIP("vpermilpd $%u,%s,%s\n", 30364 imm8, nameYMMReg(rE), nameYMMReg(rG)); 30365 assign(sV, getYMMReg(rE)); 30366 } else { 30367 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30368 delta += alen; 30369 imm8 = getUChar(delta); 30370 DIP("vpermilpd $%u,%s,%s\n", 30371 imm8, dis_buf, nameYMMReg(rG)); 30372 assign(sV, loadLE(Ity_V256, mkexpr(addr))); 30373 } 30374 delta++; 30375 IRTemp s3, s2, s1, s0; 30376 s3 = s2 = s1 = s0 = IRTemp_INVALID; 30377 breakupV256to64s(sV, &s3, &s2, &s1, &s0); 30378 IRTemp dV = newTemp(Ity_V256); 30379 assign(dV, IRExpr_Qop(Iop_64x4toV256, 30380 mkexpr((imm8 & (1<<3)) ? s3 : s2), 30381 mkexpr((imm8 & (1<<2)) ? s3 : s2), 30382 mkexpr((imm8 & (1<<1)) ? s1 : s0), 30383 mkexpr((imm8 & (1<<0)) ? 
s1 : s0))); 30384 putYMMReg(rG, mkexpr(dV)); 30385 goto decode_success; 30386 } 30387 break; 30388 30389 case 0x06: 30390 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */ 30391 if (have66noF2noF3(pfx) 30392 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 30393 UChar modrm = getUChar(delta); 30394 UInt imm8 = 0; 30395 UInt rG = gregOfRexRM(pfx, modrm); 30396 UInt rV = getVexNvvvv(pfx); 30397 IRTemp s00 = newTemp(Ity_V128); 30398 IRTemp s01 = newTemp(Ity_V128); 30399 IRTemp s10 = newTemp(Ity_V128); 30400 IRTemp s11 = newTemp(Ity_V128); 30401 assign(s00, getYMMRegLane128(rV, 0)); 30402 assign(s01, getYMMRegLane128(rV, 1)); 30403 if (epartIsReg(modrm)) { 30404 UInt rE = eregOfRexRM(pfx, modrm); 30405 delta += 1; 30406 imm8 = getUChar(delta); 30407 DIP("vperm2f128 $%u,%s,%s,%s\n", 30408 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30409 assign(s10, getYMMRegLane128(rE, 0)); 30410 assign(s11, getYMMRegLane128(rE, 1)); 30411 } else { 30412 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30413 delta += alen; 30414 imm8 = getUChar(delta); 30415 DIP("vperm2f128 $%u,%s,%s,%s\n", 30416 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30417 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 30418 mkexpr(addr), mkU64(0)))); 30419 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 30420 mkexpr(addr), mkU64(16)))); 30421 } 30422 delta++; 30423 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 30424 : ((_nn)==2) ? 
s10 : s11) 30425 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 30426 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 30427 # undef SEL 30428 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 30429 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 30430 *uses_vvvv = True; 30431 goto decode_success; 30432 } 30433 break; 30434 30435 case 0x08: 30436 /* VROUNDPS imm8, xmm2/m128, xmm1 */ 30437 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */ 30438 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30439 UChar modrm = getUChar(delta); 30440 UInt rG = gregOfRexRM(pfx, modrm); 30441 IRTemp src = newTemp(Ity_V128); 30442 IRTemp s0 = IRTemp_INVALID; 30443 IRTemp s1 = IRTemp_INVALID; 30444 IRTemp s2 = IRTemp_INVALID; 30445 IRTemp s3 = IRTemp_INVALID; 30446 IRTemp rm = newTemp(Ity_I32); 30447 Int imm = 0; 30448 30449 modrm = getUChar(delta); 30450 30451 if (epartIsReg(modrm)) { 30452 UInt rE = eregOfRexRM(pfx, modrm); 30453 assign( src, getXMMReg( rE ) ); 30454 imm = getUChar(delta+1); 30455 if (imm & ~15) break; 30456 delta += 1+1; 30457 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 30458 } else { 30459 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30460 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 30461 imm = getUChar(delta+alen); 30462 if (imm & ~15) break; 30463 delta += alen+1; 30464 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 30465 } 30466 30467 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30468 that encoding is the same as the encoding for IRRoundingMode, 30469 we can use that value directly in the IR as a rounding 30470 mode. */ 30471 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30472 30473 breakupV128to32s( src, &s3, &s2, &s1, &s0 ); 30474 putYMMRegLane128( rG, 1, mkV128(0) ); 30475 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 30476 unop(Iop_ReinterpI32asF32, mkexpr(s))) 30477 putYMMRegLane32F( rG, 3, CVT(s3) ); 30478 putYMMRegLane32F( rG, 2, CVT(s2) ); 30479 putYMMRegLane32F( rG, 1, CVT(s1) ); 30480 putYMMRegLane32F( rG, 0, CVT(s0) ); 30481 # undef CVT 30482 goto decode_success; 30483 } 30484 /* VROUNDPS imm8, ymm2/m256, ymm1 */ 30485 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */ 30486 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30487 UChar modrm = getUChar(delta); 30488 UInt rG = gregOfRexRM(pfx, modrm); 30489 IRTemp src = newTemp(Ity_V256); 30490 IRTemp s0 = IRTemp_INVALID; 30491 IRTemp s1 = IRTemp_INVALID; 30492 IRTemp s2 = IRTemp_INVALID; 30493 IRTemp s3 = IRTemp_INVALID; 30494 IRTemp s4 = IRTemp_INVALID; 30495 IRTemp s5 = IRTemp_INVALID; 30496 IRTemp s6 = IRTemp_INVALID; 30497 IRTemp s7 = IRTemp_INVALID; 30498 IRTemp rm = newTemp(Ity_I32); 30499 Int imm = 0; 30500 30501 modrm = getUChar(delta); 30502 30503 if (epartIsReg(modrm)) { 30504 UInt rE = eregOfRexRM(pfx, modrm); 30505 assign( src, getYMMReg( rE ) ); 30506 imm = getUChar(delta+1); 30507 if (imm & ~15) break; 30508 delta += 1+1; 30509 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 30510 } else { 30511 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30512 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 30513 imm = getUChar(delta+alen); 30514 if (imm & ~15) break; 30515 delta += alen+1; 30516 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 30517 } 30518 30519 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30520 that encoding is the same as the encoding for IRRoundingMode, 30521 we can use that value directly in the IR as a rounding 30522 mode. */ 30523 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30524 30525 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 ); 30526 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \ 30527 unop(Iop_ReinterpI32asF32, mkexpr(s))) 30528 putYMMRegLane32F( rG, 7, CVT(s7) ); 30529 putYMMRegLane32F( rG, 6, CVT(s6) ); 30530 putYMMRegLane32F( rG, 5, CVT(s5) ); 30531 putYMMRegLane32F( rG, 4, CVT(s4) ); 30532 putYMMRegLane32F( rG, 3, CVT(s3) ); 30533 putYMMRegLane32F( rG, 2, CVT(s2) ); 30534 putYMMRegLane32F( rG, 1, CVT(s1) ); 30535 putYMMRegLane32F( rG, 0, CVT(s0) ); 30536 # undef CVT 30537 goto decode_success; 30538 } 30539 30540 case 0x09: 30541 /* VROUNDPD imm8, xmm2/m128, xmm1 */ 30542 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */ 30543 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30544 UChar modrm = getUChar(delta); 30545 UInt rG = gregOfRexRM(pfx, modrm); 30546 IRTemp src = newTemp(Ity_V128); 30547 IRTemp s0 = IRTemp_INVALID; 30548 IRTemp s1 = IRTemp_INVALID; 30549 IRTemp rm = newTemp(Ity_I32); 30550 Int imm = 0; 30551 30552 modrm = getUChar(delta); 30553 30554 if (epartIsReg(modrm)) { 30555 UInt rE = eregOfRexRM(pfx, modrm); 30556 assign( src, getXMMReg( rE ) ); 30557 imm = getUChar(delta+1); 30558 if (imm & ~15) break; 30559 delta += 1+1; 30560 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) ); 30561 } else { 30562 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30563 assign( src, loadLE(Ity_V128, mkexpr(addr) ) ); 30564 imm = getUChar(delta+alen); 30565 if (imm & ~15) break; 30566 delta += alen+1; 30567 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) ); 30568 } 30569 30570 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30571 that encoding is the same as the encoding for IRRoundingMode, 30572 we can use that value directly in the IR as a rounding 30573 mode. */ 30574 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30575 30576 breakupV128to64s( src, &s1, &s0 ); 30577 putYMMRegLane128( rG, 1, mkV128(0) ); 30578 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 30579 unop(Iop_ReinterpI64asF64, mkexpr(s))) 30580 putYMMRegLane64F( rG, 1, CVT(s1) ); 30581 putYMMRegLane64F( rG, 0, CVT(s0) ); 30582 # undef CVT 30583 goto decode_success; 30584 } 30585 /* VROUNDPD imm8, ymm2/m256, ymm1 */ 30586 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */ 30587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30588 UChar modrm = getUChar(delta); 30589 UInt rG = gregOfRexRM(pfx, modrm); 30590 IRTemp src = newTemp(Ity_V256); 30591 IRTemp s0 = IRTemp_INVALID; 30592 IRTemp s1 = IRTemp_INVALID; 30593 IRTemp s2 = IRTemp_INVALID; 30594 IRTemp s3 = IRTemp_INVALID; 30595 IRTemp rm = newTemp(Ity_I32); 30596 Int imm = 0; 30597 30598 modrm = getUChar(delta); 30599 30600 if (epartIsReg(modrm)) { 30601 UInt rE = eregOfRexRM(pfx, modrm); 30602 assign( src, getYMMReg( rE ) ); 30603 imm = getUChar(delta+1); 30604 if (imm & ~15) break; 30605 delta += 1+1; 30606 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) ); 30607 } else { 30608 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30609 assign( src, loadLE(Ity_V256, mkexpr(addr) ) ); 30610 imm = getUChar(delta+alen); 30611 if (imm & ~15) break; 30612 delta += alen+1; 30613 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) ); 30614 } 30615 30616 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30617 that encoding is the same as the encoding for IRRoundingMode, 30618 we can use that value directly in the IR as a rounding 30619 mode. */ 30620 assign(rm, (imm & 4) ? 
get_sse_roundingmode() : mkU32(imm & 3)); 30621 30622 breakupV256to64s( src, &s3, &s2, &s1, &s0 ); 30623 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \ 30624 unop(Iop_ReinterpI64asF64, mkexpr(s))) 30625 putYMMRegLane64F( rG, 3, CVT(s3) ); 30626 putYMMRegLane64F( rG, 2, CVT(s2) ); 30627 putYMMRegLane64F( rG, 1, CVT(s1) ); 30628 putYMMRegLane64F( rG, 0, CVT(s0) ); 30629 # undef CVT 30630 goto decode_success; 30631 } 30632 30633 case 0x0A: 30634 case 0x0B: 30635 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */ 30636 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */ 30637 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */ 30638 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */ 30639 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30640 UChar modrm = getUChar(delta); 30641 UInt rG = gregOfRexRM(pfx, modrm); 30642 UInt rV = getVexNvvvv(pfx); 30643 Bool isD = opc == 0x0B; 30644 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); 30645 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); 30646 Int imm = 0; 30647 30648 if (epartIsReg(modrm)) { 30649 UInt rE = eregOfRexRM(pfx, modrm); 30650 assign( src, 30651 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) ); 30652 imm = getUChar(delta+1); 30653 if (imm & ~15) break; 30654 delta += 1+1; 30655 DIP( "vrounds%c $%d,%s,%s,%s\n", 30656 isD ? 'd' : 's', 30657 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) ); 30658 } else { 30659 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30660 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); 30661 imm = getUChar(delta+alen); 30662 if (imm & ~15) break; 30663 delta += alen+1; 30664 DIP( "vrounds%c $%d,%s,%s,%s\n", 30665 isD ? 'd' : 's', 30666 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) ); 30667 } 30668 30669 /* (imm & 3) contains an Intel-encoded rounding mode. Because 30670 that encoding is the same as the encoding for IRRoundingMode, 30671 we can use that value directly in the IR as a rounding 30672 mode. */ 30673 assign(res, binop(isD ? 
Iop_RoundF64toInt : Iop_RoundF32toInt, 30674 (imm & 4) ? get_sse_roundingmode() 30675 : mkU32(imm & 3), 30676 mkexpr(src)) ); 30677 30678 if (isD) 30679 putXMMRegLane64F( rG, 0, mkexpr(res) ); 30680 else { 30681 putXMMRegLane32F( rG, 0, mkexpr(res) ); 30682 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) ); 30683 } 30684 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) ); 30685 putYMMRegLane128( rG, 1, mkV128(0) ); 30686 *uses_vvvv = True; 30687 goto decode_success; 30688 } 30689 break; 30690 30691 case 0x0C: 30692 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */ 30693 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */ 30694 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30695 UChar modrm = getUChar(delta); 30696 UInt imm8; 30697 UInt rG = gregOfRexRM(pfx, modrm); 30698 UInt rV = getVexNvvvv(pfx); 30699 IRTemp sV = newTemp(Ity_V256); 30700 IRTemp sE = newTemp(Ity_V256); 30701 assign ( sV, getYMMReg(rV) ); 30702 if (epartIsReg(modrm)) { 30703 UInt rE = eregOfRexRM(pfx, modrm); 30704 delta += 1; 30705 imm8 = getUChar(delta); 30706 DIP("vblendps $%u,%s,%s,%s\n", 30707 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30708 assign(sE, getYMMReg(rE)); 30709 } else { 30710 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30711 delta += alen; 30712 imm8 = getUChar(delta); 30713 DIP("vblendps $%u,%s,%s,%s\n", 30714 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30715 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30716 } 30717 delta++; 30718 putYMMReg( rG, 30719 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) ); 30720 *uses_vvvv = True; 30721 goto decode_success; 30722 } 30723 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */ 30724 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */ 30725 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30726 UChar modrm = getUChar(delta); 30727 UInt imm8; 30728 UInt rG = gregOfRexRM(pfx, modrm); 30729 UInt rV = getVexNvvvv(pfx); 30730 IRTemp sV = newTemp(Ity_V128); 30731 IRTemp sE = newTemp(Ity_V128); 30732 assign ( sV, getXMMReg(rV) ); 
30733 if (epartIsReg(modrm)) { 30734 UInt rE = eregOfRexRM(pfx, modrm); 30735 delta += 1; 30736 imm8 = getUChar(delta); 30737 DIP("vblendps $%u,%s,%s,%s\n", 30738 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30739 assign(sE, getXMMReg(rE)); 30740 } else { 30741 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30742 delta += alen; 30743 imm8 = getUChar(delta); 30744 DIP("vblendps $%u,%s,%s,%s\n", 30745 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30746 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30747 } 30748 delta++; 30749 putYMMRegLoAndZU( rG, 30750 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) ); 30751 *uses_vvvv = True; 30752 goto decode_success; 30753 } 30754 break; 30755 30756 case 0x0D: 30757 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */ 30758 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */ 30759 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30760 UChar modrm = getUChar(delta); 30761 UInt imm8; 30762 UInt rG = gregOfRexRM(pfx, modrm); 30763 UInt rV = getVexNvvvv(pfx); 30764 IRTemp sV = newTemp(Ity_V256); 30765 IRTemp sE = newTemp(Ity_V256); 30766 assign ( sV, getYMMReg(rV) ); 30767 if (epartIsReg(modrm)) { 30768 UInt rE = eregOfRexRM(pfx, modrm); 30769 delta += 1; 30770 imm8 = getUChar(delta); 30771 DIP("vblendpd $%u,%s,%s,%s\n", 30772 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30773 assign(sE, getYMMReg(rE)); 30774 } else { 30775 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30776 delta += alen; 30777 imm8 = getUChar(delta); 30778 DIP("vblendpd $%u,%s,%s,%s\n", 30779 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30780 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30781 } 30782 delta++; 30783 putYMMReg( rG, 30784 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) ); 30785 *uses_vvvv = True; 30786 goto decode_success; 30787 } 30788 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */ 30789 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */ 30790 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30791 UChar modrm = getUChar(delta); 
30792 UInt imm8; 30793 UInt rG = gregOfRexRM(pfx, modrm); 30794 UInt rV = getVexNvvvv(pfx); 30795 IRTemp sV = newTemp(Ity_V128); 30796 IRTemp sE = newTemp(Ity_V128); 30797 assign ( sV, getXMMReg(rV) ); 30798 if (epartIsReg(modrm)) { 30799 UInt rE = eregOfRexRM(pfx, modrm); 30800 delta += 1; 30801 imm8 = getUChar(delta); 30802 DIP("vblendpd $%u,%s,%s,%s\n", 30803 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30804 assign(sE, getXMMReg(rE)); 30805 } else { 30806 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30807 delta += alen; 30808 imm8 = getUChar(delta); 30809 DIP("vblendpd $%u,%s,%s,%s\n", 30810 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30811 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30812 } 30813 delta++; 30814 putYMMRegLoAndZU( rG, 30815 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) ); 30816 *uses_vvvv = True; 30817 goto decode_success; 30818 } 30819 break; 30820 30821 case 0x0E: 30822 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */ 30823 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */ 30824 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30825 UChar modrm = getUChar(delta); 30826 UInt imm8; 30827 UInt rG = gregOfRexRM(pfx, modrm); 30828 UInt rV = getVexNvvvv(pfx); 30829 IRTemp sV = newTemp(Ity_V128); 30830 IRTemp sE = newTemp(Ity_V128); 30831 assign ( sV, getXMMReg(rV) ); 30832 if (epartIsReg(modrm)) { 30833 UInt rE = eregOfRexRM(pfx, modrm); 30834 delta += 1; 30835 imm8 = getUChar(delta); 30836 DIP("vpblendw $%u,%s,%s,%s\n", 30837 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG)); 30838 assign(sE, getXMMReg(rE)); 30839 } else { 30840 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30841 delta += alen; 30842 imm8 = getUChar(delta); 30843 DIP("vpblendw $%u,%s,%s,%s\n", 30844 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG)); 30845 assign(sE, loadLE(Ity_V128, mkexpr(addr))); 30846 } 30847 delta++; 30848 putYMMRegLoAndZU( rG, 30849 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) ); 30850 *uses_vvvv = True; 30851 goto decode_success; 
30852 } 30853 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */ 30854 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */ 30855 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30856 UChar modrm = getUChar(delta); 30857 UInt imm8; 30858 UInt rG = gregOfRexRM(pfx, modrm); 30859 UInt rV = getVexNvvvv(pfx); 30860 IRTemp sV = newTemp(Ity_V256); 30861 IRTemp sE = newTemp(Ity_V256); 30862 IRTemp sVhi, sVlo, sEhi, sElo; 30863 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID; 30864 assign ( sV, getYMMReg(rV) ); 30865 if (epartIsReg(modrm)) { 30866 UInt rE = eregOfRexRM(pfx, modrm); 30867 delta += 1; 30868 imm8 = getUChar(delta); 30869 DIP("vpblendw $%u,%s,%s,%s\n", 30870 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 30871 assign(sE, getYMMReg(rE)); 30872 } else { 30873 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30874 delta += alen; 30875 imm8 = getUChar(delta); 30876 DIP("vpblendw $%u,%s,%s,%s\n", 30877 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 30878 assign(sE, loadLE(Ity_V256, mkexpr(addr))); 30879 } 30880 delta++; 30881 breakupV256toV128s( sV, &sVhi, &sVlo ); 30882 breakupV256toV128s( sE, &sEhi, &sElo ); 30883 putYMMReg( rG, binop( Iop_V128HLtoV256, 30884 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ), 30885 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) ); 30886 *uses_vvvv = True; 30887 goto decode_success; 30888 } 30889 break; 30890 30891 case 0x0F: 30892 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */ 30893 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */ 30894 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 30895 UChar modrm = getUChar(delta); 30896 UInt rG = gregOfRexRM(pfx, modrm); 30897 UInt rV = getVexNvvvv(pfx); 30898 IRTemp sV = newTemp(Ity_V128); 30899 IRTemp dV = newTemp(Ity_V128); 30900 UInt imm8; 30901 30902 assign( dV, getXMMReg(rV) ); 30903 30904 if ( epartIsReg( modrm ) ) { 30905 UInt rE = eregOfRexRM(pfx, modrm); 30906 assign( sV, getXMMReg(rE) ); 30907 imm8 = getUChar(delta+1); 30908 delta += 1+1; 30909 DIP("vpalignr $%u,%s,%s,%s\n", 
imm8, nameXMMReg(rE), 30910 nameXMMReg(rV), nameXMMReg(rG)); 30911 } else { 30912 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30913 assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); 30914 imm8 = getUChar(delta+alen); 30915 delta += alen+1; 30916 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf, 30917 nameXMMReg(rV), nameXMMReg(rG)); 30918 } 30919 30920 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 ); 30921 putYMMRegLoAndZU( rG, mkexpr(res) ); 30922 *uses_vvvv = True; 30923 goto decode_success; 30924 } 30925 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */ 30926 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */ 30927 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 30928 UChar modrm = getUChar(delta); 30929 UInt rG = gregOfRexRM(pfx, modrm); 30930 UInt rV = getVexNvvvv(pfx); 30931 IRTemp sV = newTemp(Ity_V256); 30932 IRTemp dV = newTemp(Ity_V256); 30933 IRTemp sHi, sLo, dHi, dLo; 30934 sHi = sLo = dHi = dLo = IRTemp_INVALID; 30935 UInt imm8; 30936 30937 assign( dV, getYMMReg(rV) ); 30938 30939 if ( epartIsReg( modrm ) ) { 30940 UInt rE = eregOfRexRM(pfx, modrm); 30941 assign( sV, getYMMReg(rE) ); 30942 imm8 = getUChar(delta+1); 30943 delta += 1+1; 30944 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE), 30945 nameYMMReg(rV), nameYMMReg(rG)); 30946 } else { 30947 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 30948 assign( sV, loadLE(Ity_V256, mkexpr(addr)) ); 30949 imm8 = getUChar(delta+alen); 30950 delta += alen+1; 30951 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf, 30952 nameYMMReg(rV), nameYMMReg(rG)); 30953 } 30954 30955 breakupV256toV128s( dV, &dHi, &dLo ); 30956 breakupV256toV128s( sV, &sHi, &sLo ); 30957 putYMMReg( rG, binop( Iop_V128HLtoV256, 30958 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ), 30959 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) ) 30960 ); 30961 *uses_vvvv = True; 30962 goto decode_success; 30963 } 30964 break; 30965 30966 case 0x14: 30967 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */ 30968 if (have66noF2noF3(pfx) 
30969 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30970 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ ); 30971 goto decode_success; 30972 } 30973 break; 30974 30975 case 0x15: 30976 /* VPEXTRW imm8, reg/m16, xmm2 */ 30977 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */ 30978 if (have66noF2noF3(pfx) 30979 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30980 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ ); 30981 goto decode_success; 30982 } 30983 break; 30984 30985 case 0x16: 30986 /* VPEXTRD imm8, r32/m32, xmm2 */ 30987 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */ 30988 if (have66noF2noF3(pfx) 30989 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 30990 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ ); 30991 goto decode_success; 30992 } 30993 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */ 30994 if (have66noF2noF3(pfx) 30995 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 30996 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ ); 30997 goto decode_success; 30998 } 30999 break; 31000 31001 case 0x17: 31002 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */ 31003 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31004 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ ); 31005 goto decode_success; 31006 } 31007 break; 31008 31009 case 0x18: 31010 /* VINSERTF128 r/m, rV, rD 31011 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 31012 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */ 31013 if (have66noF2noF3(pfx) 31014 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31015 UChar modrm = getUChar(delta); 31016 UInt ib = 0; 31017 UInt rG = gregOfRexRM(pfx, modrm); 31018 UInt rV = getVexNvvvv(pfx); 31019 IRTemp t128 = newTemp(Ity_V128); 31020 if (epartIsReg(modrm)) { 31021 UInt rE = eregOfRexRM(pfx, modrm); 31022 delta += 1; 31023 assign(t128, getXMMReg(rE)); 31024 ib = getUChar(delta); 31025 DIP("vinsertf128 $%u,%s,%s,%s\n", 31026 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 31027 } 
else { 31028 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31029 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 31030 delta += alen; 31031 ib = getUChar(delta); 31032 DIP("vinsertf128 $%u,%s,%s,%s\n", 31033 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 31034 } 31035 delta++; 31036 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 31037 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 31038 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 31039 *uses_vvvv = True; 31040 goto decode_success; 31041 } 31042 break; 31043 31044 case 0x19: 31045 /* VEXTRACTF128 $lane_no, rS, r/m 31046 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 31047 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */ 31048 if (have66noF2noF3(pfx) 31049 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31050 UChar modrm = getUChar(delta); 31051 UInt ib = 0; 31052 UInt rS = gregOfRexRM(pfx, modrm); 31053 IRTemp t128 = newTemp(Ity_V128); 31054 if (epartIsReg(modrm)) { 31055 UInt rD = eregOfRexRM(pfx, modrm); 31056 delta += 1; 31057 ib = getUChar(delta); 31058 assign(t128, getYMMRegLane128(rS, ib & 1)); 31059 putYMMRegLoAndZU(rD, mkexpr(t128)); 31060 DIP("vextractf128 $%u,%s,%s\n", 31061 ib, nameXMMReg(rS), nameYMMReg(rD)); 31062 } else { 31063 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31064 delta += alen; 31065 ib = getUChar(delta); 31066 assign(t128, getYMMRegLane128(rS, ib & 1)); 31067 storeLE(mkexpr(addr), mkexpr(t128)); 31068 DIP("vextractf128 $%u,%s,%s\n", 31069 ib, nameYMMReg(rS), dis_buf); 31070 } 31071 delta++; 31072 /* doesn't use vvvv */ 31073 goto decode_success; 31074 } 31075 break; 31076 31077 case 0x20: 31078 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */ 31079 if (have66noF2noF3(pfx) 31080 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 31081 UChar modrm = getUChar(delta); 31082 UInt rG = gregOfRexRM(pfx, modrm); 31083 UInt rV = getVexNvvvv(pfx); 31084 Int imm8; 31085 IRTemp src_u8 = newTemp(Ity_I8); 31086 31087 if ( epartIsReg( modrm ) ) { 31088 
UInt rE = eregOfRexRM(pfx,modrm); 31089 imm8 = (Int)(getUChar(delta+1) & 15); 31090 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) ); 31091 delta += 1+1; 31092 DIP( "vpinsrb $%d,%s,%s,%s\n", 31093 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31094 } else { 31095 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31096 imm8 = (Int)(getUChar(delta+alen) & 15); 31097 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) ); 31098 delta += alen+1; 31099 DIP( "vpinsrb $%d,%s,%s,%s\n", 31100 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31101 } 31102 31103 IRTemp src_vec = newTemp(Ity_V128); 31104 assign(src_vec, getXMMReg( rV )); 31105 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 ); 31106 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 31107 *uses_vvvv = True; 31108 goto decode_success; 31109 } 31110 break; 31111 31112 case 0x21: 31113 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1 31114 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */ 31115 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31116 UChar modrm = getUChar(delta); 31117 UInt rG = gregOfRexRM(pfx, modrm); 31118 UInt rV = getVexNvvvv(pfx); 31119 UInt imm8; 31120 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */ 31121 const IRTemp inval = IRTemp_INVALID; 31122 31123 if ( epartIsReg( modrm ) ) { 31124 UInt rE = eregOfRexRM(pfx, modrm); 31125 IRTemp vE = newTemp(Ity_V128); 31126 assign( vE, getXMMReg(rE) ); 31127 IRTemp dsE[4] = { inval, inval, inval, inval }; 31128 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] ); 31129 imm8 = getUChar(delta+1); 31130 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */ 31131 delta += 1+1; 31132 DIP( "insertps $%u, %s,%s\n", 31133 imm8, nameXMMReg(rE), nameXMMReg(rG) ); 31134 } else { 31135 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31136 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) ); 31137 imm8 = getUChar(delta+alen); 31138 delta += alen+1; 31139 DIP( "insertps $%u, %s,%s\n", 31140 imm8, dis_buf, nameXMMReg(rG) ); 31141 } 31142 31143 IRTemp 
vV = newTemp(Ity_V128); 31144 assign( vV, getXMMReg(rV) ); 31145 31146 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) ); 31147 *uses_vvvv = True; 31148 goto decode_success; 31149 } 31150 break; 31151 31152 case 0x22: 31153 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */ 31154 if (have66noF2noF3(pfx) 31155 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) { 31156 UChar modrm = getUChar(delta); 31157 UInt rG = gregOfRexRM(pfx, modrm); 31158 UInt rV = getVexNvvvv(pfx); 31159 Int imm8_10; 31160 IRTemp src_u32 = newTemp(Ity_I32); 31161 31162 if ( epartIsReg( modrm ) ) { 31163 UInt rE = eregOfRexRM(pfx,modrm); 31164 imm8_10 = (Int)(getUChar(delta+1) & 3); 31165 assign( src_u32, getIReg32( rE ) ); 31166 delta += 1+1; 31167 DIP( "vpinsrd $%d,%s,%s,%s\n", 31168 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31169 } else { 31170 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31171 imm8_10 = (Int)(getUChar(delta+alen) & 3); 31172 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) ); 31173 delta += alen+1; 31174 DIP( "vpinsrd $%d,%s,%s,%s\n", 31175 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31176 } 31177 31178 IRTemp src_vec = newTemp(Ity_V128); 31179 assign(src_vec, getXMMReg( rV )); 31180 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 ); 31181 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 31182 *uses_vvvv = True; 31183 goto decode_success; 31184 } 31185 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */ 31186 if (have66noF2noF3(pfx) 31187 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) { 31188 UChar modrm = getUChar(delta); 31189 UInt rG = gregOfRexRM(pfx, modrm); 31190 UInt rV = getVexNvvvv(pfx); 31191 Int imm8_0; 31192 IRTemp src_u64 = newTemp(Ity_I64); 31193 31194 if ( epartIsReg( modrm ) ) { 31195 UInt rE = eregOfRexRM(pfx,modrm); 31196 imm8_0 = (Int)(getUChar(delta+1) & 1); 31197 assign( src_u64, getIReg64( rE ) ); 31198 delta += 1+1; 31199 DIP( "vpinsrq $%d,%s,%s,%s\n", 31200 
imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31201 } else { 31202 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31203 imm8_0 = (Int)(getUChar(delta+alen) & 1); 31204 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) ); 31205 delta += alen+1; 31206 DIP( "vpinsrd $%d,%s,%s,%s\n", 31207 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31208 } 31209 31210 IRTemp src_vec = newTemp(Ity_V128); 31211 assign(src_vec, getXMMReg( rV )); 31212 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 ); 31213 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 31214 *uses_vvvv = True; 31215 goto decode_success; 31216 } 31217 break; 31218 31219 case 0x38: 31220 /* VINSERTI128 r/m, rV, rD 31221 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */ 31222 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */ 31223 if (have66noF2noF3(pfx) 31224 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31225 UChar modrm = getUChar(delta); 31226 UInt ib = 0; 31227 UInt rG = gregOfRexRM(pfx, modrm); 31228 UInt rV = getVexNvvvv(pfx); 31229 IRTemp t128 = newTemp(Ity_V128); 31230 if (epartIsReg(modrm)) { 31231 UInt rE = eregOfRexRM(pfx, modrm); 31232 delta += 1; 31233 assign(t128, getXMMReg(rE)); 31234 ib = getUChar(delta); 31235 DIP("vinserti128 $%u,%s,%s,%s\n", 31236 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 31237 } else { 31238 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31239 assign(t128, loadLE(Ity_V128, mkexpr(addr))); 31240 delta += alen; 31241 ib = getUChar(delta); 31242 DIP("vinserti128 $%u,%s,%s,%s\n", 31243 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 31244 } 31245 delta++; 31246 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0)); 31247 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1)); 31248 putYMMRegLane128(rG, ib & 1, mkexpr(t128)); 31249 *uses_vvvv = True; 31250 goto decode_success; 31251 } 31252 break; 31253 31254 case 0x39: 31255 /* VEXTRACTI128 $lane_no, rS, r/m 31256 ::: r/m:V128 = a lane of rS:V256 (RM format) */ 31257 /* VEXTRACTI128 = 
VEX.256.66.0F3A.W0 39 /r ib */ 31258 if (have66noF2noF3(pfx) 31259 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31260 UChar modrm = getUChar(delta); 31261 UInt ib = 0; 31262 UInt rS = gregOfRexRM(pfx, modrm); 31263 IRTemp t128 = newTemp(Ity_V128); 31264 if (epartIsReg(modrm)) { 31265 UInt rD = eregOfRexRM(pfx, modrm); 31266 delta += 1; 31267 ib = getUChar(delta); 31268 assign(t128, getYMMRegLane128(rS, ib & 1)); 31269 putYMMRegLoAndZU(rD, mkexpr(t128)); 31270 DIP("vextracti128 $%u,%s,%s\n", 31271 ib, nameXMMReg(rS), nameYMMReg(rD)); 31272 } else { 31273 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31274 delta += alen; 31275 ib = getUChar(delta); 31276 assign(t128, getYMMRegLane128(rS, ib & 1)); 31277 storeLE(mkexpr(addr), mkexpr(t128)); 31278 DIP("vextracti128 $%u,%s,%s\n", 31279 ib, nameYMMReg(rS), dis_buf); 31280 } 31281 delta++; 31282 /* doesn't use vvvv */ 31283 goto decode_success; 31284 } 31285 break; 31286 31287 case 0x40: 31288 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */ 31289 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31290 UChar modrm = getUChar(delta); 31291 UInt rG = gregOfRexRM(pfx, modrm); 31292 UInt rV = getVexNvvvv(pfx); 31293 IRTemp dst_vec = newTemp(Ity_V128); 31294 Int imm8; 31295 if (epartIsReg( modrm )) { 31296 UInt rE = eregOfRexRM(pfx,modrm); 31297 imm8 = (Int)getUChar(delta+1); 31298 assign( dst_vec, getXMMReg( rE ) ); 31299 delta += 1+1; 31300 DIP( "vdpps $%d,%s,%s,%s\n", 31301 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31302 } else { 31303 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31304 imm8 = (Int)getUChar(delta+alen); 31305 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 31306 delta += alen+1; 31307 DIP( "vdpps $%d,%s,%s,%s\n", 31308 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31309 } 31310 31311 IRTemp src_vec = newTemp(Ity_V128); 31312 assign(src_vec, getXMMReg( rV )); 31313 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 ); 31314 
putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 31315 *uses_vvvv = True; 31316 goto decode_success; 31317 } 31318 /* VDPPS imm8, ymm3/m128,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */ 31319 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31320 UChar modrm = getUChar(delta); 31321 UInt rG = gregOfRexRM(pfx, modrm); 31322 UInt rV = getVexNvvvv(pfx); 31323 IRTemp dst_vec = newTemp(Ity_V256); 31324 Int imm8; 31325 if (epartIsReg( modrm )) { 31326 UInt rE = eregOfRexRM(pfx,modrm); 31327 imm8 = (Int)getUChar(delta+1); 31328 assign( dst_vec, getYMMReg( rE ) ); 31329 delta += 1+1; 31330 DIP( "vdpps $%d,%s,%s,%s\n", 31331 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) ); 31332 } else { 31333 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31334 imm8 = (Int)getUChar(delta+alen); 31335 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) ); 31336 delta += alen+1; 31337 DIP( "vdpps $%d,%s,%s,%s\n", 31338 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 31339 } 31340 31341 IRTemp src_vec = newTemp(Ity_V256); 31342 assign(src_vec, getYMMReg( rV )); 31343 IRTemp s0, s1, d0, d1; 31344 s0 = s1 = d0 = d1 = IRTemp_INVALID; 31345 breakupV256toV128s( dst_vec, &d1, &d0 ); 31346 breakupV256toV128s( src_vec, &s1, &s0 ); 31347 putYMMReg( rG, binop( Iop_V128HLtoV256, 31348 mkexpr( math_DPPS_128(s1, d1, imm8) ), 31349 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) ); 31350 *uses_vvvv = True; 31351 goto decode_success; 31352 } 31353 break; 31354 31355 case 0x41: 31356 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */ 31357 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31358 UChar modrm = getUChar(delta); 31359 UInt rG = gregOfRexRM(pfx, modrm); 31360 UInt rV = getVexNvvvv(pfx); 31361 IRTemp dst_vec = newTemp(Ity_V128); 31362 Int imm8; 31363 if (epartIsReg( modrm )) { 31364 UInt rE = eregOfRexRM(pfx,modrm); 31365 imm8 = (Int)getUChar(delta+1); 31366 assign( dst_vec, getXMMReg( rE ) ); 31367 delta += 1+1; 31368 DIP( "vdppd $%d,%s,%s,%s\n", 31369 imm8, 
nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31370 } else { 31371 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31372 imm8 = (Int)getUChar(delta+alen); 31373 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 31374 delta += alen+1; 31375 DIP( "vdppd $%d,%s,%s,%s\n", 31376 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31377 } 31378 31379 IRTemp src_vec = newTemp(Ity_V128); 31380 assign(src_vec, getXMMReg( rV )); 31381 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 ); 31382 putYMMRegLoAndZU( rG, mkexpr(res_vec) ); 31383 *uses_vvvv = True; 31384 goto decode_success; 31385 } 31386 break; 31387 31388 case 0x42: 31389 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */ 31390 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */ 31391 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31392 UChar modrm = getUChar(delta); 31393 Int imm8; 31394 IRTemp src_vec = newTemp(Ity_V128); 31395 IRTemp dst_vec = newTemp(Ity_V128); 31396 UInt rG = gregOfRexRM(pfx, modrm); 31397 UInt rV = getVexNvvvv(pfx); 31398 31399 assign( dst_vec, getXMMReg(rV) ); 31400 31401 if ( epartIsReg( modrm ) ) { 31402 UInt rE = eregOfRexRM(pfx, modrm); 31403 31404 imm8 = (Int)getUChar(delta+1); 31405 assign( src_vec, getXMMReg(rE) ); 31406 delta += 1+1; 31407 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 31408 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31409 } else { 31410 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 31411 1/* imm8 is 1 byte after the amode */ ); 31412 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) ); 31413 imm8 = (Int)getUChar(delta+alen); 31414 delta += alen+1; 31415 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 31416 dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31417 } 31418 31419 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec, 31420 src_vec, imm8) ) ); 31421 *uses_vvvv = True; 31422 goto decode_success; 31423 } 31424 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */ 31425 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */ 31426 if (have66noF2noF3(pfx) && 
1==getVexL(pfx)/*256*/) { 31427 UChar modrm = getUChar(delta); 31428 Int imm8; 31429 IRTemp src_vec = newTemp(Ity_V256); 31430 IRTemp dst_vec = newTemp(Ity_V256); 31431 UInt rG = gregOfRexRM(pfx, modrm); 31432 UInt rV = getVexNvvvv(pfx); 31433 IRTemp sHi, sLo, dHi, dLo; 31434 sHi = sLo = dHi = dLo = IRTemp_INVALID; 31435 31436 assign( dst_vec, getYMMReg(rV) ); 31437 31438 if ( epartIsReg( modrm ) ) { 31439 UInt rE = eregOfRexRM(pfx, modrm); 31440 31441 imm8 = (Int)getUChar(delta+1); 31442 assign( src_vec, getYMMReg(rE) ); 31443 delta += 1+1; 31444 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 31445 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) ); 31446 } else { 31447 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 31448 1/* imm8 is 1 byte after the amode */ ); 31449 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) ); 31450 imm8 = (Int)getUChar(delta+alen); 31451 delta += alen+1; 31452 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8, 31453 dis_buf, nameYMMReg(rV), nameYMMReg(rG) ); 31454 } 31455 31456 breakupV256toV128s( dst_vec, &dHi, &dLo ); 31457 breakupV256toV128s( src_vec, &sHi, &sLo ); 31458 putYMMReg( rG, binop( Iop_V128HLtoV256, 31459 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ), 31460 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) ); 31461 *uses_vvvv = True; 31462 goto decode_success; 31463 } 31464 break; 31465 31466 case 0x44: 31467 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */ 31468 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */ 31469 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8 31470 * Carry-less multiplication of selected XMM quadwords into XMM 31471 * registers (a.k.a multiplication of polynomials over GF(2)) 31472 */ 31473 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31474 UChar modrm = getUChar(delta); 31475 Int imm8; 31476 IRTemp sV = newTemp(Ity_V128); 31477 IRTemp dV = newTemp(Ity_V128); 31478 UInt rG = gregOfRexRM(pfx, modrm); 31479 UInt rV = getVexNvvvv(pfx); 31480 31481 assign( dV, getXMMReg(rV) ); 31482 31483 if ( epartIsReg( modrm ) ) 
{ 31484 UInt rE = eregOfRexRM(pfx, modrm); 31485 imm8 = (Int)getUChar(delta+1); 31486 assign( sV, getXMMReg(rE) ); 31487 delta += 1+1; 31488 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8, 31489 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) ); 31490 } else { 31491 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 31492 1/* imm8 is 1 byte after the amode */ ); 31493 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) ); 31494 imm8 = (Int)getUChar(delta+alen); 31495 delta += alen+1; 31496 DIP( "vpclmulqdq $%d, %s,%s,%s\n", 31497 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) ); 31498 } 31499 31500 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) ); 31501 *uses_vvvv = True; 31502 goto decode_success; 31503 } 31504 break; 31505 31506 case 0x46: 31507 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */ 31508 if (have66noF2noF3(pfx) 31509 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) { 31510 UChar modrm = getUChar(delta); 31511 UInt imm8 = 0; 31512 UInt rG = gregOfRexRM(pfx, modrm); 31513 UInt rV = getVexNvvvv(pfx); 31514 IRTemp s00 = newTemp(Ity_V128); 31515 IRTemp s01 = newTemp(Ity_V128); 31516 IRTemp s10 = newTemp(Ity_V128); 31517 IRTemp s11 = newTemp(Ity_V128); 31518 assign(s00, getYMMRegLane128(rV, 0)); 31519 assign(s01, getYMMRegLane128(rV, 1)); 31520 if (epartIsReg(modrm)) { 31521 UInt rE = eregOfRexRM(pfx, modrm); 31522 delta += 1; 31523 imm8 = getUChar(delta); 31524 DIP("vperm2i128 $%u,%s,%s,%s\n", 31525 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG)); 31526 assign(s10, getYMMRegLane128(rE, 0)); 31527 assign(s11, getYMMRegLane128(rE, 1)); 31528 } else { 31529 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 ); 31530 delta += alen; 31531 imm8 = getUChar(delta); 31532 DIP("vperm2i128 $%u,%s,%s,%s\n", 31533 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG)); 31534 assign(s10, loadLE(Ity_V128, binop(Iop_Add64, 31535 mkexpr(addr), mkU64(0)))); 31536 assign(s11, loadLE(Ity_V128, binop(Iop_Add64, 31537 mkexpr(addr), mkU64(16)))); 31538 } 
31539 delta++; 31540 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \ 31541 : ((_nn)==2) ? s10 : s11) 31542 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3))); 31543 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3))); 31544 # undef SEL 31545 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0)); 31546 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0)); 31547 *uses_vvvv = True; 31548 goto decode_success; 31549 } 31550 break; 31551 31552 case 0x4A: 31553 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4 31554 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31555 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */ 31556 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31557 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31558 "vblendvps", 4, Iop_SarN32x4 ); 31559 *uses_vvvv = True; 31560 goto decode_success; 31561 } 31562 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4 31563 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31564 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */ 31565 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31566 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31567 "vblendvps", 4, Iop_SarN32x4 ); 31568 *uses_vvvv = True; 31569 goto decode_success; 31570 } 31571 break; 31572 31573 case 0x4B: 31574 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4 31575 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31576 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */ 31577 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31578 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31579 "vblendvpd", 8, Iop_SarN64x2 ); 31580 *uses_vvvv = True; 31581 goto decode_success; 31582 } 31583 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4 31584 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31585 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */ 31586 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31587 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31588 "vblendvpd", 8, Iop_SarN64x2 ); 31589 *uses_vvvv = True; 31590 goto decode_success; 31591 } 
31592 break; 31593 31594 case 0x4C: 31595 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4 31596 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */ 31597 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */ 31598 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31599 delta = dis_VBLENDV_128 ( vbi, pfx, delta, 31600 "vpblendvb", 1, Iop_SarN8x16 ); 31601 *uses_vvvv = True; 31602 goto decode_success; 31603 } 31604 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4 31605 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */ 31606 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */ 31607 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) { 31608 delta = dis_VBLENDV_256 ( vbi, pfx, delta, 31609 "vpblendvb", 1, Iop_SarN8x16 ); 31610 *uses_vvvv = True; 31611 goto decode_success; 31612 } 31613 break; 31614 31615 case 0x60: 31616 case 0x61: 31617 case 0x62: 31618 case 0x63: 31619 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1 31620 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1 31621 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1 31622 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1 31623 (selected special cases that actually occur in glibc, 31624 not by any means a complete implementation.) 
31625 */ 31626 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31627 Long delta0 = delta; 31628 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc ); 31629 if (delta > delta0) goto decode_success; 31630 /* else fall though; dis_PCMPxSTRx failed to decode it */ 31631 } 31632 break; 31633 31634 case 0xDF: 31635 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */ 31636 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) { 31637 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*!isAvx*/ ); 31638 goto decode_success; 31639 } 31640 break; 31641 31642 case 0xF0: 31643 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */ 31644 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */ 31645 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) { 31646 Int size = getRexW(pfx) ? 8 : 4; 31647 IRType ty = szToITy(size); 31648 IRTemp src = newTemp(ty); 31649 UChar rm = getUChar(delta); 31650 UChar imm8; 31651 31652 if (epartIsReg(rm)) { 31653 imm8 = getUChar(delta+1); 31654 assign( src, getIRegE(size,pfx,rm) ); 31655 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm), 31656 nameIRegG(size,pfx,rm)); 31657 delta += 2; 31658 } else { 31659 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 ); 31660 imm8 = getUChar(delta+alen); 31661 assign( src, loadLE(ty, mkexpr(addr)) ); 31662 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm)); 31663 delta += alen + 1; 31664 } 31665 imm8 &= 8*size-1; 31666 31667 /* dst = (src >>u imm8) | (src << (size-imm8)) */ 31668 putIRegG( size, pfx, rm, 31669 imm8 == 0 ? mkexpr(src) 31670 : binop( mkSizedOp(ty,Iop_Or8), 31671 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src), 31672 mkU8(imm8) ), 31673 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src), 31674 mkU8(8*size-imm8) ) ) ); 31675 /* Flags aren't modified. 
                                     */
         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   return deltaIN;

  decode_success:
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Disassemble a single instruction                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction is
   located in host memory at &guest_code[delta].

   Parameters:
     expect_CAS     OUT: set True iff a LOCK prefix was seen and
                    accepted, so the caller can verify that the
                    generated IR really contains an IRCAS.
     resteerOkFn,
     resteerCisOk,
     callback_opaque  resteering policy and its opaque argument,
                    passed straight through to the per-escape
                    decoder functions.
     delta64        offset of the insn within the guest code block.
     archinfo       CPU capabilities; consulted here only for
                    VEX_HWCAPS_AMD64_AVX (VEX-prefix acceptance).
     vbi            ABI details; consulted for whether constant
                    %fs/%gs segment bases may be assumed.
     sigill_diag    if True, print diagnostics when an insn cannot
                    be decoded.

   Returns a DisResult; on success .len is the number of guest bytes
   consumed (delta - delta_start). */

static
DisResult disInstr_AMD64_WRK (
             /*OUT*/Bool* expect_CAS,
             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
             Bool         resteerCisOk,
             void*        callback_opaque,
             Long         delta64,
             const VexArchInfo* archinfo,
             const VexAbiInfo*  vbi,
             Bool         sigill_diag
          )
{
   IRTemp    t1, t2;
   UChar     pre;
   Int       n, n_prefixes;
   DisResult dres;

   /* The running delta */
   Long delta = delta64;

   /* Holds eip at the start of the insn, so that we can print
      consistent error messages for unimplemented insns. */
   Long delta_start = delta;

   /* sz denotes the nominal data-op size of the insn; we change it to
      2 if an 0x66 prefix is seen and 8 if REX.W is 1.  In case of
      conflict REX.W takes precedence. */
   Int sz = 4;

   /* pfx holds the summary of prefixes. */
   Prefix pfx = PFX_EMPTY;

   /* Holds the computed opcode-escape indication. */
   Escape esc = ESC_NONE;

   /* Set result defaults. */
   dres.whatNext    = Dis_Continue;
   dres.len         = 0;
   dres.continueAt  = 0;
   dres.jk_StopHere = Ijk_INVALID;
   *expect_CAS      = False;

   /* The caller (disInstr_AMD64) must have reset the next-RIP
      tracking globals before calling us. */
   vassert(guest_RIP_next_assumed == 0);
   vassert(guest_RIP_next_mustcheck == False);

   t1 = t2 = IRTemp_INVALID;

   DIP("\t0x%llx: ", guest_RIP_bbstart+delta);

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_code + delta;
      /* Spot the 16-byte preamble:
         48C1C703   rolq $3,  %rdi
         48C1C70D   rolq $13, %rdi
         48C1C73D   rolq $61, %rdi
         48C1C733   rolq $51, %rdi
      */
      if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
          && code[ 3] == 0x03 &&
          code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
          && code[ 7] == 0x0D &&
          code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
          && code[11] == 0x3D &&
          code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
          && code[15] == 0x33) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The 17th..19th bytes (a specific xchgq) select the
            operation; all variants consume 19 bytes in total. */
         if (code[16] == 0x48 && code[17] == 0x87
             && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
            /* %RDX = client_request ( %RAX ) */
            DIP("%%rdx = client_request ( %%rax )\n");
            delta += 19;
            jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
             && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
            /* %RAX = guest_NRADDR */
            DIP("%%rax = guest_NRADDR\n");
            delta += 19;
            putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
             && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
            /* call-noredir *%RAX */
            DIP("call-noredir *%%rax\n");
            delta += 19;
            /* Synthesise the call by hand: push the return address
               and jump (without redirection) to *%RAX. */
            t1 = newTemp(Ity_I64);
            assign(t1, getIRegRAX(8));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
            jmp_treg(&dres, Ijk_NoRedir, t1);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
             && code[18] == 0xff /* xchgq %rdi,%rdi */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);

            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));

            delta += 19;

            stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
            dres.whatNext = Dis_StopHere;
            dres.jk_StopHere = Ijk_InvalICache;
            goto decode_success;
         }
         /* We don't know what it is. */
         goto decode_failure;
         /*NOTREACHED*/
      }
   }

   /* Eat prefixes, summarising the result in pfx and sz, and rejecting
      as many invalid combinations as possible. */
   n_prefixes = 0;
   while (True) {
      /* Bail out on absurdly long runs of legacy prefixes. */
      if (n_prefixes > 7) goto decode_failure;
      pre = getUChar(delta);
      switch (pre) {
         case 0x66: pfx |= PFX_66; break;
         case 0x67: pfx |= PFX_ASO; break;
         case 0xF2: pfx |= PFX_F2; break;
         case 0xF3: pfx |= PFX_F3; break;
         case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
         case 0x2E: pfx |= PFX_CS; break;
         case 0x3E: pfx |= PFX_DS; break;
         case 0x26: pfx |= PFX_ES; break;
         case 0x64: pfx |= PFX_FS; break;
         case 0x65: pfx |= PFX_GS; break;
         case 0x36: pfx |= PFX_SS; break;
         case 0x40 ... 0x4F:
            /* REX byte: record presence plus the W/R/X/B bits. */
            pfx |= PFX_REX;
            if (pre & (1<<3)) pfx |= PFX_REXW;
            if (pre & (1<<2)) pfx |= PFX_REXR;
            if (pre & (1<<1)) pfx |= PFX_REXX;
            if (pre & (1<<0)) pfx |= PFX_REXB;
            break;
         default:
            goto not_a_legacy_prefix;
      }
      n_prefixes++;
      delta++;
   }

  not_a_legacy_prefix:
   /* We've used up all the non-VEX prefixes.  Parse and validate a
      VEX prefix if that's appropriate. */
   if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
      /* Used temporarily for holding VEX prefixes. */
      UChar vex0 = getUChar(delta);
      if (vex0 == 0xC4) {
         /* 3-byte VEX */
         UChar vex1 = getUChar(delta+1);
         UChar vex2 = getUChar(delta+2);
         delta += 3;
         pfx |= PFX_VEX;
         /* Snarf contents of byte 1.  Note R/X/B are stored inverted
            in the VEX encoding, hence the "? 0 :" shape. */
         /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
         /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
         /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
         /* m-mmmm */
         switch (vex1 & 0x1F) {
            case 1: esc = ESC_0F; break;
            case 2: esc = ESC_0F38; break;
            case 3: esc = ESC_0F3A; break;
            /* Any other m-mmmm field will #UD */
            default: goto decode_failure;
         }
         /* Snarf contents of byte 2 */
         /* W */    pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
         /* ~v3 */  pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
         /* ~v2 */  pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
         /* ~v1 */  pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
         /* ~v0 */  pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
         /* L */    pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
         /* pp -- the implied SIMD prefix */
         switch (vex2 & 3) {
            case 0: break;
            case 1: pfx |= PFX_66; break;
            case 2: pfx |= PFX_F3; break;
            case 3: pfx |= PFX_F2; break;
            default: vassert(0);
         }
      }
      else if (vex0 == 0xC5) {
         /* 2-byte VEX */
         UChar vex1 = getUChar(delta+1);
         delta += 2;
         pfx |= PFX_VEX;
         /* Snarf contents of byte 1 */
         /* R */    pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
         /* ~v3 */  pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
         /* ~v2 */  pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
         /* ~v1 */  pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
         /* ~v0 */  pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
         /* L */    pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
         /* pp */
         switch (vex1 & 3) {
            case 0: break;
            case 1: pfx |= PFX_66; break;
            case 2: pfx |= PFX_F3; break;
            case 3: pfx |= PFX_F2; break;
            default: vassert(0);
         }
         /* implied: */
         esc = ESC_0F;
      }
      /* Can't have both VEX and REX */
      if ((pfx & PFX_VEX) && (pfx & PFX_REX))
         goto decode_failure; /* can't have both */
   }

   /* Dump invalid combinations */
   n = 0;
   if (pfx & PFX_F2) n++;
   if (pfx & PFX_F3) n++;
   if (n > 1)
      goto decode_failure; /* can't have both */

   n = 0;
   if (pfx & PFX_CS) n++;
   if (pfx & PFX_DS) n++;
   if (pfx & PFX_ES) n++;
   if (pfx & PFX_FS) n++;
   if (pfx & PFX_GS) n++;
   if (pfx & PFX_SS) n++;
   if (n > 1)
      goto decode_failure; /* multiple seg overrides == illegal */

   /* We have a %fs prefix.  Reject it if there's no evidence in 'vbi'
      that we should accept it. */
   if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
      goto decode_failure;

   /* Ditto for %gs prefixes. */
   if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
      goto decode_failure;

   /* Set up sz.  REX.W (8) beats 0x66 (2); default is 4. */
   sz = 4;
   if (pfx & PFX_66) sz = 2;
   if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;

   /* Now we should be looking at the primary opcode byte or the
      leading escapes.  Check that any LOCK prefix is actually
      allowed. */
   if (haveLOCK(pfx)) {
      if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
         DIP("lock ");
      } else {
         *expect_CAS = False;
         goto decode_failure;
      }
   }

   /* Eat up opcode escape bytes, until we're really looking at the
      primary opcode byte.  But only if there's no VEX present.
      (A VEX prefix encodes the escape in its m-mmmm field instead.) */
   if (!(pfx & PFX_VEX)) {
      vassert(esc == ESC_NONE);
      pre = getUChar(delta);
      if (pre == 0x0F) {
         delta++;
         pre = getUChar(delta);
         switch (pre) {
            case 0x38: esc = ESC_0F38; delta++; break;
            case 0x3A: esc = ESC_0F3A; delta++; break;
            default: esc = ESC_0F; break;
         }
      }
   }

   /* So now we're really really looking at the primary opcode
      byte. */
   Long delta_at_primary_opcode = delta;

   if (!(pfx & PFX_VEX)) {
      /* Handle non-VEX prefixed instructions.  "Legacy" (non-VEX) SSE
         instructions preserve the upper 128 bits of YMM registers;
         iow we can simply ignore the presence of the upper halves of
         these registers. */
      switch (esc) {
         case ESC_NONE:
            delta = dis_ESC_NONE( &dres, expect_CAS,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F:
            delta = dis_ESC_0F ( &dres, expect_CAS,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F38:
            delta = dis_ESC_0F38( &dres,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F3A:
            delta = dis_ESC_0F3A( &dres,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         default:
            vassert(0);
      }
   } else {
      /* VEX prefixed instruction */
      /* Sloppy Intel wording: "An instruction encoded with a VEX.128
         prefix that loads a YMM register operand ..." zeroes out bits
         128 and above of the register. */
      Bool uses_vvvv = False;
      switch (esc) {
         case ESC_0F:
            delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
                                      resteerOkFn, resteerCisOk,
                                      callback_opaque,
                                      archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F38:
            delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
                                        resteerOkFn, resteerCisOk,
                                        callback_opaque,
                                        archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F3A:
            delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
                                        resteerOkFn, resteerCisOk,
                                        callback_opaque,
                                        archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_NONE:
            /* The presence of a VEX prefix, by Intel definition,
               always implies at least an 0F escape. */
            goto decode_failure;
         default:
            vassert(0);
      }
      /* If the insn doesn't use VEX.vvvv then it must be all ones.
         Check this. */
      if (!uses_vvvv) {
         if (getVexNvvvv(pfx) != 0)
            goto decode_failure;
      }
   }

   vassert(delta - delta_at_primary_opcode >= 0);
   vassert(delta - delta_at_primary_opcode < 16/*let's say*/);

   /* Use delta == delta_at_primary_opcode to denote decode failure.
      This implies that any successful decode must use at least one
      byte up. */
   if (delta == delta_at_primary_opcode)
      goto decode_failure;
   else
      goto decode_success; /* \o/ */


  decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex amd64->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
                 getUChar(delta_start+0),
                 getUChar(delta_start+1),
                 getUChar(delta_start+2),
                 getUChar(delta_start+3),
                 getUChar(delta_start+4),
                 getUChar(delta_start+5),
                 getUChar(delta_start+6),
                 getUChar(delta_start+7) );
      vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
                 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
                 getRexX(pfx), getRexB(pfx));
      vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
                 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
                 getVexNvvvv(pfx),
                 esc==ESC_NONE ? "NONE" :
                 esc==ESC_0F ? "0F" :
                 esc==ESC_0F38 ? "0F38" :
                 esc==ESC_0F3A ? "0F3A" : "???");
      vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
                 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
                 haveF3(pfx) ? 1 : 0);
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      RIP should be up-to-date since it made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
   stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesis a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;


  decode_success:
   /* All decode successes end up here.  Update the guest RIP as
      appropriate for how the decode terminated. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
         break;
      case Dis_ResteerU:
      case Dis_ResteerC:
         stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   DIP("\n");
   dres.len = toUInt(delta - delta_start);
   return dres;
}

#undef DIP
#undef DIS


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta].

   Top-level entry point: stashes its arguments in the file-scope
   globals (guest_code, irsb, host_endness, guest_RIP_*), invokes
   disInstr_AMD64_WRK, then cross-checks the result (next-%rip
   assumption, LOCK-prefix/IRCAS consistency).  Relies on global
   state, hence not MT-safe. */

DisResult disInstr_AMD64 ( IRSB* irsb_IN,
                           Bool (*resteerOkFn) ( void*, Addr ),
                           Bool resteerCisOk,
                           void* callback_opaque,
                           const UChar* guest_code_IN,
                           Long delta,
                           Addr guest_IP,
                           VexArch guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo* abiinfo,
                           VexEndness host_endness_IN,
                           Bool sigill_diag_IN )
{
   Int i, x1, x2;
   Bool expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchAMD64);
   guest_code = guest_code_IN;
   irsb = irsb_IN;
   host_endness = host_endness_IN;
   guest_RIP_curr_instr = guest_IP;
   guest_RIP_bbstart = guest_IP - delta;

   /* We'll consult these after doing disInstr_AMD64_WRK.
      (disInstr_AMD64_WRK asserts they are in this reset state on
      entry.) */
   guest_RIP_next_assumed = 0;
   guest_RIP_next_mustcheck = False;

   /* Statements x1 .. x2-1 of irsb_IN are the IR generated for this
      one instruction. */
   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                               resteerCisOk,
                               callback_opaque,
                               delta, archinfo, abiinfo, sigill_diag_IN );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);

   /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
      got it right.  Failure of this assertion is serious and denotes
      a bug in disInstr. */
   if (guest_RIP_next_mustcheck
       && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
      vex_printf("\n");
      vex_printf("assumed next %%rip = 0x%llx\n",
                 guest_RIP_next_assumed );
      vex_printf(" actual next %%rip = 0x%llx\n",
                 guest_RIP_curr_instr + dres.len );
      vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
   }

   /* See comment at the top of disInstr_AMD64_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so as
         to generate a useful error message; then assert.  The second
         call runs with front-end tracing enabled so the offending
         insn is printed. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                                  resteerCisOk,
                                  callback_opaque,
                                  delta, archinfo, abiinfo, sigill_diag_IN );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
   }

   return dres;
}


/*------------------------------------------------------------*/
/*--- Unused stuff                                         ---*/
/*------------------------------------------------------------*/

// A potentially more Memcheck-friendly version of gen_LZCNT, if
// this should ever be needed.
//
//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
//{
//   /* Scheme is simple: propagate the most significant 1-bit into all
//      lower positions in the word.  This gives a word of the form
//      0---01---1.  Now invert it, giving a word of the form
//      1---10---0, then do a population-count idiom (to count the 1s,
//      which is the number of leading zeroes, or the word size if the
//      original word was 0.
//   */
//   Int i;
//   IRTemp t[7];
//   for (i = 0; i < 7; i++) {
//      t[i] = newTemp(ty);
//   }
//   if (ty == Ity_I64) {
//      assign(t[0], binop(Iop_Or64, mkexpr(src),
//                         binop(Iop_Shr64, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
//                         binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
//                         binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
//                         binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
//                         binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
//                         binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
//      assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
//      return gen_POPCOUNT(ty, t[6]);
//   }
//   if (ty == Ity_I32) {
//      assign(t[0], binop(Iop_Or32, mkexpr(src),
//                         binop(Iop_Shr32, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
//                         binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
//                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
//                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
//                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
//      return gen_POPCOUNT(ty, t[5]);
//   }
//   if (ty == Ity_I16) {
//      assign(t[0], binop(Iop_Or16, mkexpr(src),
//                         binop(Iop_Shr16, mkexpr(src), mkU8(1))));
//      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
//                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
//                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
//                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
//      return gen_POPCOUNT(ty, t[4]);
//   }
//   vassert(0);
//}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/