Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                     guest_amd64_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 /* Translates AMD64 code to IR. */
     37 
     38 /* TODO:
     39 
     40    All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
     41    to ensure a 64-bit value is being written.
     42 
     43    x87 FP Limitations:
     44 
     45    * all arithmetic done at 64 bits
     46 
     47    * no FP exceptions, except for handling stack over/underflow
     48 
     49    * FP rounding mode observed only for float->int conversions and
     50      int->float conversions which could lose accuracy, and for
     51      float-to-float rounding.  For all other operations,
     52      round-to-nearest is used, regardless.
     53 
     54    * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     55      simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     56      even when it isn't.
     57 
     58    * some of the FCOM cases could do with testing -- not convinced
     59      that the args are the right way round.
     60 
     61    * FSAVE does not re-initialise the FPU; it should do
     62 
     63    * FINIT not only initialises the FPU environment, it also zeroes
     64      all the FP registers.  It should leave the registers unchanged.
     65 
     66     RDTSC returns zero, always.
     67 
     68     SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
     69     per Intel docs this bit has no meaning anyway.  Since PUSHF is the
     70     only way to observe eflags[1], a proper fix would be to make that
     71     bit be set by PUSHF.
     72 
     73     This module uses global variables and so is not MT-safe (if that
     74     should ever become relevant).
     75 */
     76 
     77 /* Notes re address size overrides (0x67).
     78 
     79    According to the AMD documentation (24594 Rev 3.09, Sept 2003,
     80    "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
     81    and System Instructions"), Section 1.2.3 ("Address-Size Override
     82    Prefix"):
     83 
     84    0x67 applies to all explicit memory references, causing the top
     85    32 bits of the effective address to become zero.
     86 
     87    0x67 has no effect on stack references (push/pop); these always
     88    use a 64-bit address.
     89 
     90    0x67 changes the interpretation of instructions which implicitly
     91    reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
     92    instead.  These are:
     93 
     94       cmp{s,sb,sw,sd,sq}
     95       in{s,sb,sw,sd}
     96       jcxz, jecxz, jrcxz
     97       lod{s,sb,sw,sd,sq}
     98       loop{,e,bz,be,z}
     99       mov{s,sb,sw,sd,sq}
    100       out{s,sb,sw,sd}
    101       rep{,e,ne,nz}
    102       sca{s,sb,sw,sd,sq}
    103       sto{s,sb,sw,sd,sq}
    104       xlat{,b} */
    105 
    106 /* "Special" instructions.
    107 
    108    This instruction decoder can decode three special instructions
    109    which mean nothing natively (are no-ops as far as regs/mem are
    110    concerned) but have meaning for supporting Valgrind.  A special
    111    instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
    112    48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
    113    $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
    114    Following that, one of the following 3 are allowed (standard
    115    interpretation in parentheses):
    116 
    117       4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
    118       4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
    119       4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
    120 
    121    Any other bytes following the 16-byte preamble are illegal and
    122    constitute a failure in instruction decoding.  This all assumes
    123    that the preamble will never occur except in specific code
    124    fragments designed for Valgrind to catch.
    125 
    126    No prefixes may precede a "Special" instruction.
    127 */
    128 
    129 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
    130    insns: the side-exit back to the start of the insn is done with
    131    Ijk_Boring.  This is quite wrong, it should be done with
    132    Ijk_NoRedir, since otherwise the side exit, which is intended to
    133    restart the instruction for whatever reason, could go somewhere
    134    entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
    135    no-redir jumps performance critical, at least for rep-prefixed
    136    instructions, since all iterations thereof would involve such a
    137    jump.  It's not such a big deal with casLE since the side exit is
    138    only taken if the CAS fails, that is, the location is contended,
    139    which is relatively unlikely.
    140 
    141    Note also, the test for CAS success vs failure is done using
    142    Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
    143    Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
    144    shouldn't definedness-check these comparisons.  See
    145    COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
    146    background/rationale.
    147 */
    148 
    149 /* LOCK prefixed instructions.  These are translated using IR-level
    150    CAS statements (IRCAS) and are believed to preserve atomicity, even
    151    from the point of view of some other process racing against a
    152    simulated one (presumably they communicate via a shared memory
    153    segment).
    154 
    155    Handlers which are aware of LOCK prefixes are:
    156       dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
    157       dis_cmpxchg_G_E  (cmpxchg)
    158       dis_Grp1         (add, or, adc, sbb, and, sub, xor)
    159       dis_Grp3         (not, neg)
    160       dis_Grp4         (inc, dec)
    161       dis_Grp5         (inc, dec)
    162       dis_Grp8_Imm     (bts, btc, btr)
    163       dis_bt_G_E       (bts, btc, btr)
    164       dis_xadd_G_E     (xadd)
    165 */
    166 
    167 
    168 #include "libvex_basictypes.h"
    169 #include "libvex_ir.h"
    170 #include "libvex.h"
    171 #include "libvex_guest_amd64.h"
    172 
    173 #include "main_util.h"
    174 #include "main_globals.h"
    175 #include "guest_generic_bb_to_IR.h"
    176 #include "guest_generic_x87.h"
    177 #include "guest_amd64_defs.h"
    178 
    179 
    180 /*------------------------------------------------------------*/
    181 /*--- Globals                                              ---*/
    182 /*------------------------------------------------------------*/
    183 
    184 /* These are set at the start of the translation of an insn, right
    185    down in disInstr_AMD64, so that we don't have to pass them around
    186    endlessly.  They are all constant during the translation of any
    187    given insn. */
    188 
    189 /* These are set at the start of the translation of a BB, so
    190    that we don't have to pass them around endlessly. */
    191 
    192 /* We need to know this to do sub-register accesses correctly. */
    193 static Bool host_is_bigendian;
    194 
    195 /* Pointer to the guest code area (points to start of BB, not to the
    196    insn being processed). */
    197 static UChar* guest_code;
    198 
    199 /* The guest address corresponding to guest_code[0]. */
    200 static Addr64 guest_RIP_bbstart;
    201 
    202 /* The guest address for the instruction currently being
    203    translated. */
    204 static Addr64 guest_RIP_curr_instr;
    205 
    206 /* The IRSB* into which we're generating code. */
    207 static IRSB* irsb;
    208 
    209 /* For ensuring that %rip-relative addressing is done right.  A read
    210    of %rip generates the address of the next instruction.  It may be
    211    that we don't conveniently know that inside disAMode().  For sanity
    212    checking, if the next insn %rip is needed, we make a guess at what
    213    it is, record that guess here, and set the accompanying Bool to
    214    indicate that -- after this insn's decode is finished -- that guess
    215    needs to be checked.  */
    216 
    217 /* At the start of each insn decode, is set to (0, False).
    218    After the decode, if _mustcheck is now True, _assumed is
    219    checked. */
    220 
    221 static Addr64 guest_RIP_next_assumed;
    222 static Bool   guest_RIP_next_mustcheck;
    223 
    224 
    225 /*------------------------------------------------------------*/
    226 /*--- Helpers for constructing IR.                         ---*/
    227 /*------------------------------------------------------------*/
    228 
    229 /* Generate a new temporary of the given type. */
    230 static IRTemp newTemp ( IRType ty )
    231 {
    232    vassert(isPlausibleIRType(ty));
    233    return newIRTemp( irsb->tyenv, ty );
    234 }
    235 
/* Append statement 'st' to the IRSB currently under construction
   (the file-scope global 'irsb'). */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}
    241 
    242 /* Generate a statement "dst := e". */
    243 static void assign ( IRTemp dst, IRExpr* e )
    244 {
    245    stmt( IRStmt_WrTmp(dst, e) );
    246 }
    247 
/* Convenience wrapper: build a unary-operator expression node. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
    252 
/* Convenience wrapper: build a binary-operator expression node. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
    257 
/* Convenience wrapper: build a ternary-operator expression node. */
static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}
    262 
/* Build an expression that reads the value of temporary 'tmp'. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}
    267 
    268 static IRExpr* mkU8 ( ULong i )
    269 {
    270    vassert(i < 256);
    271    return IRExpr_Const(IRConst_U8( (UChar)i ));
    272 }
    273 
    274 static IRExpr* mkU16 ( ULong i )
    275 {
    276    vassert(i < 0x10000ULL);
    277    return IRExpr_Const(IRConst_U16( (UShort)i ));
    278 }
    279 
    280 static IRExpr* mkU32 ( ULong i )
    281 {
    282    vassert(i < 0x100000000ULL);
    283    return IRExpr_Const(IRConst_U32( (UInt)i ));
    284 }
    285 
/* Make a U64 constant expression.  No range check is needed since
   any ULong value is representable. */
static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}
    290 
    291 static IRExpr* mkU ( IRType ty, ULong i )
    292 {
    293    switch (ty) {
    294       case Ity_I8:  return mkU8(i);
    295       case Ity_I16: return mkU16(i);
    296       case Ity_I32: return mkU32(i);
    297       case Ity_I64: return mkU64(i);
    298       default: vpanic("mkU(amd64)");
    299    }
    300 }
    301 
/* Emit a little-endian store of 'data' at address 'addr' into the
   current IRSB. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
    306 
/* Build (but do not emit) a little-endian load of type 'ty' from
   address 'addr'. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
    311 
    312 static IROp mkSizedOp ( IRType ty, IROp op8 )
    313 {
    314    vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
    315            || op8 == Iop_Mul8
    316            || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
    317            || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
    318            || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
    319            || op8 == Iop_CasCmpNE8
    320            || op8 == Iop_Not8 );
    321    switch (ty) {
    322       case Ity_I8:  return 0 +op8;
    323       case Ity_I16: return 1 +op8;
    324       case Ity_I32: return 2 +op8;
    325       case Ity_I64: return 3 +op8;
    326       default: vpanic("mkSizedOp(amd64)");
    327    }
    328 }
    329 
    330 static
    331 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
    332 {
    333    if (szSmall == 1 && szBig == 4) {
    334       return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
    335    }
    336    if (szSmall == 1 && szBig == 2) {
    337       return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
    338    }
    339    if (szSmall == 2 && szBig == 4) {
    340       return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
    341    }
    342    if (szSmall == 1 && szBig == 8 && !signd) {
    343       return unop(Iop_8Uto64, src);
    344    }
    345    if (szSmall == 1 && szBig == 8 && signd) {
    346       return unop(Iop_8Sto64, src);
    347    }
    348    if (szSmall == 2 && szBig == 8 && !signd) {
    349       return unop(Iop_16Uto64, src);
    350    }
    351    if (szSmall == 2 && szBig == 8 && signd) {
    352       return unop(Iop_16Sto64, src);
    353    }
    354    vpanic("doScalarWidening(amd64)");
    355 }
    356 
    357 
    358 
    359 /*------------------------------------------------------------*/
    360 /*--- Debugging output                                     ---*/
    361 /*------------------------------------------------------------*/
    362 
/* Bomb out if we can't handle something: print a general notice,
   then panic with the caller-supplied message.  Never returns. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}
    370 
    371 #define DIP(format, args...)           \
    372    if (vex_traceflags & VEX_TRACE_FE)  \
    373       vex_printf(format, ## args)
    374 
    375 #define DIS(buf, format, args...)      \
    376    if (vex_traceflags & VEX_TRACE_FE)  \
    377       vex_sprintf(buf, format, ## args)
    378 
    379 
    380 /*------------------------------------------------------------*/
    381 /*--- Offsets of various parts of the amd64 guest state.   ---*/
    382 /*------------------------------------------------------------*/
    383 
    384 #define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
    385 #define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
    386 #define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
    387 #define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
    388 #define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
    389 #define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
    390 #define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
    391 #define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
    392 #define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
    393 #define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
    394 #define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
    395 #define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
    396 #define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
    397 #define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
    398 #define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
    399 #define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)
    400 
    401 #define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)
    402 
    403 #define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
    404 #define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)
    405 
    406 #define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
    407 #define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
    408 #define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
    409 #define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)
    410 
    411 #define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
    412 #define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
    413 #define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
    414 #define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
    415 #define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
    416 #define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
    417 #define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
    418 #define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)
    419 //..
    420 //.. #define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
    421 //.. #define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
    422 //.. #define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
    423 //.. #define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
    424 //.. #define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
    425 //.. #define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
    426 //.. #define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
    427 //.. #define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)
    428 
    429 #define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
    430 #define OFFB_XMM0      offsetof(VexGuestAMD64State,guest_XMM0)
    431 #define OFFB_XMM1      offsetof(VexGuestAMD64State,guest_XMM1)
    432 #define OFFB_XMM2      offsetof(VexGuestAMD64State,guest_XMM2)
    433 #define OFFB_XMM3      offsetof(VexGuestAMD64State,guest_XMM3)
    434 #define OFFB_XMM4      offsetof(VexGuestAMD64State,guest_XMM4)
    435 #define OFFB_XMM5      offsetof(VexGuestAMD64State,guest_XMM5)
    436 #define OFFB_XMM6      offsetof(VexGuestAMD64State,guest_XMM6)
    437 #define OFFB_XMM7      offsetof(VexGuestAMD64State,guest_XMM7)
    438 #define OFFB_XMM8      offsetof(VexGuestAMD64State,guest_XMM8)
    439 #define OFFB_XMM9      offsetof(VexGuestAMD64State,guest_XMM9)
    440 #define OFFB_XMM10     offsetof(VexGuestAMD64State,guest_XMM10)
    441 #define OFFB_XMM11     offsetof(VexGuestAMD64State,guest_XMM11)
    442 #define OFFB_XMM12     offsetof(VexGuestAMD64State,guest_XMM12)
    443 #define OFFB_XMM13     offsetof(VexGuestAMD64State,guest_XMM13)
    444 #define OFFB_XMM14     offsetof(VexGuestAMD64State,guest_XMM14)
    445 #define OFFB_XMM15     offsetof(VexGuestAMD64State,guest_XMM15)
    446 #define OFFB_XMM16     offsetof(VexGuestAMD64State,guest_XMM16)
    447 
    448 #define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
    449 #define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
    450 #define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)
    451 
    452 #define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
    453 
    454 
    455 /*------------------------------------------------------------*/
    456 /*--- Helper bits and pieces for deconstructing the        ---*/
    457 /*--- amd64 insn stream.                                   ---*/
    458 /*------------------------------------------------------------*/
    459 
    460 /* This is the AMD64 register encoding -- integer regs. */
    461 #define R_RAX 0
    462 #define R_RCX 1
    463 #define R_RDX 2
    464 #define R_RBX 3
    465 #define R_RSP 4
    466 #define R_RBP 5
    467 #define R_RSI 6
    468 #define R_RDI 7
    469 #define R_R8  8
    470 #define R_R9  9
    471 #define R_R10 10
    472 #define R_R11 11
    473 #define R_R12 12
    474 #define R_R13 13
    475 #define R_R14 14
    476 #define R_R15 15
    477 
    478 //.. #define R_AL (0+R_EAX)
    479 //.. #define R_AH (4+R_EAX)
    480 
    481 /* This is the Intel register encoding -- segment regs. */
    482 #define R_ES 0
    483 #define R_CS 1
    484 #define R_SS 2
    485 #define R_DS 3
    486 #define R_FS 4
    487 #define R_GS 5
    488 
    489 
    490 /* Various simple conversions */
    491 
    492 static ULong extend_s_8to64 ( UChar x )
    493 {
    494    return (ULong)((((Long)x) << 56) >> 56);
    495 }
    496 
    497 static ULong extend_s_16to64 ( UShort x )
    498 {
    499    return (ULong)((((Long)x) << 48) >> 48);
    500 }
    501 
    502 static ULong extend_s_32to64 ( UInt x )
    503 {
    504    return (ULong)((((Long)x) << 32) >> 32);
    505 }
    506 
    507 /* Figure out whether the mod and rm parts of a modRM byte refer to a
    508    register or memory.  If so, the byte will have the form 11XXXYYY,
    509    where YYY is the register number. */
    510 inline
    511 static Bool epartIsReg ( UChar mod_reg_rm )
    512 {
    513    return toBool(0xC0 == (mod_reg_rm & 0xC0));
    514 }
    515 
    516 /* Extract the 'g' field from a modRM byte.  This only produces 3
    517    bits, which is not a complete register number.  You should avoid
    518    this function if at all possible. */
    519 inline
    520 static Int gregLO3ofRM ( UChar mod_reg_rm )
    521 {
    522    return (Int)( (mod_reg_rm >> 3) & 7 );
    523 }
    524 
    525 /* Ditto the 'e' field of a modRM byte. */
    526 inline
    527 static Int eregLO3ofRM ( UChar mod_reg_rm )
    528 {
    529    return (Int)(mod_reg_rm & 0x7);
    530 }
    531 
/* Get an 8/16/32-bit unsigned value out of the insn stream. */
    533 
    534 static UChar getUChar ( Long delta )
    535 {
    536    UChar v = guest_code[delta+0];
    537    return v;
    538 }
    539 
    540 static UInt getUDisp16 ( Long delta )
    541 {
    542    UInt v = guest_code[delta+1]; v <<= 8;
    543    v |= guest_code[delta+0];
    544    return v & 0xFFFF;
    545 }
    546 
    547 //.. static UInt getUDisp ( Int size, Long delta )
    548 //.. {
    549 //..    switch (size) {
    550 //..       case 4: return getUDisp32(delta);
    551 //..       case 2: return getUDisp16(delta);
    552 //..       case 1: return getUChar(delta);
    553 //..       default: vpanic("getUDisp(x86)");
    554 //..    }
    555 //..    return 0; /*notreached*/
    556 //.. }
    557 
    558 
    559 /* Get a byte value out of the insn stream and sign-extend to 64
    560    bits. */
    561 static Long getSDisp8 ( Long delta )
    562 {
    563    return extend_s_8to64( guest_code[delta] );
    564 }
    565 
    566 /* Get a 16-bit value out of the insn stream and sign-extend to 64
    567    bits. */
    568 static Long getSDisp16 ( Long delta )
    569 {
    570    UInt v = guest_code[delta+1]; v <<= 8;
    571    v |= guest_code[delta+0];
    572    return extend_s_16to64( (UShort)v );
    573 }
    574 
    575 /* Get a 32-bit value out of the insn stream and sign-extend to 64
    576    bits. */
    577 static Long getSDisp32 ( Long delta )
    578 {
    579    UInt v = guest_code[delta+3]; v <<= 8;
    580    v |= guest_code[delta+2]; v <<= 8;
    581    v |= guest_code[delta+1]; v <<= 8;
    582    v |= guest_code[delta+0];
    583    return extend_s_32to64( v );
    584 }
    585 
    586 /* Get a 64-bit value out of the insn stream. */
    587 static Long getDisp64 ( Long delta )
    588 {
    589    ULong v = 0;
    590    v |= guest_code[delta+7]; v <<= 8;
    591    v |= guest_code[delta+6]; v <<= 8;
    592    v |= guest_code[delta+5]; v <<= 8;
    593    v |= guest_code[delta+4]; v <<= 8;
    594    v |= guest_code[delta+3]; v <<= 8;
    595    v |= guest_code[delta+2]; v <<= 8;
    596    v |= guest_code[delta+1]; v <<= 8;
    597    v |= guest_code[delta+0];
    598    return v;
    599 }
    600 
    601 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
    602    if this is called with size==8.  Should not happen. */
    603 static Long getSDisp ( Int size, Long delta )
    604 {
    605    switch (size) {
    606       case 4: return getSDisp32(delta);
    607       case 2: return getSDisp16(delta);
    608       case 1: return getSDisp8(delta);
    609       default: vpanic("getSDisp(amd64)");
    610   }
    611 }
    612 
    613 static ULong mkSizeMask ( Int sz )
    614 {
    615    switch (sz) {
    616       case 1: return 0x00000000000000FFULL;
    617       case 2: return 0x000000000000FFFFULL;
    618       case 4: return 0x00000000FFFFFFFFULL;
    619       case 8: return 0xFFFFFFFFFFFFFFFFULL;
    620       default: vpanic("mkSzMask(amd64)");
    621    }
    622 }
    623 
    624 static Int imin ( Int a, Int b )
    625 {
    626    return (a < b) ? a : b;
    627 }
    628 
    629 static IRType szToITy ( Int n )
    630 {
    631    switch (n) {
    632       case 1: return Ity_I8;
    633       case 2: return Ity_I16;
    634       case 4: return Ity_I32;
    635       case 8: return Ity_I64;
    636       default: vex_printf("\nszToITy(%d)\n", n);
    637                vpanic("szToITy(amd64)");
    638    }
    639 }
    640 
    641 
    642 /*------------------------------------------------------------*/
    643 /*--- For dealing with prefixes.                           ---*/
    644 /*------------------------------------------------------------*/
    645 
    646 /* The idea is to pass around an int holding a bitmask summarising
    647    info from the prefixes seen on the current instruction, including
    648    info from the REX byte.  This info is used in various places, but
    649    most especially when making sense of register fields in
    650    instructions.
    651 
    652    The top 16 bits of the prefix are 0x3141, just as a hacky way
    653    to ensure it really is a valid prefix.
    654 
    655    Things you can safely assume about a well-formed prefix:
    656    * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
    657    * if REX is not present then REXW,REXR,REXX,REXB will read
    658      as zero.
    659    * F2 and F3 will not both be 1.
    660 */
    661 
    662 typedef UInt  Prefix;
    663 
    664 #define PFX_ASO   (1<<0)     /* address-size override present (0x67) */
    665 #define PFX_66    (1<<1)     /* operand-size override-to-16 present (0x66) */
    666 #define PFX_REX   (1<<2)     /* REX byte present (0x40 to 0x4F) */
    667 #define PFX_REXW  (1<<3)     /* REX W bit, if REX present, else 0 */
    668 #define PFX_REXR  (1<<4)     /* REX R bit, if REX present, else 0 */
    669 #define PFX_REXX  (1<<5)     /* REX X bit, if REX present, else 0 */
    670 #define PFX_REXB  (1<<6)     /* REX B bit, if REX present, else 0 */
    671 #define PFX_LOCK  (1<<7)     /* bus LOCK prefix present (0xF0) */
#define PFX_F2    (1<<8)     /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3    (1<<9)     /* REP/REPE/REPZ prefix present (0xF3) */
    674 #define PFX_CS    (1<<10)    /* CS segment prefix present (0x2E) */
    675 #define PFX_DS    (1<<11)    /* DS segment prefix present (0x3E) */
    676 #define PFX_ES    (1<<12)    /* ES segment prefix present (0x26) */
    677 #define PFX_FS    (1<<13)    /* FS segment prefix present (0x64) */
    678 #define PFX_GS    (1<<14)    /* GS segment prefix present (0x65) */
    679 #define PFX_SS    (1<<15)    /* SS segment prefix present (0x36) */
    680 
    681 #define PFX_EMPTY 0x31410000
    682 
    683 static Bool IS_VALID_PFX ( Prefix pfx ) {
    684    return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
    685 }
    686 
    687 static Bool haveREX ( Prefix pfx ) {
    688    return toBool(pfx & PFX_REX);
    689 }
    690 
    691 static Int getRexW ( Prefix pfx ) {
    692    return (pfx & PFX_REXW) ? 1 : 0;
    693 }
    694 /* Apparently unused.
    695 static Int getRexR ( Prefix pfx ) {
    696    return (pfx & PFX_REXR) ? 1 : 0;
    697 }
    698 */
    699 static Int getRexX ( Prefix pfx ) {
    700    return (pfx & PFX_REXX) ? 1 : 0;
    701 }
    702 static Int getRexB ( Prefix pfx ) {
    703    return (pfx & PFX_REXB) ? 1 : 0;
    704 }
    705 
    706 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
    707    completely changes what instruction it really is. */
    708 static Bool haveF2orF3 ( Prefix pfx ) {
    709    return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
    710 }
    711 static Bool haveF2 ( Prefix pfx ) {
    712    return toBool((pfx & PFX_F2) > 0);
    713 }
    714 static Bool haveF3 ( Prefix pfx ) {
    715    return toBool((pfx & PFX_F3) > 0);
    716 }
    717 
    718 static Bool have66 ( Prefix pfx ) {
    719    return toBool((pfx & PFX_66) > 0);
    720 }
    721 static Bool haveASO ( Prefix pfx ) {
    722    return toBool((pfx & PFX_ASO) > 0);
    723 }
    724 
    725 /* Return True iff pfx has 66 set and F2 and F3 clear */
    726 static Bool have66noF2noF3 ( Prefix pfx )
    727 {
    728   return
    729      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
    730 }
    731 
    732 /* Return True iff pfx has F2 set and 66 and F3 clear */
    733 static Bool haveF2no66noF3 ( Prefix pfx )
    734 {
    735   return
    736      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
    737 }
    738 
    739 /* Return True iff pfx has F3 set and 66 and F2 clear */
    740 static Bool haveF3no66noF2 ( Prefix pfx )
    741 {
    742   return
    743      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
    744 }
    745 
    746 /* Return True iff pfx has F3 set and F2 clear */
    747 static Bool haveF3noF2 ( Prefix pfx )
    748 {
    749   return
    750      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
    751 }
    752 
    753 /* Return True iff pfx has F2 set and F3 clear */
    754 static Bool haveF2noF3 ( Prefix pfx )
    755 {
    756   return
    757      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
    758 }
    759 
    760 /* Return True iff pfx has 66, F2 and F3 clear */
    761 static Bool haveNo66noF2noF3 ( Prefix pfx )
    762 {
    763   return
    764      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
    765 }
    766 
    767 /* Return True iff pfx has any of 66, F2 and F3 set */
    768 static Bool have66orF2orF3 ( Prefix pfx )
    769 {
    770   return toBool( ! haveNo66noF2noF3(pfx) );
    771 }
    772 
    773 /* Return True iff pfx has 66 or F2 set */
    774 static Bool have66orF2 ( Prefix pfx )
    775 {
    776    return toBool((pfx & (PFX_66|PFX_F2)) > 0);
    777 }
    778 
    779 /* Clear all the segment-override bits in a prefix. */
    780 static Prefix clearSegBits ( Prefix p )
    781 {
    782    return
    783       p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
    784 }
    785 
    786 
    787 /*------------------------------------------------------------*/
    788 /*--- For dealing with integer registers                   ---*/
    789 /*------------------------------------------------------------*/
    790 
    791 /* This is somewhat complex.  The rules are:
    792 
    793    For 64, 32 and 16 bit register references, the e or g fields in the
    794    modrm bytes supply the low 3 bits of the register number.  The
    795    fourth (most-significant) bit of the register number is supplied by
    796    the REX byte, if it is present; else that bit is taken to be zero.
    797 
    798    The REX.R bit supplies the high bit corresponding to the g register
    799    field, and the REX.B bit supplies the high bit corresponding to the
    800    e register field (when the mod part of modrm indicates that modrm's
    801    e component refers to a register and not to memory).
    802 
    803    The REX.X bit supplies a high register bit for certain registers
    804    in SIB address modes, and is generally rarely used.
    805 
    806    For 8 bit register references, the presence of the REX byte itself
    807    has significance.  If there is no REX present, then the 3-bit
    808    number extracted from the modrm e or g field is treated as an index
    809    into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
    810    old x86 encoding scheme.
    811 
    812    But if there is a REX present, the register reference is
    813    interpreted in the same way as for 64/32/16-bit references: a high
    814    bit is extracted from REX, giving a 4-bit number, and the denoted
    815    register is the lowest 8 bits of the 16 integer registers denoted
    816    by the number.  In particular, values 3 through 7 of this sequence
    817    do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
    818    %rsp %rbp %rsi %rdi.
    819 
    820    The REX.W bit has no bearing at all on register numbers.  Instead
    821    its presence indicates that the operand size is to be overridden
    822    from its default value (32 bits) to 64 bits instead.  This is in
    823    the same fashion that an 0x66 prefix indicates the operand size is
    824    to be overridden from 32 bits down to 16 bits.  When both REX.W and
    825    0x66 are present there is a conflict, and REX.W takes precedence.
    826 
    827    Rather than try to handle this complexity using a single huge
    828    function, several smaller ones are provided.  The aim is to make it
    829    as difficult as possible to screw up register decoding in a subtle
    830    and hard-to-track-down way.
    831 
    832    Because these routines fish around in the host's memory (that is,
    833    in the guest state area) for sub-parts of guest registers, their
    834    correctness depends on the host's endianness.  So far these
    835    routines only work for little-endian hosts.  Those for which
    836    endianness is important have assertions to ensure sanity.
    837 */
    838 
    839 
    840 /* About the simplest question you can ask: where do the 64-bit
    841    integer registers live (in the guest state) ? */
    842 
/* Map an integer register number (0 .. 15, RAX .. R15) to the guest
   state offset of the corresponding full 64-bit register.  Panics on
   any other value, so callers must pre-validate the number. */
static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}
    865 
    866 
    867 /* Produce the name of an integer register, for printing purposes.
    868    reg is a number in the range 0 .. 15 that has been generated from a
    869    3-bit reg-field number and a REX extension bit.  irregular denotes
    870    the case where sz==1 and no REX byte is present. */
    871 
/* Produce the printable name of integer register 'reg' (0 .. 15) at
   access size 'sz' bytes (8/4/2/1).  'irregular' selects the legacy
   no-REX byte-register naming (%al..%bh); it may only be True when
   sz == 1, and then only for reg < 8. */
static
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   /* The old x86 scheme: entries 4..7 are the high-byte registers,
      not the low bytes of %rsp/%rbp/%rsi/%rdi. */
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      /* irregular naming only exists at byte size */
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}
    910 
    911 /* Using the same argument conventions as nameIReg, produce the
    912    guest state offset of an integer register. */
    913 
/* Guest state offset of integer register 'reg' at access size 'sz',
   using the same argument conventions as nameIReg.  In the irregular
   byte case, encodings 4..7 (R_RSP..R_RDI) denote %ah %ch %dh %bh,
   i.e. byte 1 of %rax/%rcx/%rdx/%rbx on a little-endian host. */
static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;   /* %ah */
         case R_RBP: return 1+ OFFB_RCX;   /* %ch */
         case R_RSI: return 1+ OFFB_RDX;   /* %dh */
         case R_RDI: return 1+ OFFB_RBX;   /* %bh */
         default:    break; /* use the normal case */
      }
   }

   /* Normal case: low part of the full register, so the 64-bit
      offset works for every width (little-endian host). */
   return integerGuestReg64Offset(reg);
}
    939 
    940 
    941 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
    942 
    943 static IRExpr* getIRegCL ( void )
    944 {
    945    vassert(!host_is_bigendian);
    946    return IRExpr_Get( OFFB_RCX, Ity_I8 );
    947 }
    948 
    949 
    950 /* Write to the %AH register. */
    951 
    952 static void putIRegAH ( IRExpr* e )
    953 {
    954    vassert(!host_is_bigendian);
    955    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
    956    stmt( IRStmt_Put( OFFB_RAX+1, e ) );
    957 }
    958 
    959 
    960 /* Read/write various widths of %RAX, as it has various
    961    special-purpose uses. */
    962 
    963 static HChar* nameIRegRAX ( Int sz )
    964 {
    965    switch (sz) {
    966       case 1: return "%al";
    967       case 2: return "%ax";
    968       case 4: return "%eax";
    969       case 8: return "%rax";
    970       default: vpanic("nameIRegRAX(amd64)");
    971    }
    972 }
    973 
    974 static IRExpr* getIRegRAX ( Int sz )
    975 {
    976    vassert(!host_is_bigendian);
    977    switch (sz) {
    978       case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
    979       case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
    980       case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
    981       case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
    982       default: vpanic("getIRegRAX(amd64)");
    983    }
    984 }
    985 
    986 static void putIRegRAX ( Int sz, IRExpr* e )
    987 {
    988    IRType ty = typeOfIRExpr(irsb->tyenv, e);
    989    vassert(!host_is_bigendian);
    990    switch (sz) {
    991       case 8: vassert(ty == Ity_I64);
    992               stmt( IRStmt_Put( OFFB_RAX, e ));
    993               break;
    994       case 4: vassert(ty == Ity_I32);
    995               stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
    996               break;
    997       case 2: vassert(ty == Ity_I16);
    998               stmt( IRStmt_Put( OFFB_RAX, e ));
    999               break;
   1000       case 1: vassert(ty == Ity_I8);
   1001               stmt( IRStmt_Put( OFFB_RAX, e ));
   1002               break;
   1003       default: vpanic("putIRegRAX(amd64)");
   1004    }
   1005 }
   1006 
   1007 
   1008 /* Read/write various widths of %RDX, as it has various
   1009    special-purpose uses. */
   1010 
   1011 static HChar* nameIRegRDX ( Int sz )
   1012 {
   1013    switch (sz) {
   1014       case 1: return "%dl";
   1015       case 2: return "%dx";
   1016       case 4: return "%edx";
   1017       case 8: return "%rdx";
   1018       default: vpanic("nameIRegRDX(amd64)");
   1019    }
   1020 }
   1021 
   1022 static IRExpr* getIRegRDX ( Int sz )
   1023 {
   1024    vassert(!host_is_bigendian);
   1025    switch (sz) {
   1026       case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
   1027       case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
   1028       case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
   1029       case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
   1030       default: vpanic("getIRegRDX(amd64)");
   1031    }
   1032 }
   1033 
   1034 static void putIRegRDX ( Int sz, IRExpr* e )
   1035 {
   1036    vassert(!host_is_bigendian);
   1037    vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   1038    switch (sz) {
   1039       case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
   1040               break;
   1041       case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
   1042               break;
   1043       case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
   1044               break;
   1045       case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
   1046               break;
   1047       default: vpanic("putIRegRDX(amd64)");
   1048    }
   1049 }
   1050 
   1051 
   1052 /* Simplistic functions to deal with the integer registers as a
   1053    straightforward bank of 16 64-bit regs. */
   1054 
   1055 static IRExpr* getIReg64 ( UInt regno )
   1056 {
   1057    return IRExpr_Get( integerGuestReg64Offset(regno),
   1058                       Ity_I64 );
   1059 }
   1060 
   1061 static void putIReg64 ( UInt regno, IRExpr* e )
   1062 {
   1063    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   1064    stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
   1065 }
   1066 
   1067 static HChar* nameIReg64 ( UInt regno )
   1068 {
   1069    return nameIReg( 8, regno, False );
   1070 }
   1071 
   1072 
   1073 /* Simplistic functions to deal with the lower halves of integer
   1074    registers as a straightforward bank of 16 32-bit regs. */
   1075 
   1076 static IRExpr* getIReg32 ( UInt regno )
   1077 {
   1078    vassert(!host_is_bigendian);
   1079    return unop(Iop_64to32,
   1080                IRExpr_Get( integerGuestReg64Offset(regno),
   1081                            Ity_I64 ));
   1082 }
   1083 
   1084 static void putIReg32 ( UInt regno, IRExpr* e )
   1085 {
   1086    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   1087    stmt( IRStmt_Put( integerGuestReg64Offset(regno),
   1088                      unop(Iop_32Uto64,e) ) );
   1089 }
   1090 
   1091 static HChar* nameIReg32 ( UInt regno )
   1092 {
   1093    return nameIReg( 4, regno, False );
   1094 }
   1095 
   1096 
   1097 /* Simplistic functions to deal with the lower quarters of integer
   1098    registers as a straightforward bank of 16 16-bit regs. */
   1099 
   1100 static IRExpr* getIReg16 ( UInt regno )
   1101 {
   1102    vassert(!host_is_bigendian);
   1103    return IRExpr_Get( integerGuestReg64Offset(regno),
   1104                       Ity_I16 );
   1105 }
   1106 
   1107 static void putIReg16 ( UInt regno, IRExpr* e )
   1108 {
   1109    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   1110    stmt( IRStmt_Put( integerGuestReg64Offset(regno),
   1111                      unop(Iop_16Uto64,e) ) );
   1112 }
   1113 
   1114 static HChar* nameIReg16 ( UInt regno )
   1115 {
   1116    return nameIReg( 2, regno, False );
   1117 }
   1118 
   1119 
   1120 /* Sometimes what we know is a 3-bit register number, a REX byte, and
   1121    which field of the REX byte is to be used to extend to a 4-bit
   1122    number.  These functions cater for that situation.
   1123 */
   1124 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
   1125 {
   1126    vassert(lo3bits < 8);
   1127    vassert(IS_VALID_PFX(pfx));
   1128    return getIReg64( lo3bits | (getRexX(pfx) << 3) );
   1129 }
   1130 
   1131 static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
   1132 {
   1133    vassert(lo3bits < 8);
   1134    vassert(IS_VALID_PFX(pfx));
   1135    return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
   1136 }
   1137 
   1138 static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
   1139 {
   1140    vassert(lo3bits < 8);
   1141    vassert(IS_VALID_PFX(pfx));
   1142    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   1143    return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
   1144                         toBool(sz==1 && !haveREX(pfx)) );
   1145 }
   1146 
/* Read a register given its low 3 bits plus REX.B as bit 3. */
static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      /* 32-bit reads are done as a full 64-bit Get followed by a
         narrowing, hence sz is bumped to 8 here.  Note that after
         this assignment the sz==1 irregular-naming test below is
         trivially False, which is correct: an original sz of 4
         never uses the legacy byte-register scheme. */
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     toBool(sz==1 && !haveREX(pfx)) ),
                     szToITy(sz)
                 )
             );
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz)
             );
   }
}
   1169 
   1170 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
   1171 {
   1172    vassert(lo3bits < 8);
   1173    vassert(IS_VALID_PFX(pfx));
   1174    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   1175    vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   1176    stmt( IRStmt_Put(
   1177             offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
   1178                             toBool(sz==1 && !haveREX(pfx)) ),
   1179             sz==4 ? unop(Iop_32Uto64,e) : e
   1180    ));
   1181 }
   1182 
   1183 
   1184 /* Functions for getting register numbers from modrm bytes and REX
   1185    when we don't have to consider the complexities of integer subreg
   1186    accesses.
   1187 */
   1188 /* Extract the g reg field from a modRM byte, and augment it using the
   1189    REX.R bit from the supplied REX byte.  The R bit usually is
   1190    associated with the g register field.
   1191 */
   1192 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
   1193 {
   1194    Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   1195    reg += (pfx & PFX_REXR) ? 8 : 0;
   1196    return reg;
   1197 }
   1198 
   1199 /* Extract the e reg field from a modRM byte, and augment it using the
   1200    REX.B bit from the supplied REX byte.  The B bit usually is
   1201    associated with the e register field (when modrm indicates e is a
   1202    register, that is).
   1203 */
   1204 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
   1205 {
   1206    Int rm;
   1207    vassert(epartIsReg(mod_reg_rm));
   1208    rm = (Int)(mod_reg_rm & 0x7);
   1209    rm += (pfx & PFX_REXB) ? 8 : 0;
   1210    return rm;
   1211 }
   1212 
   1213 
   1214 /* General functions for dealing with integer register access. */
   1215 
   1216 /* Produce the guest state offset for a reference to the 'g' register
   1217    field in a modrm byte, taking into account REX (or its absence),
   1218    and the size of the access.
   1219 */
   1220 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1221 {
   1222    UInt reg;
   1223    vassert(!host_is_bigendian);
   1224    vassert(IS_VALID_PFX(pfx));
   1225    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   1226    reg = gregOfRexRM( pfx, mod_reg_rm );
   1227    return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
   1228 }
   1229 
   1230 static
   1231 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1232 {
   1233    if (sz == 4) {
   1234       sz = 8;
   1235       return unop(Iop_64to32,
   1236                   IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
   1237                               szToITy(sz) ));
   1238    } else {
   1239       return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
   1240                          szToITy(sz) );
   1241    }
   1242 }
   1243 
   1244 static
   1245 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
   1246 {
   1247    vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   1248    if (sz == 4) {
   1249       e = unop(Iop_32Uto64,e);
   1250    }
   1251    stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
   1252 }
   1253 
   1254 static
   1255 HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1256 {
   1257    return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
   1258                         toBool(sz==1 && !haveREX(pfx)) );
   1259 }
   1260 
   1261 
   1262 /* Produce the guest state offset for a reference to the 'e' register
   1263    field in a modrm byte, taking into account REX (or its absence),
   1264    and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   1265    denotes a memory access rather than a register access.
   1266 */
   1267 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1268 {
   1269    UInt reg;
   1270    vassert(!host_is_bigendian);
   1271    vassert(IS_VALID_PFX(pfx));
   1272    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   1273    reg = eregOfRexRM( pfx, mod_reg_rm );
   1274    return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
   1275 }
   1276 
   1277 static
   1278 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1279 {
   1280    if (sz == 4) {
   1281       sz = 8;
   1282       return unop(Iop_64to32,
   1283                   IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
   1284                               szToITy(sz) ));
   1285    } else {
   1286       return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
   1287                          szToITy(sz) );
   1288    }
   1289 }
   1290 
   1291 static
   1292 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
   1293 {
   1294    vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   1295    if (sz == 4) {
   1296       e = unop(Iop_32Uto64,e);
   1297    }
   1298    stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
   1299 }
   1300 
   1301 static
   1302 HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
   1303 {
   1304    return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
   1305                         toBool(sz==1 && !haveREX(pfx)) );
   1306 }
   1307 
   1308 
   1309 /*------------------------------------------------------------*/
   1310 /*--- For dealing with XMM registers                       ---*/
   1311 /*------------------------------------------------------------*/
   1312 
   1313 //.. static Int segmentGuestRegOffset ( UInt sreg )
   1314 //.. {
   1315 //..    switch (sreg) {
   1316 //..       case R_ES: return OFFB_ES;
   1317 //..       case R_CS: return OFFB_CS;
   1318 //..       case R_SS: return OFFB_SS;
   1319 //..       case R_DS: return OFFB_DS;
   1320 //..       case R_FS: return OFFB_FS;
   1321 //..       case R_GS: return OFFB_GS;
   1322 //..       default: vpanic("segmentGuestRegOffset(x86)");
   1323 //..    }
   1324 //.. }
   1325 
/* Map an XMM register number (0 .. 15) to the guest state offset of
   that register; panics on any other value. */
static Int xmmGuestRegOffset ( UInt xmmreg )
{
   switch (xmmreg) {
      case 0:  return OFFB_XMM0;
      case 1:  return OFFB_XMM1;
      case 2:  return OFFB_XMM2;
      case 3:  return OFFB_XMM3;
      case 4:  return OFFB_XMM4;
      case 5:  return OFFB_XMM5;
      case 6:  return OFFB_XMM6;
      case 7:  return OFFB_XMM7;
      case 8:  return OFFB_XMM8;
      case 9:  return OFFB_XMM9;
      case 10: return OFFB_XMM10;
      case 11: return OFFB_XMM11;
      case 12: return OFFB_XMM12;
      case 13: return OFFB_XMM13;
      case 14: return OFFB_XMM14;
      case 15: return OFFB_XMM15;
      default: vpanic("xmmGuestRegOffset(amd64)");
   }
}
   1348 
   1349 /* Lanes of vector registers are always numbered from zero being the
   1350    least significant lane (rightmost in the register).  */
   1351 
   1352 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
   1353 {
   1354    /* Correct for little-endian host only. */
   1355    vassert(!host_is_bigendian);
   1356    vassert(laneno >= 0 && laneno < 8);
   1357    return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
   1358 }
   1359 
   1360 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
   1361 {
   1362    /* Correct for little-endian host only. */
   1363    vassert(!host_is_bigendian);
   1364    vassert(laneno >= 0 && laneno < 4);
   1365    return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
   1366 }
   1367 
   1368 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
   1369 {
   1370    /* Correct for little-endian host only. */
   1371    vassert(!host_is_bigendian);
   1372    vassert(laneno >= 0 && laneno < 2);
   1373    return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
   1374 }
   1375 
   1376 //.. static IRExpr* getSReg ( UInt sreg )
   1377 //.. {
   1378 //..    return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
   1379 //.. }
   1380 //..
   1381 //.. static void putSReg ( UInt sreg, IRExpr* e )
   1382 //.. {
   1383 //..    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   1384 //..    stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
   1385 //.. }
   1386 
   1387 static IRExpr* getXMMReg ( UInt xmmreg )
   1388 {
   1389    return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
   1390 }
   1391 
   1392 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
   1393 {
   1394    return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
   1395 }
   1396 
   1397 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
   1398 {
   1399    return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
   1400 }
   1401 
   1402 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
   1403 {
   1404    return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
   1405 }
   1406 
   1407 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
   1408 {
   1409    return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
   1410 }
   1411 
   1412 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
   1413 {
   1414   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
   1415 }
   1416 
   1417 static void putXMMReg ( UInt xmmreg, IRExpr* e )
   1418 {
   1419    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   1420    stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
   1421 }
   1422 
   1423 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
   1424 {
   1425    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   1426    stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
   1427 }
   1428 
   1429 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
   1430 {
   1431    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   1432    stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
   1433 }
   1434 
   1435 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
   1436 {
   1437    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   1438    stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
   1439 }
   1440 
   1441 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
   1442 {
   1443    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   1444    stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
   1445 }
   1446 
   1447 static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
   1448 {
   1449    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   1450    stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
   1451 }
   1452 
   1453 static IRExpr* mkV128 ( UShort mask )
   1454 {
   1455    return IRExpr_Const(IRConst_V128(mask));
   1456 }
   1457 
   1458 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
   1459 {
   1460    vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   1461    vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   1462    return unop(Iop_64to1,
   1463                binop(Iop_And64,
   1464                      unop(Iop_1Uto64,x),
   1465                      unop(Iop_1Uto64,y)));
   1466 }
   1467 
   1468 /* Generate a compare-and-swap operation, operating on memory at
   1469    'addr'.  The expected value is 'expVal' and the new value is
   1470    'newVal'.  If the operation fails, then transfer control (with a
   1471    no-redir jump (XXX no -- see comment at top of this file)) to
   1472    'restart_point', which is presumably the address of the guest
   1473    instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   /* the CAS is integer-typed and both operands agree in width */
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   /* Single (non-double) little-endian CAS; the value found in
      memory is captured in oldTmp. */
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   /* If the old value differs from the expected one, the store did
      not take place; exit back to restart_point so the guest
      instruction is retried. */
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point )
         ));
}
   1496 
   1497 
   1498 /*------------------------------------------------------------*/
   1499 /*--- Helpers for %rflags.                                 ---*/
   1500 /*------------------------------------------------------------*/
   1501 
   1502 /* -------------- Evaluating the flags-thunk. -------------- */
   1503 
   1504 /* Build IR to calculate all the eflags from stored
   1505    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1506    Ity_I64. */
/* Build a clean-helper call computing all rflags from the stored
   thunk fields.  args[0..3] are OP, DEP1, DEP2, NDEP. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2.  (mcx_mask bit i corresponds to
      args[i], so bits 0 and 3 name OP and NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1526 
   1527 /* Build IR to calculate some particular condition from stored
   1528    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1529    Ity_Bit. */
/* Build a clean-helper call evaluating condition 'cond' from the
   stored thunk fields; the helper's I64 result is narrowed to
   Ity_I1.  args[0..4] are cond, OP, DEP1, DEP2, NDEP. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  (mcx_mask
      bit i corresponds to args[i]: 0=cond, 1=OP, 4=NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}
   1550 
   1551 /* Build IR to calculate just the carry flag from stored
   1552    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
/* Build a clean-helper call computing just the carry flag from the
   stored thunk fields.  args[0..3] are OP, DEP1, DEP2, NDEP. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2.  (mcx_mask bit i corresponds to
      args[i], so bits 0 and 3 name OP and NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1572 
   1573 
   1574 /* -------------- Building the flags-thunk. -------------- */
   1575 
   1576 /* The machinery in this section builds the flag-thunk following a
   1577    flag-setting operation.  Hence the various setFlags_* functions.
   1578 */
   1579 
   1580 static Bool isAddSub ( IROp op8 )
   1581 {
   1582    return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
   1583 }
   1584 
   1585 static Bool isLogic ( IROp op8 )
   1586 {
   1587    return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
   1588 }
   1589 
   1590 /* U-widen 8/16/32/64 bit int expr to 64. */
   1591 static IRExpr* widenUto64 ( IRExpr* e )
   1592 {
   1593    switch (typeOfIRExpr(irsb->tyenv,e)) {
   1594       case Ity_I64: return e;
   1595       case Ity_I32: return unop(Iop_32Uto64, e);
   1596       case Ity_I16: return unop(Iop_16Uto64, e);
   1597       case Ity_I8:  return unop(Iop_8Uto64, e);
   1598       default: vpanic("widenUto64");
   1599    }
   1600 }
   1601 
   1602 /* S-widen 8/16/32/64 bit int expr to 32. */
   1603 static IRExpr* widenSto64 ( IRExpr* e )
   1604 {
   1605    switch (typeOfIRExpr(irsb->tyenv,e)) {
   1606       case Ity_I64: return e;
   1607       case Ity_I32: return unop(Iop_32Sto64, e);
   1608       case Ity_I16: return unop(Iop_16Sto64, e);
   1609       case Ity_I8:  return unop(Iop_8Sto64, e);
   1610       default: vpanic("widenSto64");
   1611    }
   1612 }
   1613 
   1614 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   1615    of these combinations make sense. */
   1616 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
   1617 {
   1618    IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   1619    if (src_ty == dst_ty)
   1620       return e;
   1621    if (src_ty == Ity_I32 && dst_ty == Ity_I16)
   1622       return unop(Iop_32to16, e);
   1623    if (src_ty == Ity_I32 && dst_ty == Ity_I8)
   1624       return unop(Iop_32to8, e);
   1625    if (src_ty == Ity_I64 && dst_ty == Ity_I32)
   1626       return unop(Iop_64to32, e);
   1627    if (src_ty == Ity_I64 && dst_ty == Ity_I16)
   1628       return unop(Iop_64to16, e);
   1629    if (src_ty == Ity_I64 && dst_ty == Ity_I8)
   1630       return unop(Iop_64to8, e);
   1631 
   1632    vex_printf("\nsrc, dst tys are: ");
   1633    ppIRType(src_ty);
   1634    vex_printf(", ");
   1635    ppIRType(dst_ty);
   1636    vex_printf("\n");
   1637    vpanic("narrowTo(amd64)");
   1638 }
   1639 
   1640 
   1641 /* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   1642    auto-sized up to the real op. */
   1643 
   1644 static
   1645 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
   1646 {
   1647    Int ccOp = 0;
   1648    switch (ty) {
   1649       case Ity_I8:  ccOp = 0; break;
   1650       case Ity_I16: ccOp = 1; break;
   1651       case Ity_I32: ccOp = 2; break;
   1652       case Ity_I64: ccOp = 3; break;
   1653       default: vassert(0);
   1654    }
   1655    switch (op8) {
   1656       case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB;   break;
   1657       case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB;   break;
   1658       default:       ppIROp(op8);
   1659                      vpanic("setFlags_DEP1_DEP2(amd64)");
   1660    }
   1661    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   1662    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   1663    stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
   1664 }
   1665 
   1666 
   1667 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
   1668 
   1669 static
   1670 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
   1671 {
   1672    Int ccOp = 0;
   1673    switch (ty) {
   1674       case Ity_I8:  ccOp = 0; break;
   1675       case Ity_I16: ccOp = 1; break;
   1676       case Ity_I32: ccOp = 2; break;
   1677       case Ity_I64: ccOp = 3; break;
   1678       default: vassert(0);
   1679    }
   1680    switch (op8) {
   1681       case Iop_Or8:
   1682       case Iop_And8:
   1683       case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
   1684       default:       ppIROp(op8);
   1685                      vpanic("setFlags_DEP1(amd64)");
   1686    }
   1687    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   1688    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   1689    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
   1690 }
   1691 
   1692 
/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   /* Size adjustment: the thunk op constants are laid out B/W/L/Q,
      so add 0..3 to the B-sized base constant chosen below. */
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value.
      Each field is written through a Mux0X on 'guard': when 'guard'
      evaluates to zero the field's previous value is re-stored, so
      the whole thunk is left unchanged, as required for a
      zero-count shift (see comment above). */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                   mkU64(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                   widenUto64(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                   widenUto64(mkexpr(resUS)))) );
}
   1738 
   1739 
/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   /* Pick the B-sized thunk op for inc vs dec, then add 0..3 for
      the operand size (constants are laid out B/W/L/Q). */
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
   1763 
   1764 
   1765 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   1766    two arguments. */
   1767 
   1768 static
   1769 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
   1770 {
   1771    switch (ty) {
   1772       case Ity_I8:
   1773          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
   1774          break;
   1775       case Ity_I16:
   1776          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
   1777          break;
   1778       case Ity_I32:
   1779          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
   1780          break;
   1781       case Ity_I64:
   1782          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
   1783          break;
   1784       default:
   1785          vpanic("setFlags_MUL(amd64)");
   1786    }
   1787    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   1788    stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
   1789 }
   1790 
   1791 
   1792 /* -------------- Condition codes. -------------- */
   1793 
   1794 /* Condition codes, using the AMD encoding.  */
   1795 
/* Return the mnemonic suffix conventionally used for 'cond' in
   instruction names (e.g. the "be" of "jbe").  Where two spellings
   exist for the same condition, the unused alternative is kept in a
   comment alongside. */
static HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}
   1819 
   1820 static
   1821 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
   1822                                           /*OUT*/Bool*   needInvert )
   1823 {
   1824    vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   1825    if (cond & 1) {
   1826       *needInvert = True;
   1827       return cond-1;
   1828    } else {
   1829       *needInvert = False;
   1830       return cond;
   1831    }
   1832 }
   1833 
   1834 
   1835 /* -------------- Helpers for ADD/SUB with carry. -------------- */
   1836 
   1837 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   1838    appropriately.
   1839 
   1840    Optionally, generate a store for the 'tres' value.  This can either
   1841    be a normal store, or it can be a cas-with-possible-failure style
   1842    store:
   1843 
   1844    if taddr is IRTemp_INVALID, then no store is generated.
   1845 
   1846    if taddr is not IRTemp_INVALID, then a store (using taddr as
   1847    the address) is generated:
   1848 
   1849      if texpVal is IRTemp_INVALID then a normal store is
   1850      generated, and restart_point must be zero (it is irrelevant).
   1851 
   1852      if texpVal is not IRTemp_INVALID then a cas-style store is
   1853      generated.  texpVal is the expected value, restart_point
   1854      is the restart point if the store fails, and texpVal must
   1855      have the same type as tres.
   1856 
   1857 */
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);             /* carry, at operand width */
   IROp    plus  = mkSizedOp(ty, Iop_Add8); /* size-matched add */
   IROp    xor   = mkSizedOp(ty, Iop_Xor8); /* size-matched xor */

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the size-appropriate ADC thunk operation. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc,  binop(Iop_And64,
                        mk_amd64g_calculate_rflags_c(),
                        mkU64(1)) );

   /* oldcn = the carry bit, narrowed to the operand width. */
   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old carry. */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         /* Plain store; no restart point applies. */
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Write the thunk: DEP1 = first arg, DEP2 = second arg XOR old
      carry, NDEP = old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1))  ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
   1911 
   1912 
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);             /* carry, at operand width */
   IROp    minus = mkSizedOp(ty, Iop_Sub8); /* size-matched subtract */
   IROp    xor   = mkSizedOp(ty, Iop_Xor8); /* size-matched xor */

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the size-appropriate SBB thunk operation. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   /* oldcn = the carry bit, narrowed to the operand width. */
   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old carry (borrow). */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         /* Plain store; no restart point applies. */
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Write the thunk: DEP1 = first arg, DEP2 = second arg XOR old
      carry, NDEP = old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
   1970 
   1971 
   1972 /* -------------- Helpers for disassembly printing. -------------- */
   1973 
   1974 static HChar* nameGrp1 ( Int opc_aux )
   1975 {
   1976    static HChar* grp1_names[8]
   1977      = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   1978    if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   1979    return grp1_names[opc_aux];
   1980 }
   1981 
   1982 static HChar* nameGrp2 ( Int opc_aux )
   1983 {
   1984    static HChar* grp2_names[8]
   1985      = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   1986    if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   1987    return grp2_names[opc_aux];
   1988 }
   1989 
   1990 static HChar* nameGrp4 ( Int opc_aux )
   1991 {
   1992    static HChar* grp4_names[8]
   1993      = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   1994    if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   1995    return grp4_names[opc_aux];
   1996 }
   1997 
   1998 static HChar* nameGrp5 ( Int opc_aux )
   1999 {
   2000    static HChar* grp5_names[8]
   2001      = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   2002    if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   2003    return grp5_names[opc_aux];
   2004 }
   2005 
   2006 static HChar* nameGrp8 ( Int opc_aux )
   2007 {
   2008    static HChar* grp8_names[8]
   2009       = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   2010    if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   2011    return grp8_names[opc_aux];
   2012 }
   2013 
   2014 //.. static HChar* nameSReg ( UInt sreg )
   2015 //.. {
   2016 //..    switch (sreg) {
   2017 //..       case R_ES: return "%es";
   2018 //..       case R_CS: return "%cs";
   2019 //..       case R_SS: return "%ss";
   2020 //..       case R_DS: return "%ds";
   2021 //..       case R_FS: return "%fs";
   2022 //..       case R_GS: return "%gs";
   2023 //..       default: vpanic("nameSReg(x86)");
   2024 //..    }
   2025 //.. }
   2026 
   2027 static HChar* nameMMXReg ( Int mmxreg )
   2028 {
   2029    static HChar* mmx_names[8]
   2030      = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   2031    if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   2032    return mmx_names[mmxreg];
   2033 }
   2034 
   2035 static HChar* nameXMMReg ( Int xmmreg )
   2036 {
   2037    static HChar* xmm_names[16]
   2038      = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
   2039          "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
   2040          "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
   2041          "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   2042    if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   2043    return xmm_names[xmmreg];
   2044 }
   2045 
   2046 static HChar* nameMMXGran ( Int gran )
   2047 {
   2048    switch (gran) {
   2049       case 0: return "b";
   2050       case 1: return "w";
   2051       case 2: return "d";
   2052       case 3: return "q";
   2053       default: vpanic("nameMMXGran(amd64,guest)");
   2054    }
   2055 }
   2056 
   2057 static HChar nameISize ( Int size )
   2058 {
   2059    switch (size) {
   2060       case 8: return 'q';
   2061       case 4: return 'l';
   2062       case 2: return 'w';
   2063       case 1: return 'b';
   2064       default: vpanic("nameISize(amd64)");
   2065    }
   2066 }
   2067 
   2068 
   2069 /*------------------------------------------------------------*/
   2070 /*--- JMP helpers                                          ---*/
   2071 /*------------------------------------------------------------*/
   2072 
   2073 static void jmp_lit( IRJumpKind kind, Addr64 d64 )
   2074 {
   2075    irsb->next     = mkU64(d64);
   2076    irsb->jumpkind = kind;
   2077 }
   2078 
   2079 static void jmp_treg( IRJumpKind kind, IRTemp t )
   2080 {
   2081    irsb->next     = mkexpr(t);
   2082    irsb->jumpkind = kind;
   2083 }
   2084 
   2085 static
   2086 void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
   2087 {
   2088    Bool          invert;
   2089    AMD64Condcode condPos;
   2090    condPos = positiveIse_AMD64Condcode ( cond, &invert );
   2091    if (invert) {
   2092       stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
   2093                          Ijk_Boring,
   2094                          IRConst_U64(d64_false) ) );
   2095       irsb->next     = mkU64(d64_true);
   2096       irsb->jumpkind = Ijk_Boring;
   2097    } else {
   2098       stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
   2099                          Ijk_Boring,
   2100                          IRConst_U64(d64_true) ) );
   2101       irsb->next     = mkU64(d64_false);
   2102       irsb->jumpkind = Ijk_Boring;
   2103    }
   2104 }
   2105 
   2106 /* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   2107    guest address of the next instruction to be executed.
   2108 
   2109    This function generates an AbiHint to say that -128(%rsp)
   2110    .. -1(%rsp) should now be regarded as uninitialised.
   2111 */
   2112 static
   2113 void make_redzone_AbiHint ( VexAbiInfo* vbi,
   2114                             IRTemp new_rsp, IRTemp nia, HChar* who )
   2115 {
   2116    Int szB = vbi->guest_stack_redzone_size;
   2117    vassert(szB >= 0);
   2118 
   2119    /* A bit of a kludge.  Currently the only AbI we've guested AMD64
   2120       for is ELF.  So just check it's the expected 128 value
   2121       (paranoia). */
   2122    vassert(szB == 128);
   2123 
   2124    if (0) vex_printf("AbiHint: %s\n", who);
   2125    vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   2126    vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
   2127    if (szB > 0)
   2128       stmt( IRStmt_AbiHint(
   2129                binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
   2130                szB,
   2131                mkexpr(nia)
   2132             ));
   2133 }
   2134 
   2135 
   2136 /*------------------------------------------------------------*/
   2137 /*--- Disassembling addressing modes                       ---*/
   2138 /*------------------------------------------------------------*/
   2139 
   2140 static
   2141 HChar* segRegTxt ( Prefix pfx )
   2142 {
   2143    if (pfx & PFX_CS) return "%cs:";
   2144    if (pfx & PFX_DS) return "%ds:";
   2145    if (pfx & PFX_ES) return "%es:";
   2146    if (pfx & PFX_FS) return "%fs:";
   2147    if (pfx & PFX_GS) return "%gs:";
   2148    if (pfx & PFX_SS) return "%ss:";
   2149    return ""; /* no override */
   2150 }
   2151 
   2152 
   2153 /* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   2154    linear address by adding any required segment override as indicated
   2155    by sorb, and also dealing with any address size override
   2156    present. */
static
IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* --- segment overrides --- */
   /* Overrides are applied in a fixed order: %fs, then %gs, then the
      address-size override.  Only the zero-base (%fs) / 0x60-base
      (%gs) special cases are supported; anything else bails out. */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_zero) {
         /* Note that this is a linux-kernel specific hack that relies
            on the assumption that %fs is always zero. */
         /* return virtual + guest_FS_ZERO. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_0x60) {
         /* Note that this is a darwin-kernel specific hack that relies
            on the assumption that %gs is always 0x60. */
         /* return virtual + guest_GS_0x60. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_GS_0x60, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   /* --- address size override --- */
   /* Model the address-size override by truncating the address to
      its low 32 bits and zero-extending back to 64. */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   return virtual;
}
   2194 
   2195 //.. {
   2196 //..    Int    sreg;
   2197 //..    IRType hWordTy;
   2198 //..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
   2199 //..
   2200 //..    if (sorb == 0)
   2201 //..       /* the common case - no override */
   2202 //..       return virtual;
   2203 //..
   2204 //..    switch (sorb) {
   2205 //..       case 0x3E: sreg = R_DS; break;
   2206 //..       case 0x26: sreg = R_ES; break;
   2207 //..       case 0x64: sreg = R_FS; break;
   2208 //..       case 0x65: sreg = R_GS; break;
   2209 //..       default: vpanic("handleAddrOverrides(x86,guest)");
   2210 //..    }
   2211 //..
   2212 //..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
   2213 //..
   2214 //..    seg_selector = newTemp(Ity_I32);
   2215 //..    ldt_ptr      = newTemp(hWordTy);
   2216 //..    gdt_ptr      = newTemp(hWordTy);
   2217 //..    r64          = newTemp(Ity_I64);
   2218 //..
   2219 //..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   2220 //..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   2221 //..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
   2222 //..
   2223 //..    /*
   2224 //..    Call this to do the translation and limit checks:
   2225 //..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2226 //..                                  UInt seg_selector, UInt virtual_addr )
   2227 //..    */
   2228 //..    assign(
   2229 //..       r64,
   2230 //..       mkIRExprCCall(
   2231 //..          Ity_I64,
   2232 //..          0/*regparms*/,
   2233 //..          "x86g_use_seg_selector",
   2234 //..          &x86g_use_seg_selector,
   2235 //..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
   2236 //..                         mkexpr(seg_selector), virtual)
   2237 //..       )
   2238 //..    );
   2239 //..
   2240 //..    /* If the high 32 of the result are non-zero, there was a
   2241 //..       failure in address translation.  In which case, make a
   2242 //..       quick exit.
   2243 //..    */
   2244 //..    stmt(
   2245 //..       IRStmt_Exit(
   2246 //..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
   2247 //..          Ijk_MapFail,
   2248 //..          IRConst_U32( guest_eip_curr_instr )
   2249 //..       )
   2250 //..    );
   2251 //..
   2252 //..    /* otherwise, here's the translated result. */
   2253 //..    return unop(Iop_64to32, mkexpr(r64));
   2254 //.. }
   2255 
   2256 
   2257 /* Generate IR to calculate an address indicated by a ModRM and
   2258    following SIB bytes.  The expression, and the number of bytes in
   2259    the address mode, are returned (the latter in *len).  Note that
   2260    this fn should not be called if the R/M part of the address denotes
   2261    a register instead of memory.  If print_codegen is true, text of
   2262    the addressing mode is placed in buf.
   2263 
   2264    The computed address is stored in a new tempreg, and the
   2265    identity of the tempreg is returned.
   2266 
   2267    extra_bytes holds the number of bytes after the amode, as supplied
   2268    by the caller.  This is needed to make sense of %rip-relative
   2269    addresses.  Note that the value that *len is set to is only the
   2270    length of the amode itself and does not include the value supplied
   2271    in extra_bytes.
   2272  */
   2273 
   2274 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
   2275 {
   2276    IRTemp tmp = newTemp(Ity_I64);
   2277    assign( tmp, addr64 );
   2278    return tmp;
   2279 }
   2280 
   2281 static
   2282 IRTemp disAMode ( /*OUT*/Int* len,
   2283                   VexAbiInfo* vbi, Prefix pfx, Long delta,
   2284                   /*OUT*/HChar* buf, Int extra_bytes )
   2285 {
   2286    UChar mod_reg_rm = getUChar(delta);
   2287    delta++;
   2288 
   2289    buf[0] = (UChar)0;
   2290    vassert(extra_bytes >= 0 && extra_bytes < 10);
   2291 
   2292    /* squeeze out the reg field from mod_reg_rm, since a 256-entry
   2293       jump table seems a bit excessive.
   2294    */
   2295    mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   2296    mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
   2297                                                /* is now XX0XXYYY */
   2298    mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   2299    switch (mod_reg_rm) {
   2300 
   2301       /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
   2302          REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
   2303       */
   2304       case 0x00: case 0x01: case 0x02: case 0x03:
   2305       /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
   2306          { UChar rm = toUChar(mod_reg_rm & 7);
   2307            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
   2308            *len = 1;
   2309            return disAMode_copy2tmp(
   2310                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
   2311          }
   2312 
   2313       /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
   2314          REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
   2315       */
   2316       case 0x08: case 0x09: case 0x0A: case 0x0B:
   2317       /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
   2318          { UChar rm = toUChar(mod_reg_rm & 7);
   2319            Long d   = getSDisp8(delta);
   2320            if (d == 0) {
   2321               DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
   2322            } else {
   2323               DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
   2324            }
   2325            *len = 2;
   2326            return disAMode_copy2tmp(
   2327                   handleAddrOverrides(vbi, pfx,
   2328                      binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
   2329          }
   2330 
   2331       /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
   2332          REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
   2333       */
   2334       case 0x10: case 0x11: case 0x12: case 0x13:
   2335       /* ! 14 */ case 0x15: case 0x16: case 0x17:
   2336          { UChar rm = toUChar(mod_reg_rm & 7);
   2337            Long  d  = getSDisp32(delta);
   2338            DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
   2339            *len = 5;
   2340            return disAMode_copy2tmp(
   2341                   handleAddrOverrides(vbi, pfx,
   2342                      binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
   2343          }
   2344 
   2345       /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
   2347       case 0x18: case 0x19: case 0x1A: case 0x1B:
   2348       case 0x1C: case 0x1D: case 0x1E: case 0x1F:
   2349          vpanic("disAMode(amd64): not an addr!");
   2350 
   2351       /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
   2352          correctly at the start of handling each instruction. */
   2353       case 0x05:
   2354          { Long d = getSDisp32(delta);
   2355            *len = 5;
   2356            DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
   2357            /* We need to know the next instruction's start address.
   2358               Try and figure out what it is, record the guess, and ask
   2359               the top-level driver logic (bbToIR_AMD64) to check we
   2360               guessed right, after the instruction is completely
   2361               decoded. */
   2362            guest_RIP_next_mustcheck = True;
   2363            guest_RIP_next_assumed = guest_RIP_bbstart
   2364                                     + delta+4 + extra_bytes;
   2365            return disAMode_copy2tmp(
   2366                      handleAddrOverrides(vbi, pfx,
   2367                         binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
   2368                                          mkU64(d))));
   2369          }
   2370 
   2371       case 0x04: {
   2372          /* SIB, with no displacement.  Special cases:
   2373             -- %rsp cannot act as an index value.
   2374                If index_r indicates %rsp, zero is used for the index.
   2375             -- when mod is zero and base indicates RBP or R13, base is
   2376                instead a 32-bit sign-extended literal.
   2377             It's all madness, I tell you.  Extract %index, %base and
   2378             scale from the SIB byte.  The value denoted is then:
   2379                | %index == %RSP && (%base == %RBP || %base == %R13)
   2380                = d32 following SIB byte
   2381                | %index == %RSP && !(%base == %RBP || %base == %R13)
   2382                = %base
   2383                | %index != %RSP && (%base == %RBP || %base == %R13)
   2384                = d32 following SIB byte + (%index << scale)
   2385                | %index != %RSP && !(%base == %RBP || %base == %R13)
   2386                = %base + (%index << scale)
   2387          */
   2388          UChar sib     = getUChar(delta);
   2389          UChar scale   = toUChar((sib >> 6) & 3);
   2390          UChar index_r = toUChar((sib >> 3) & 7);
   2391          UChar base_r  = toUChar(sib & 7);
   2392          /* correct since #(R13) == 8 + #(RBP) */
   2393          Bool  base_is_BPor13 = toBool(base_r == R_RBP);
   2394          Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
   2395          delta++;
   2396 
   2397          if ((!index_is_SP) && (!base_is_BPor13)) {
   2398             if (scale == 0) {
   2399                DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
   2400                          nameIRegRexB(8,pfx,base_r),
   2401                          nameIReg64rexX(pfx,index_r));
   2402             } else {
   2403                DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
   2404                          nameIRegRexB(8,pfx,base_r),
   2405                          nameIReg64rexX(pfx,index_r), 1<<scale);
   2406             }
   2407             *len = 2;
   2408             return
   2409                disAMode_copy2tmp(
   2410                handleAddrOverrides(vbi, pfx,
   2411                   binop(Iop_Add64,
   2412                         getIRegRexB(8,pfx,base_r),
   2413                         binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
   2414                               mkU8(scale)))));
   2415          }
   2416 
   2417          if ((!index_is_SP) && base_is_BPor13) {
   2418             Long d = getSDisp32(delta);
   2419             DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
   2420                       nameIReg64rexX(pfx,index_r), 1<<scale);
   2421             *len = 6;
   2422             return
   2423                disAMode_copy2tmp(
   2424                handleAddrOverrides(vbi, pfx,
   2425                   binop(Iop_Add64,
   2426                         binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
   2427                                          mkU8(scale)),
   2428                         mkU64(d))));
   2429          }
   2430 
   2431          if (index_is_SP && (!base_is_BPor13)) {
   2432             DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
   2433             *len = 2;
   2434             return disAMode_copy2tmp(
   2435                    handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
   2436          }
   2437 
   2438          if (index_is_SP && base_is_BPor13) {
   2439             Long d = getSDisp32(delta);
   2440             DIS(buf, "%s%lld", segRegTxt(pfx), d);
   2441             *len = 6;
   2442             return disAMode_copy2tmp(
   2443                    handleAddrOverrides(vbi, pfx, mkU64(d)));
   2444          }
   2445 
   2446          vassert(0);
   2447       }
   2448 
   2449       /* SIB, with 8-bit displacement.  Special cases:
   2450          -- %esp cannot act as an index value.
   2451             If index_r indicates %esp, zero is used for the index.
   2452          Denoted value is:
   2453             | %index == %ESP
   2454             = d8 + %base
   2455             | %index != %ESP
   2456             = d8 + %base + (%index << scale)
   2457       */
   2458       case 0x0C: {
   2459          UChar sib     = getUChar(delta);
   2460          UChar scale   = toUChar((sib >> 6) & 3);
   2461          UChar index_r = toUChar((sib >> 3) & 7);
   2462          UChar base_r  = toUChar(sib & 7);
   2463          Long d        = getSDisp8(delta+1);
   2464 
   2465          if (index_r == R_RSP && 0==getRexX(pfx)) {
   2466             DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
   2467                                    d, nameIRegRexB(8,pfx,base_r));
   2468             *len = 3;
   2469             return disAMode_copy2tmp(
   2470                    handleAddrOverrides(vbi, pfx,
   2471                       binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
   2472          } else {
   2473             if (scale == 0) {
   2474                DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
   2475                          nameIRegRexB(8,pfx,base_r),
   2476                          nameIReg64rexX(pfx,index_r));
   2477             } else {
   2478                DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
   2479                          nameIRegRexB(8,pfx,base_r),
   2480                          nameIReg64rexX(pfx,index_r), 1<<scale);
   2481             }
   2482             *len = 3;
   2483             return
   2484                 disAMode_copy2tmp(
   2485                 handleAddrOverrides(vbi, pfx,
   2486                   binop(Iop_Add64,
   2487                         binop(Iop_Add64,
   2488                               getIRegRexB(8,pfx,base_r),
   2489                               binop(Iop_Shl64,
   2490                                     getIReg64rexX(pfx,index_r), mkU8(scale))),
   2491                         mkU64(d))));
   2492          }
   2493          vassert(0); /*NOTREACHED*/
   2494       }
   2495 
   2496       /* SIB, with 32-bit displacement.  Special cases:
   2497          -- %rsp cannot act as an index value.
   2498             If index_r indicates %rsp, zero is used for the index.
   2499          Denoted value is:
   2500             | %index == %RSP
   2501             = d32 + %base
   2502             | %index != %RSP
   2503             = d32 + %base + (%index << scale)
   2504       */
   2505       case 0x14: {
   2506          UChar sib     = getUChar(delta);
   2507          UChar scale   = toUChar((sib >> 6) & 3);
   2508          UChar index_r = toUChar((sib >> 3) & 7);
   2509          UChar base_r  = toUChar(sib & 7);
   2510          Long d        = getSDisp32(delta+1);
   2511 
   2512          if (index_r == R_RSP && 0==getRexX(pfx)) {
   2513             DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
   2514                                    d, nameIRegRexB(8,pfx,base_r));
   2515             *len = 6;
   2516             return disAMode_copy2tmp(
   2517                    handleAddrOverrides(vbi, pfx,
   2518                       binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
   2519          } else {
   2520             if (scale == 0) {
   2521                DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
   2522                          nameIRegRexB(8,pfx,base_r),
   2523                          nameIReg64rexX(pfx,index_r));
   2524             } else {
   2525                DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
   2526                          nameIRegRexB(8,pfx,base_r),
   2527                          nameIReg64rexX(pfx,index_r), 1<<scale);
   2528             }
   2529             *len = 6;
   2530             return
   2531                 disAMode_copy2tmp(
   2532                 handleAddrOverrides(vbi, pfx,
   2533                   binop(Iop_Add64,
   2534                         binop(Iop_Add64,
   2535                               getIRegRexB(8,pfx,base_r),
   2536                               binop(Iop_Shl64,
   2537                                     getIReg64rexX(pfx,index_r), mkU8(scale))),
   2538                         mkU64(d))));
   2539          }
   2540          vassert(0); /*NOTREACHED*/
   2541       }
   2542 
   2543       default:
   2544          vpanic("disAMode(amd64)");
   2545          return 0; /*notreached*/
   2546    }
   2547 }
   2548 
   2549 
   2550 /* Figure out the number of (insn-stream) bytes constituting the amode
   2551    beginning at delta.  Is useful for getting hold of literals beyond
   2552    the end of the amode before it has been disassembled.  */
   2553 
static UInt lengthAMode ( Prefix pfx, Long delta )
{
   /* Note: pfx is not consulted here.  The REX bits never change the
      *length* of an amode -- e.g. the mod=0 base=RBP/R13 special case
      is detected below purely from the low 3 bits of the SIB byte. */
   UChar mod_reg_rm = getUChar(delta);
   delta++;

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         /* just the ModRM byte */
         return 1;

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         /* ModRM byte + 8-bit displacement */
         return 2;

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         /* ModRM byte + 32-bit displacement */
         return 5;

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
      /* Not an address, but still handled. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         return 1;

      /* RIP + disp32. */
      case 0x05:
         /* ModRM byte + 32-bit displacement */
         return 5;

      case 0x04: {
         /* SIB, with no displacement. */
         UChar sib     = getUChar(delta);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);

         if (base_is_BPor13) {
            /* mod==0 with base RBP/R13 means "no base, d32 literal":
               ModRM + SIB + 32-bit displacement */
            return 6;
         } else {
            /* ModRM + SIB */
            return 2;
         }
      }

      /* SIB, with 8-bit displacement. */
      case 0x0C:
         /* ModRM + SIB + d8 */
         return 3;

      /* SIB, with 32-bit displacement. */
      case 0x14:
         /* ModRM + SIB + d32 */
         return 6;

      default:
         vpanic("lengthAMode(amd64)");
         return 0; /*notreached*/
   }
}
   2627 
   2628 
   2629 /*------------------------------------------------------------*/
   2630 /*--- Disassembling common idioms                          ---*/
   2631 /*------------------------------------------------------------*/
   2632 
   2633 /* Handle binary integer instructions of the form
   2634       op E, G  meaning
   2635       op reg-or-mem, reg
   2636    Is passed the a ptr to the modRM byte, the actual operation, and the
   2637    data size.  Returns the address advanced completely over this
   2638    instruction.
   2639 
   2640    E(src) is reg-or-mem
   2641    G(dst) is reg.
   2642 
   2643    If E is reg, -->    GET %G,  tmp
   2644                        OP %E,   tmp
   2645                        PUT tmp, %G
   2646 
   2647    If E is mem and OP is not reversible,
   2648                 -->    (getAddr E) -> tmpa
   2649                        LD (tmpa), tmpa
   2650                        GET %G, tmp2
   2651                        OP tmpa, tmp2
   2652                        PUT tmp2, %G
   2653 
   2654    If E is mem and OP is reversible
   2655                 -->    (getAddr E) -> tmpa
   2656                        LD (tmpa), tmpa
   2657                        OP %G, tmpa
   2658                        PUT tmpa, %G
   2659 */
static
ULong dis_op2_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* the result */
   IRTemp  src  = newTemp(ty);   /* the E operand */
   IRTemp  dst0 = newTemp(ty);   /* original value of G */
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)   /* disabled debug printout */
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
         /* Zero the register first; the reads below then observe a
            defined 0, and the final value is still correct:
            0 xor 0 == 0, and 0 - 0 - CF == -CF for sbb r,r. */
         putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes result into dst1 and sets the flags
            thunk; no memory store is involved here. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: ditto, subtract-with-borrow. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         /* add/sub flags depend on both operands; logic-op flags
            depend only on the result. */
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* keep==False is the CMP/TEST idiom: flags only, no write. */
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}
   2754 
   2755 
   2756 
   2757 /* Handle binary integer instructions of the form
   2758       op G, E  meaning
   2759       op reg, reg-or-mem
   2760    Is passed the a ptr to the modRM byte, the actual operation, and the
   2761    data size.  Returns the address advanced completely over this
   2762    instruction.
   2763 
   2764    G(src) is reg.
   2765    E(dst) is reg-or-mem
   2766 
   2767    If E is reg, -->    GET %E,  tmp
   2768                        OP %G,   tmp
   2769                        PUT tmp, %E
   2770 
   2771    If E is mem, -->    (getAddr E) -> tmpa
   2772                        LD (tmpa), tmpv
   2773                        OP %G, tmpv
   2774                        ST tmpv, (tmpa)
   2775 */
static
ULong dis_op2_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* the result */
   IRTemp  src  = newTemp(ty);   /* the G operand */
   IRTemp  dst0 = newTemp(ty);   /* original value of E */
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         /* Zero the register first; the reads below then observe a
            defined 0, and the final value is still correct:
            0 xor 0 == 0, and 0 - 0 - CF == -CF for sbb r,r. */
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: helper computes result into dst1 and sets the flags
            thunk; no memory store is involved in the reg-reg case. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* add/sub flags depend on both operands; logic-op flags
            depend only on the result. */
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* keep==False is the CMP/TEST idiom: flags only, no write. */
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      /* For a LOCK-prefixed insn, the store back to memory is done
         cas-style, with the originally-loaded value (dst0) as the
         expected value, so the read-modify-write is atomic; the
         current instruction address is passed so the insn can be
         restarted if the CAS fails. */
      if (addSubCarry && op8 == Iop_Add8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (pfx & PFX_LOCK) {
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm), dis_buf);
      return len+delta0;
   }
}
   2891 
   2892 
   2893 /* Handle move instructions of the form
   2894       mov E, G  meaning
   2895       mov reg-or-mem, reg
   2896    Is passed the a ptr to the modRM byte, and the data size.  Returns
   2897    the address advanced completely over this instruction.
   2898 
   2899    E(src) is reg-or-mem
   2900    G(dst) is reg.
   2901 
   2902    If E is reg, -->    GET %E,  tmpv
   2903                        PUT tmpv, %G
   2904 
   2905    If E is mem  -->    (getAddr E) -> tmpa
   2906                        LD (tmpa), tmpb
   2907                        PUT tmpb, %G
   2908 */
   2909 static
   2910 ULong dis_mov_E_G ( VexAbiInfo* vbi,
   2911                     Prefix      pfx,
   2912                     Int         size,
   2913                     Long        delta0 )
   2914 {
   2915    Int len;
   2916    UChar rm = getUChar(delta0);
   2917    HChar dis_buf[50];
   2918 
   2919    if (epartIsReg(rm)) {
   2920       putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
   2921       DIP("mov%c %s,%s\n", nameISize(size),
   2922                            nameIRegE(size,pfx,rm),
   2923                            nameIRegG(size,pfx,rm));
   2924       return 1+delta0;
   2925    }
   2926 
   2927    /* E refers to memory */
   2928    {
   2929       IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
   2930       putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
   2931       DIP("mov%c %s,%s\n", nameISize(size),
   2932                            dis_buf,
   2933                            nameIRegG(size,pfx,rm));
   2934       return delta0+len;
   2935    }
   2936 }
   2937 
   2938 
   2939 /* Handle move instructions of the form
   2940       mov G, E  meaning
   2941       mov reg, reg-or-mem
   2942    Is passed the a ptr to the modRM byte, and the data size.  Returns
   2943    the address advanced completely over this instruction.
   2944 
   2945    G(src) is reg.
   2946    E(dst) is reg-or-mem
   2947 
   2948    If E is reg, -->    GET %G,  tmp
   2949                        PUT tmp, %E
   2950 
   2951    If E is mem, -->    (getAddr E) -> tmpa
   2952                        GET %G, tmpv
   2953                        ST tmpv, (tmpa)
   2954 */
   2955 static
   2956 ULong dis_mov_G_E ( VexAbiInfo* vbi,
   2957                     Prefix      pfx,
   2958                     Int         size,
   2959                     Long        delta0 )
   2960 {
   2961    Int len;
   2962    UChar rm = getUChar(delta0);
   2963    HChar dis_buf[50];
   2964 
   2965    if (epartIsReg(rm)) {
   2966       putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
   2967       DIP("mov%c %s,%s\n", nameISize(size),
   2968                            nameIRegG(size,pfx,rm),
   2969                            nameIRegE(size,pfx,rm));
   2970       return 1+delta0;
   2971    }
   2972 
   2973    /* E refers to memory */
   2974    {
   2975       IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
   2976       storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
   2977       DIP("mov%c %s,%s\n", nameISize(size),
   2978                            nameIRegG(size,pfx,rm),
   2979                            dis_buf);
   2980       return len+delta0;
   2981    }
   2982 }
   2983 
   2984 
   2985 /* op $immediate, AL/AX/EAX/RAX. */
   2986 static
   2987 ULong dis_op_imm_A ( Int    size,
   2988                      Bool   carrying,
   2989                      IROp   op8,
   2990                      Bool   keep,
   2991                      Long   delta,
   2992                      HChar* t_amd64opc )
   2993 {
   2994    Int    size4 = imin(size,4);
   2995    IRType ty    = szToITy(size);
   2996    IRTemp dst0  = newTemp(ty);
   2997    IRTemp src   = newTemp(ty);
   2998    IRTemp dst1  = newTemp(ty);
   2999    Long  lit    = getSDisp(size4,delta);
   3000    assign(dst0, getIRegRAX(size));
   3001    assign(src,  mkU(ty,lit & mkSizeMask(size)));
   3002 
   3003    if (isAddSub(op8) && !carrying) {
   3004       assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
   3005       setFlags_DEP1_DEP2(op8, dst0, src, ty);
   3006    }
   3007    else
   3008    if (isLogic(op8)) {
   3009       vassert(!carrying);
   3010       assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
   3011       setFlags_DEP1(op8, dst1, ty);
   3012    }
   3013    else
   3014    if (op8 == Iop_Add8 && carrying) {
   3015       helper_ADC( size, dst1, dst0, src,
   3016                   /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   3017    }
   3018    else
   3019    if (op8 == Iop_Sub8 && carrying) {
   3020       helper_SBB( size, dst1, dst0, src,
   3021                   /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   3022    }
   3023    else
   3024       vpanic("dis_op_imm_A(amd64,guest)");
   3025 
   3026    if (keep)
   3027       putIRegRAX(size, mkexpr(dst1));
   3028 
   3029    DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
   3030                            lit, nameIRegRAX(size));
   3031    return delta+size4;
   3032 }
   3033 
   3034 
   3035 /* Sign- and Zero-extending moves. */
   3036 static
   3037 ULong dis_movx_E_G ( VexAbiInfo* vbi,
   3038                      Prefix pfx,
   3039                      Long delta, Int szs, Int szd, Bool sign_extend )
   3040 {
   3041    UChar rm = getUChar(delta);
   3042    if (epartIsReg(rm)) {
   3043       putIRegG(szd, pfx, rm,
   3044                     doScalarWidening(
   3045                        szs,szd,sign_extend,
   3046                        getIRegE(szs,pfx,rm)));
   3047       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
   3048                                nameISize(szs),
   3049                                nameISize(szd),
   3050                                nameIRegE(szs,pfx,rm),
   3051                                nameIRegG(szd,pfx,rm));
   3052       return 1+delta;
   3053    }
   3054 
   3055    /* E refers to memory */
   3056    {
   3057       Int    len;
   3058       HChar  dis_buf[50];
   3059       IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
   3060       putIRegG(szd, pfx, rm,
   3061                     doScalarWidening(
   3062                        szs,szd,sign_extend,
   3063                        loadLE(szToITy(szs),mkexpr(addr))));
   3064       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
   3065                                nameISize(szs),
   3066                                nameISize(szd),
   3067                                dis_buf,
   3068                                nameIRegG(szd,pfx,rm));
   3069       return len+delta;
   3070    }
   3071 }
   3072 
   3073 
   3074 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   3075    the 64 / 32 / 16 / 8 bit quantity in the given IRTemp.  */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case */
   if (sz == 8) {
      /* RDX:RAX / t  -->  RAX (low half of the DivMod result) and
         RDX (high half), matching x86's quotient-in-RAX /
         remainder-in-RDX convention. */
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      /* All narrower cases are done with the 64/32 DivMod op, after
         widening the dividend (and, for sz < 4, the divisor) to the
         required widths. */
      IROp   op    = signed_divide ? Iop_DivModS64to32
                                   : Iop_DivModU64to32;
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp dst64 = newTemp(Ity_I64);
      switch (sz) {
      case 4:
         /* EDX:EAX / t(32)  -->  EAX (low), EDX (high). */
         assign( src64,
                 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
         assign( dst64,
                 binop(op, mkexpr(src64), mkexpr(t)) );
         putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
         putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
         break;
      case 2: {
         /* DX:AX / t(16): widen dividend 32->64 and divisor 16->32
            (sign- or zero-extending to match the division), then
            truncate the two result halves back to 16 bits. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         assign( src64, unop(widen3264,
                             binop(Iop_16HLto32,
                                   getIRegRDX(2),
                                   getIRegRAX(2))) );
         assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
         putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
         putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
         break;
      }
      case 1: {
         /* AX / t(8): the 16-bit dividend lives entirely in AX; the
            results go to AL (low half, truncated to 8 bits) and AH
            (high half), per the x86 8-bit DIV/IDIV convention. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
         assign( src64, unop(widen3264,
                        unop(widen1632, getIRegRAX(2))) );
         assign( dst64,
                 binop(op, mkexpr(src64),
                           unop(widen1632, unop(widen816, mkexpr(t)))) );
         putIRegRAX( 1, unop(Iop_16to8,
                        unop(Iop_32to16,
                        unop(Iop_64to32,mkexpr(dst64)))) );
         putIRegAH( unop(Iop_16to8,
                    unop(Iop_32to16,
                    unop(Iop_64HIto32,mkexpr(dst64)))) );
         break;
      }
      default:
         vpanic("codegen_div(amd64)");
      }
   }
}
   3139 
/* Group 1 extended opcodes (0x80..0x83): op E,imm where the operation
   is selected by the reg field of the modrm byte:
     0=ADD 1=OR 2=ADC 3=SBB 4=AND 5=SUB 6=XOR 7=CMP.
   ADC and SBB consume the incoming carry flag and so are routed through
   helper_ADC/helper_SBB, which build the flags thunk and, in the memory
   form, also perform the (possibly LOCK-prefixed, CAS-style) store.
   On entry 'delta' points at the modrm byte; am_sz/d_sz are the sizes of
   the address-mode bytes and the immediate, sz is the operand size, and
   d64 is the sign-extended immediate.  Returns the updated delta. */
static
ULong dis_Grp1 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, Long d64 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   ULong   mask = mkSizeMask(sz);

   /* Select the 8-bit base IROp; mkSizedOp widens it to the operand
      size later.  ADC/SBB (cases 2/3) leave op8 alone and use helpers.
      Case 7 (CMP) uses Sub8 but suppresses the writeback below. */
   switch (gregLO3ofRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1(amd64): unhandled case");
   }

   if (epartIsReg(modrm)) {
      /* Register destination: no store, so no LOCK handling needed. */
      vassert(am_sz == 1);

      assign(dst0, getIRegE(sz,pfx,modrm));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* CMP (case 7) only sets flags; no register writeback. */
      if (gregLO3ofRM(modrm) < 7)
         putIRegE(sz, pfx, modrm, mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
          nameIRegE(sz,pfx,modrm));
   } else {
      /* Memory destination. */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* CMP (case 7) only sets flags; no store. */
         if (gregLO3ofRM(modrm) < 7) {
            if (pfx & PFX_LOCK) {
               /* LOCK prefix: emit a compare-and-swap so the guest's
                  atomicity is preserved in the IR. */
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
          d64, dis_buf);
   }
   return delta;
}
   3245 
   3246 
/* Group 2 extended opcodes (0xC0/0xC1/0xD0..0xD3): rotates and shifts,
   selected by the reg field of the modrm byte:
     0=ROL 1=ROR 2=RCL 3=RCR 4=SHL 5=SHR 6=SHL(alias SAL) 7=SAR.
   shift_expr must be an 8-bit typed expression; shift_expr_txt, if
   non-NULL, is its printable form for tracing.  On entry 'delta' points
   at the modrm byte.  Sets *decode_OK and returns the updated delta. */

static
ULong dis_Grp2 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                 HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty    = szToITy(sz);
   IRTemp dst0  = newTemp(ty);
   IRTemp dst1  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIRegE(sz, pfx, modrm));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the operation; exactly one of these becomes True. */
   isShift = False;
   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(amd64)");
   }

   if (isRotateC) {
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
         Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
      Bool     left = toBool(gregLO3ofRM(modrm) == 2);
      IRExpr** argsVALUE;
      IRExpr** argsRFLAGS;

      IRTemp new_value  = newTemp(Ity_I64);
      IRTemp new_rflags = newTemp(Ity_I64);
      IRTemp old_rflags = newTemp(Ity_I64);

      /* RCL/RCR rotate through the carry flag, so the current rflags
         must be materialised and passed to the helper. */
      assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );

      /* First call: positive sz selects the value result. */
      argsVALUE
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(sz) );
      assign( new_value,
                 mkIRExprCCall(
                    Ity_I64,
                    0/*regparm*/,
                    left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                    left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                    argsVALUE
                 )
            );

      /* Second call: negative sz selects the rflags result. */
      argsRFLAGS
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(-sz) );
      assign( new_rflags,
                 mkIRExprCCall(
                    Ity_I64,
                    0/*regparm*/,
                    left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                    left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                    argsRFLAGS
                 )
            );

      /* The helper already computed final flags, so install them with
         the COPY thunk. */
      assign( dst1, narrowTo(ty, mkexpr(new_value)) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   }

   else
   if (isShift) {

      IRTemp pre64     = newTemp(Ity_I64);
      IRTemp res64     = newTemp(Ity_I64);
      IRTemp res64ss   = newTemp(Ity_I64);
      IRTemp shift_amt = newTemp(Ity_I8);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);
      IROp   op64;

      /* Case 6 is an alias encoding for SHL (SAL). */
      switch (gregLO3ofRM(modrm)) {
         case 4: op64 = Iop_Shl64; break;
         case 5: op64 = Iop_Shr64; break;
         case 6: op64 = Iop_Shl64; break;
         case 7: op64 = Iop_Sar64; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
      /* shift_amt = shift_expr & MASK, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );

      /* suitably widen the value to be shifted to 64 bits. */
      assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
                                     : widenUto64(mkexpr(dst0)) );

      /* res64 = pre64 `shift` shift_amt */
      assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );

      /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
      /* the single-shortened result is needed by the flags thunk to
         recover the last bit shifted out (the new carry). */
      assign( res64ss,
              binop(op64,
                    mkexpr(pre64),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(mask))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res64)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      /* ccOp indexes the size variant of the ROL/ROR CC_OP family:
         0=byte 1=word 2=dword 3=qword. */
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
                                        : (ty==Ity_I32 ? 2 : 3));
      Bool   left      = toBool(gregLO3ofRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      IRTemp rot_amt64 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I64);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));

      if (ty == Ity_I64)
         assign(rot_amt, mkexpr(rot_amt64));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_amd64g_calculate_rflags_all());

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before. */
      /* Each Put is guarded by Mux0X on rot_amt64: when the masked
         rotate count is zero the existing thunk field is kept, i.e.
         flags are unchanged, as the architecture requires. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                      mkU64(ccOp))) );
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                      widenUto64(mkexpr(dst1)))) );
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                      mkU64(0))) );
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
                                      mkexpr(oldFlags))) );
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
   3517 
   3518 
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only), immediate-bit
   form (0x0F 0xBA): reg field selects 4=BT 5=BTS 6=BTR 7=BTC.
   src_val is the d8 bit offset; delta on entry points at the modrm
   byte.  Sets *decode_OK and returns the updated delta. */
static
ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, UChar modrm,
                     Int am_sz, Int sz, ULong src_val,
                     Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I64);
   IRTemp t2m    = newTemp(Ity_I64);
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   ULong  mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2:  src_val &= 15; break;
      case 4:  src_val &= 31; break;
      case 8:  src_val &= 63; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */  mask = 0;                  break;
      case 5: /* BTS */ mask = 1ULL << src_val;    break;
      case 6: /* BTR */ mask = ~(1ULL << src_val); break;
      case 7: /* BTC */ mask = 1ULL << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      64-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, nameIRegE(sz,pfx,modrm));
   } else {
      Int len;
      t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
      delta  += (len+1);
      assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   /* BTS ors the mask in, BTR ands with the inverted mask, BTC xors
      to toggle the selected bit. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
         break;
     default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT. */
   if (gregLO3ofRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
	putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
      } else {
         if (pfx & PFX_LOCK) {
            /* LOCK prefix: do the update as a compare-and-swap so the
               guest's atomicity is preserved. */
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_RIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
                  mkU64(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}
   3632 
   3633 
   3634 /* Signed/unsigned widening multiply.  Generate IR to multiply the
   3635    value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
   3636    RDX:RAX/EDX:EAX/DX:AX/AX.
   3637 */
   3638 static void codegen_mulL_A_D ( Int sz, Bool syned,
   3639                                IRTemp tmp, HChar* tmp_txt )
   3640 {
   3641    IRType ty = szToITy(sz);
   3642    IRTemp t1 = newTemp(ty);
   3643 
   3644    assign( t1, getIRegRAX(sz) );
   3645 
   3646    switch (ty) {
   3647       case Ity_I64: {
   3648          IRTemp res128  = newTemp(Ity_I128);
   3649          IRTemp resHi   = newTemp(Ity_I64);
   3650          IRTemp resLo   = newTemp(Ity_I64);
   3651          IROp   mulOp   = syned ? Iop_MullS64 : Iop_MullU64;
   3652          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3653          setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
   3654          assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3655          assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
   3656          assign( resLo, unop(Iop_128to64,mkexpr(res128)));
   3657          putIReg64(R_RDX, mkexpr(resHi));
   3658          putIReg64(R_RAX, mkexpr(resLo));
   3659          break;
   3660       }
   3661       case Ity_I32: {
   3662          IRTemp res64   = newTemp(Ity_I64);
   3663          IRTemp resHi   = newTemp(Ity_I32);
   3664          IRTemp resLo   = newTemp(Ity_I32);
   3665          IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
   3666          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3667          setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
   3668          assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3669          assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
   3670          assign( resLo, unop(Iop_64to32,mkexpr(res64)));
   3671          putIRegRDX(4, mkexpr(resHi));
   3672          putIRegRAX(4, mkexpr(resLo));
   3673          break;
   3674       }
   3675       case Ity_I16: {
   3676          IRTemp res32   = newTemp(Ity_I32);
   3677          IRTemp resHi   = newTemp(Ity_I16);
   3678          IRTemp resLo   = newTemp(Ity_I16);
   3679          IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
   3680          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3681          setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
   3682          assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3683          assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
   3684          assign( resLo, unop(Iop_32to16,mkexpr(res32)));
   3685          putIRegRDX(2, mkexpr(resHi));
   3686          putIRegRAX(2, mkexpr(resLo));
   3687          break;
   3688       }
   3689       case Ity_I8: {
   3690          IRTemp res16   = newTemp(Ity_I16);
   3691          IRTemp resHi   = newTemp(Ity_I8);
   3692          IRTemp resLo   = newTemp(Ity_I8);
   3693          IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
   3694          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3695          setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
   3696          assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3697          assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
   3698          assign( resLo, unop(Iop_16to8,mkexpr(res16)));
   3699          putIRegRAX(2, mkexpr(res16));
   3700          break;
   3701       }
   3702       default:
   3703          ppIRType(ty);
   3704          vpanic("codegen_mulL_A_D(amd64)");
   3705    }
   3706    DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
   3707 }
   3708 
   3709 
/* Group 3 extended opcodes (0xF6/0xF7): reg field selects
     0=TEST 1=(invalid) 2=NOT 3=NEG 4=MUL 5=IMUL 6=DIV 7=IDIV.
   delta on entry points at the modrm byte.  Sets *decode_OK and
   returns the updated delta. */
static
ULong dis_Grp3 ( VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
{
   Long    d64;
   UChar   modrm;
   HChar   dis_buf[50];
   Int     len;
   IRTemp  addr;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp dst1, src, dst0;
   *decode_OK = True;
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* Register operand forms. */
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            delta++;
            /* immediate is at most 4 bytes even for 64-bit operands. */
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIRegE(sz,pfx,modrm),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n",
                nameISize(sz), d64,
                nameIRegE(sz, pfx, modrm));
            break;
         }
         case 1:
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            delta++;
            putIRegE(sz, pfx, modrm,
                              unop(mkSizedOp(ty,Iop_Not8),
                                   getIRegE(sz, pfx, modrm)));
            DIP("not%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 3: /* NEG */
            delta++;
            /* NEG is modelled as 0 - src so the SUB flags thunk can
               be reused. */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  getIRegE(sz, pfx, modrm));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIRegE(sz, pfx, modrm, mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, False, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIRegE(sz,pfx,modrm));
            codegen_mulL_A_D ( sz, True, src,
                               nameIRegE(sz,pfx,modrm) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz),
                              nameIRegE(sz, pfx, modrm));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIRegE(sz, pfx, modrm) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz),
                               nameIRegE(sz, pfx, modrm));
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,R)");
      }
   } else {
      /* Memory operand forms. */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* we have to inform disAMode of any immediate
			   bytes used */
                        gregLO3ofRM(modrm)==0/*TEST*/
                           ? imin(4,sz)
                           : 0
                      );
      t1   = newTemp(ty);
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregLO3ofRM(modrm)) {
         case 0: { /* TEST */
            d64 = getSDisp(imin(4,sz), delta);
            delta += imin(4,sz);
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1),
                               mkU(ty, d64 & mkSizeMask(sz))));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
            break;
         }
         case 1:
            *decode_OK = False;
            return delta;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            /* LOCK prefix: store via compare-and-swap to preserve
               the guest's atomicity. */
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            /* NEG is modelled as 0 - src so the SUB flags thunk can
               be reused. */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
                                                       mkexpr(src)));
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL (unsigned widening) */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default:
            /*NOTREACHED*/
            vpanic("Grp3(amd64,M)");
      }
   }
   return delta;
}
   3872 
   3873 
/* Group 4 extended opcodes: the FE-prefixed byte ops, selected by the
   reg field of the modRM byte -- /0 is INC Eb, /1 is DEC Eb.  All
   other slots are rejected via *decode_OK.  Returns the updated
   instruction offset (delta). */
static
ULong dis_Grp4 ( VexAbiInfo* vbi,
                 Prefix pfx, Long delta, Bool* decode_OK )
{
   Int   alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;        /* Grp4 operations are always byte-sized */
   IRTemp t1 = newTemp(ty);   /* original operand value */
   IRTemp t2 = newTemp(ty);   /* updated operand value */

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* Register operand. */
      assign(t1, getIRegE(1, pfx, modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
                      nameIRegE(1, pfx, modrm));
   } else {
      /* Memory operand. */
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (pfx & PFX_LOCK) {
               /* LOCK prefix: express the update as a compare-and-swap
                  at this instruction's address. */
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
   }
   return delta;
}
   3942 
   3943 
   3944 /* Group 5 extended opcodes. */
   3945 static
   3946 ULong dis_Grp5 ( VexAbiInfo* vbi,
   3947                  Prefix pfx, Int sz, Long delta,
   3948                  DisResult* dres, Bool* decode_OK )
   3949 {
   3950    Int     len;
   3951    UChar   modrm;
   3952    HChar   dis_buf[50];
   3953    IRTemp  addr = IRTemp_INVALID;
   3954    IRType  ty = szToITy(sz);
   3955    IRTemp  t1 = newTemp(ty);
   3956    IRTemp  t2 = IRTemp_INVALID;
   3957    IRTemp  t3 = IRTemp_INVALID;
   3958    Bool    showSz = True;
   3959 
   3960    *decode_OK = True;
   3961 
   3962    modrm = getUChar(delta);
   3963    if (epartIsReg(modrm)) {
   3964       assign(t1, getIRegE(sz,pfx,modrm));
   3965       switch (gregLO3ofRM(modrm)) {
   3966          case 0: /* INC */
   3967             t2 = newTemp(ty);
   3968             assign(t2, binop(mkSizedOp(ty,Iop_Add8),
   3969                              mkexpr(t1), mkU(ty,1)));
   3970             setFlags_INC_DEC( True, t2, ty );
   3971             putIRegE(sz,pfx,modrm, mkexpr(t2));
   3972             break;
   3973          case 1: /* DEC */
   3974             t2 = newTemp(ty);
   3975             assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
   3976                              mkexpr(t1), mkU(ty,1)));
   3977             setFlags_INC_DEC( False, t2, ty );
   3978             putIRegE(sz,pfx,modrm, mkexpr(t2));
   3979             break;
   3980          case 2: /* call Ev */
   3981             /* Ignore any sz value and operate as if sz==8. */
   3982             if (!(sz == 4 || sz == 8)) goto unhandled;
   3983             sz = 8;
   3984             t3 = newTemp(Ity_I64);
   3985             assign(t3, getIRegE(sz,pfx,modrm));
   3986             t2 = newTemp(Ity_I64);
   3987             assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
   3988             putIReg64(R_RSP, mkexpr(t2));
   3989             storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
   3990             make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
   3991             jmp_treg(Ijk_Call,t3);
   3992             dres->whatNext = Dis_StopHere;
   3993             showSz = False;
   3994             break;
   3995          case 4: /* jmp Ev */
   3996             /* Ignore any sz value and operate as if sz==8. */
   3997             if (!(sz == 4 || sz == 8)) goto unhandled;
   3998             sz = 8;
   3999             t3 = newTemp(Ity_I64);
   4000             assign(t3, getIRegE(sz,pfx,modrm));
   4001             jmp_treg(Ijk_Boring,t3);
   4002             dres->whatNext = Dis_StopHere;
   4003             showSz = False;
   4004             break;
   4005          default:
   4006             *decode_OK = False;
   4007             return delta;
   4008       }
   4009       delta++;
   4010       DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
   4011                        showSz ? nameISize(sz) : ' ',
   4012                        nameIRegE(sz, pfx, modrm));
   4013    } else {
   4014       addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
   4015       if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
   4016                                   && gregLO3ofRM(modrm) != 6) {
   4017          assign(t1, loadLE(ty,mkexpr(addr)));
   4018       }
   4019       switch (gregLO3ofRM(modrm)) {
   4020          case 0: /* INC */
   4021             t2 = newTemp(ty);
   4022             assign(t2, binop(mkSizedOp(ty,Iop_Add8),
   4023                              mkexpr(t1), mkU(ty,1)));
   4024             if (pfx & PFX_LOCK) {
   4025                casLE( mkexpr(addr),
   4026                       mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
   4027             } else {
   4028                storeLE(mkexpr(addr),mkexpr(t2));
   4029             }
   4030             setFlags_INC_DEC( True, t2, ty );
   4031             break;
   4032          case 1: /* DEC */
   4033             t2 = newTemp(ty);
   4034             assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
   4035                              mkexpr(t1), mkU(ty,1)));
   4036             if (pfx & PFX_LOCK) {
   4037                casLE( mkexpr(addr),
   4038                       mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
   4039             } else {
   4040                storeLE(mkexpr(addr),mkexpr(t2));
   4041             }
   4042             setFlags_INC_DEC( False, t2, ty );
   4043             break;
   4044          case 2: /* call Ev */
   4045             /* Ignore any sz value and operate as if sz==8. */
   4046             if (!(sz == 4 || sz == 8)) goto unhandled;
   4047             sz = 8;
   4048             t3 = newTemp(Ity_I64);
   4049             assign(t3, loadLE(Ity_I64,mkexpr(addr)));
   4050             t2 = newTemp(Ity_I64);
   4051             assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
   4052             putIReg64(R_RSP, mkexpr(t2));
   4053             storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
   4054             make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
   4055             jmp_treg(Ijk_Call,t3);
   4056             dres->whatNext = Dis_StopHere;
   4057             showSz = False;
   4058             break;
   4059          case 4: /* JMP Ev */
   4060             /* Ignore any sz value and operate as if sz==8. */
   4061             if (!(sz == 4 || sz == 8)) goto unhandled;
   4062             sz = 8;
   4063             t3 = newTemp(Ity_I64);
   4064             assign(t3, loadLE(Ity_I64,mkexpr(addr)));
   4065             jmp_treg(Ijk_Boring,t3);
   4066             dres->whatNext = Dis_StopHere;
   4067             showSz = False;
   4068             break;
   4069          case 6: /* PUSH Ev */
   4070             /* There is no encoding for 32-bit operand size; hence ... */
   4071             if (sz == 4) sz = 8;
   4072             if (!(sz == 8 || sz == 2)) goto unhandled;
   4073             if (sz == 8) {
   4074                t3 = newTemp(Ity_I64);
   4075                assign(t3, loadLE(Ity_I64,mkexpr(addr)));
   4076                t2 = newTemp(Ity_I64);
   4077                assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
   4078                putIReg64(R_RSP, mkexpr(t2) );
   4079                storeLE( mkexpr(t2), mkexpr(t3) );
   4080                break;
   4081 	    } else {
   4082                goto unhandled; /* awaiting test case */
   4083 	    }
   4084          default:
   4085          unhandled:
   4086             *decode_OK = False;
   4087             return delta;
   4088       }
   4089       delta += len;
   4090       DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
   4091                        showSz ? nameISize(sz) : ' ',
   4092                        dis_buf);
   4093    }
   4094    return delta;
   4095 }
   4096 
   4097 
   4098 /*------------------------------------------------------------*/
   4099 /*--- Disassembling string ops (including REP prefixes)    ---*/
   4100 /*------------------------------------------------------------*/
   4101 
   4102 /* Code shared by all the string ops */
   4103 static
   4104 void dis_string_op_increment ( Int sz, IRTemp t_inc )
   4105 {
   4106    UChar logSz;
   4107    if (sz == 8 || sz == 4 || sz == 2) {
   4108       logSz = 1;
   4109       if (sz == 4) logSz = 2;
   4110       if (sz == 8) logSz = 3;
   4111       assign( t_inc,
   4112               binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
   4113                                mkU8(logSz) ) );
   4114    } else {
   4115       assign( t_inc,
   4116               IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   4117    }
   4118 }
   4119 
   4120 static
   4121 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
   4122                     Int sz, HChar* name, Prefix pfx )
   4123 {
   4124    IRTemp t_inc = newTemp(Ity_I64);
   4125    /* Really we ought to inspect the override prefixes, but we don't.
   4126       The following assertion catches any resulting sillyness. */
   4127    vassert(pfx == clearSegBits(pfx));
   4128    dis_string_op_increment(sz, t_inc);
   4129    dis_OP( sz, t_inc, pfx );
   4130    DIP("%s%c\n", name, nameISize(sz));
   4131 }
   4132 
   4133 static
   4134 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
   4135 {
   4136    IRType ty = szToITy(sz);
   4137    IRTemp td = newTemp(Ity_I64);   /* RDI */
   4138    IRTemp ts = newTemp(Ity_I64);   /* RSI */
   4139    IRExpr *incd, *incs;
   4140 
   4141    if (haveASO(pfx)) {
   4142       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4143       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4144    } else {
   4145       assign( td, getIReg64(R_RDI) );
   4146       assign( ts, getIReg64(R_RSI) );
   4147    }
   4148 
   4149    storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
   4150 
   4151    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4152    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4153    if (haveASO(pfx)) {
   4154       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4155       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4156    }
   4157    putIReg64( R_RDI, incd );
   4158    putIReg64( R_RSI, incs );
   4159 }
   4160 
   4161 static
   4162 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
   4163 {
   4164    IRType ty = szToITy(sz);
   4165    IRTemp ts = newTemp(Ity_I64);   /* RSI */
   4166    IRExpr *incs;
   4167 
   4168    if (haveASO(pfx))
   4169       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4170    else
   4171       assign( ts, getIReg64(R_RSI) );
   4172 
   4173    putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
   4174 
   4175    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4176    if (haveASO(pfx))
   4177       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4178    putIReg64( R_RSI, incs );
   4179 }
   4180 
   4181 static
   4182 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
   4183 {
   4184    IRType ty = szToITy(sz);
   4185    IRTemp ta = newTemp(ty);        /* rAX */
   4186    IRTemp td = newTemp(Ity_I64);   /* RDI */
   4187    IRExpr *incd;
   4188 
   4189    assign( ta, getIRegRAX(sz) );
   4190 
   4191    if (haveASO(pfx))
   4192       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4193    else
   4194       assign( td, getIReg64(R_RDI) );
   4195 
   4196    storeLE( mkexpr(td), mkexpr(ta) );
   4197 
   4198    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4199    if (haveASO(pfx))
   4200       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4201    putIReg64( R_RDI, incd );
   4202 }
   4203 
   4204 static
   4205 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
   4206 {
   4207    IRType ty  = szToITy(sz);
   4208    IRTemp tdv = newTemp(ty);      /* (RDI) */
   4209    IRTemp tsv = newTemp(ty);      /* (RSI) */
   4210    IRTemp td  = newTemp(Ity_I64); /*  RDI  */
   4211    IRTemp ts  = newTemp(Ity_I64); /*  RSI  */
   4212    IRExpr *incd, *incs;
   4213 
   4214    if (haveASO(pfx)) {
   4215       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4216       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4217    } else {
   4218       assign( td, getIReg64(R_RDI) );
   4219       assign( ts, getIReg64(R_RSI) );
   4220    }
   4221 
   4222    assign( tdv, loadLE(ty,mkexpr(td)) );
   4223 
   4224    assign( tsv, loadLE(ty,mkexpr(ts)) );
   4225 
   4226    setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
   4227 
   4228    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4229    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4230    if (haveASO(pfx)) {
   4231       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4232       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4233    }
   4234    putIReg64( R_RDI, incd );
   4235    putIReg64( R_RSI, incs );
   4236 }
   4237 
   4238 static
   4239 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
   4240 {
   4241    IRType ty  = szToITy(sz);
   4242    IRTemp ta  = newTemp(ty);       /*  rAX  */
   4243    IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
   4244    IRTemp tdv = newTemp(ty);       /* (RDI) */
   4245    IRExpr *incd;
   4246 
   4247    assign( ta, getIRegRAX(sz) );
   4248 
   4249    if (haveASO(pfx))
   4250       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4251    else
   4252       assign( td, getIReg64(R_RDI) );
   4253 
   4254    assign( tdv, loadLE(ty,mkexpr(td)) );
   4255 
   4256    setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
   4257 
   4258    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4259    if (haveASO(pfx))
   4260       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4261    putIReg64( R_RDI, incd );
   4262 }
   4263 
   4264 
   4265 /* Wrap the appropriate string op inside a REP/REPE/REPNE.  We assume
   4266    the insn is the last one in the basic block, and so emit a jump to
   4267    the next insn, rather than just falling through. */
   4268 static
   4269 void dis_REP_op ( AMD64Condcode cond,
   4270                   void (*dis_OP)(Int, IRTemp, Prefix),
   4271                   Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
   4272                   Prefix pfx )
   4273 {
   4274    IRTemp t_inc = newTemp(Ity_I64);
   4275    IRTemp tc;
   4276    IRExpr* cmp;
   4277 
   4278    /* Really we ought to inspect the override prefixes, but we don't.
   4279       The following assertion catches any resulting sillyness. */
   4280    vassert(pfx == clearSegBits(pfx));
   4281 
   4282    if (haveASO(pfx)) {
   4283       tc = newTemp(Ity_I32);  /*  ECX  */
   4284       assign( tc, getIReg32(R_RCX) );
   4285       cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   4286    } else {
   4287       tc = newTemp(Ity_I64);  /*  RCX  */
   4288       assign( tc, getIReg64(R_RCX) );
   4289       cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   4290    }
   4291 
   4292    stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) );
   4293 
   4294    if (haveASO(pfx))
   4295       putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
   4296   else
   4297       putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
   4298 
   4299    dis_string_op_increment(sz, t_inc);
   4300    dis_OP (sz, t_inc, pfx);
   4301 
   4302    if (cond == AMD64CondAlways) {
   4303       jmp_lit(Ijk_Boring,rip);
   4304    } else {
   4305       stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
   4306                          Ijk_Boring,
   4307                          IRConst_U64(rip) ) );
   4308       jmp_lit(Ijk_Boring,rip_next);
   4309    }
   4310    DIP("%s%c\n", name, nameISize(sz));
   4311 }
   4312 
   4313 
   4314 /*------------------------------------------------------------*/
   4315 /*--- Arithmetic, etc.                                     ---*/
   4316 /*------------------------------------------------------------*/
   4317 
   4318 /* IMUL E, G.  Supplied eip points to the modR/M byte. */
   4319 static
   4320 ULong dis_mul_E_G ( VexAbiInfo* vbi,
   4321                     Prefix      pfx,
   4322                     Int         size,
   4323                     Long        delta0 )
   4324 {
   4325    Int    alen;
   4326    HChar  dis_buf[50];
   4327    UChar  rm = getUChar(delta0);
   4328    IRType ty = szToITy(size);
   4329    IRTemp te = newTemp(ty);
   4330    IRTemp tg = newTemp(ty);
   4331    IRTemp resLo = newTemp(ty);
   4332 
   4333    assign( tg, getIRegG(size, pfx, rm) );
   4334    if (epartIsReg(rm)) {
   4335       assign( te, getIRegE(size, pfx, rm) );
   4336    } else {
   4337       IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
   4338       assign( te, loadLE(ty,mkexpr(addr)) );
   4339    }
   4340 
   4341    setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
   4342 
   4343    assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
   4344 
   4345    putIRegG(size, pfx, rm, mkexpr(resLo) );
   4346 
   4347    if (epartIsReg(rm)) {
   4348       DIP("imul%c %s, %s\n", nameISize(size),
   4349                              nameIRegE(size,pfx,rm),
   4350                              nameIRegG(size,pfx,rm));
   4351       return 1+delta0;
   4352    } else {
   4353       DIP("imul%c %s, %s\n", nameISize(size),
   4354                              dis_buf,
   4355                              nameIRegG(size,pfx,rm));
   4356       return alen+delta0;
   4357    }
   4358 }
   4359 
   4360 
   4361 /* IMUL I * E -> G.  Supplied rip points to the modR/M byte. */
static
ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
                       Prefix      pfx,
                       Int         size,
                       Long        delta,
                       Int         litsize )
{
   /* Three-operand IMUL: G := E * imm, truncated to 'size' bytes,
      with flags from the signed multiply.  'litsize' is the encoded
      immediate width in bytes; at most 4 bytes are actually fetched.
      Returns the updated instruction offset. */
   Long   d64;
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getUChar(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);      /* the E (reg/mem) operand */
   IRTemp tl = newTemp(ty);      /* the immediate, at operand width */
   IRTemp resLo = newTemp(ty);   /* low 'size' bytes of the product */

   vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);

   if (epartIsReg(rm)) {
      assign(te, getIRegE(size, pfx, rm));
      delta++;
   } else {
      /* Tell disAMode how many immediate bytes follow the amode, so
         any RIP-relative displacement is resolved correctly. */
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                                     imin(4,litsize) );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   /* Fetch the (signed) immediate: 1, 2 or 4 bytes. */
   d64 = getSDisp(imin(4,litsize),delta);
   delta += imin(4,litsize);

   /* Truncate the immediate to the operand size before use. */
   d64 &= mkSizeMask(size);
   assign(tl, mkU(ty,d64));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );

   putIRegG(size, pfx, rm, mkexpr(resLo));

   DIP("imul%c $%lld, %s, %s\n",
       nameISize(size), d64,
       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
       nameIRegG(size,pfx,rm) );
   return delta;
}
   4407 
   4408 
   4409 /* Generate an IR sequence to do a popcount operation on the supplied
   4410    IRTemp, and return a new IRTemp holding the result.  'ty' may be
   4411    Ity_I16, Ity_I32 or Ity_I64 only. */
   4412 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
   4413 {
   4414    Int i;
   4415    if (ty == Ity_I16) {
   4416       IRTemp old = IRTemp_INVALID;
   4417       IRTemp nyu = IRTemp_INVALID;
   4418       IRTemp mask[4], shift[4];
   4419       for (i = 0; i < 4; i++) {
   4420          mask[i]  = newTemp(ty);
   4421          shift[i] = 1 << i;
   4422       }
   4423       assign(mask[0], mkU16(0x5555));
   4424       assign(mask[1], mkU16(0x3333));
   4425       assign(mask[2], mkU16(0x0F0F));
   4426       assign(mask[3], mkU16(0x00FF));
   4427       old = src;
   4428       for (i = 0; i < 4; i++) {
   4429          nyu = newTemp(ty);
   4430          assign(nyu,
   4431                 binop(Iop_Add16,
   4432                       binop(Iop_And16,
   4433                             mkexpr(old),
   4434                             mkexpr(mask[i])),
   4435                       binop(Iop_And16,
   4436                             binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
   4437                             mkexpr(mask[i]))));
   4438          old = nyu;
   4439       }
   4440       return nyu;
   4441    }
   4442    if (ty == Ity_I32) {
   4443       IRTemp old = IRTemp_INVALID;
   4444       IRTemp nyu = IRTemp_INVALID;
   4445       IRTemp mask[5], shift[5];
   4446       for (i = 0; i < 5; i++) {
   4447          mask[i]  = newTemp(ty);
   4448          shift[i] = 1 << i;
   4449       }
   4450       assign(mask[0], mkU32(0x55555555));
   4451       assign(mask[1], mkU32(0x33333333));
   4452       assign(mask[2], mkU32(0x0F0F0F0F));
   4453       assign(mask[3], mkU32(0x00FF00FF));
   4454       assign(mask[4], mkU32(0x0000FFFF));
   4455       old = src;
   4456       for (i = 0; i < 5; i++) {
   4457          nyu = newTemp(ty);
   4458          assign(nyu,
   4459                 binop(Iop_Add32,
   4460                       binop(Iop_And32,
   4461                             mkexpr(old),
   4462                             mkexpr(mask[i])),
   4463                       binop(Iop_And32,
   4464                             binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
   4465                             mkexpr(mask[i]))));
   4466          old = nyu;
   4467       }
   4468       return nyu;
   4469    }
   4470    if (ty == Ity_I64) {
   4471       IRTemp old = IRTemp_INVALID;
   4472       IRTemp nyu = IRTemp_INVALID;
   4473       IRTemp mask[6], shift[6];
   4474       for (i = 0; i < 6; i++) {
   4475          mask[i]  = newTemp(ty);
   4476          shift[i] = 1 << i;
   4477       }
   4478       assign(mask[0], mkU64(0x5555555555555555ULL));
   4479       assign(mask[1], mkU64(0x3333333333333333ULL));
   4480       assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
   4481       assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
   4482       assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
   4483       assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
   4484       old = src;
   4485       for (i = 0; i < 6; i++) {
   4486          nyu = newTemp(ty);
   4487          assign(nyu,
   4488                 binop(Iop_Add64,
   4489                       binop(Iop_And64,
   4490                             mkexpr(old),
   4491                             mkexpr(mask[i])),
   4492                       binop(Iop_And64,
   4493                             binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
   4494                             mkexpr(mask[i]))));
   4495          old = nyu;
   4496       }
   4497       return nyu;
   4498    }
   4499    /*NOTREACHED*/
   4500    vassert(0);
   4501 }
   4502 
   4503 
/* Generate an IR sequence to do a count-leading-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   the argument is zero, return the number of bits in the word (the
   natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   /* Widen to 64 bits ... */
   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   /* ... then shift left so the operand's top bit lands in bit 63;
      the leading-zero count of the shifted value then equals that of
      the original narrow value. */
   IRTemp src64x = newTemp(Ity_I64);
   assign(src64x,
          binop(Iop_Shl64, mkexpr(src64),
                           mkU8(64 - 8 * sizeofIRType(ty))));

   // Clz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_Mux0X(
             unop(Iop_1Uto8,
                  binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))),
             unop(Iop_Clz64, mkexpr(src64x)),
             mkU64(8 * sizeofIRType(ty))
   ));

   /* Narrow the count back to the operand width. */
   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}
   4536 
   4537 
   4538 /*------------------------------------------------------------*/
   4539 /*---                                                      ---*/
   4540 /*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
   4541 /*---                                                      ---*/
   4542 /*------------------------------------------------------------*/
   4543 
   4544 /* --- Helper functions for dealing with the register stack. --- */
   4545 
   4546 /* --- Set the emulation-warning pseudo-register. --- */
   4547 
static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   /* Write 'e' into the emulation-warning pseudo-register. */
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMWARN, e ) );
}
   4553 
   4554 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
   4555 
static IRExpr* mkQNaN64 ( void )
{
   /* QNaN bit pattern: sign 0, exponent all-ones (2047), top
      fraction bit 1, remaining 51 fraction bits 0
      == 0x7FF8 0000 0000 0000 */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}
   4564 
   4565 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
   4566 
static IRExpr* get_ftop ( void )
{
   /* Read the x87 top-of-stack pointer from the guest state. */
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}
   4571 
static void put_ftop ( IRExpr* e )
{
   /* Write the x87 top-of-stack pointer; 'e' must be Ity_I32. */
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
   4577 
   4578 /* --------- Get/put the C3210 bits. --------- */
   4579 
static IRExpr*  /* :: Ity_I64 */ get_C3210 ( void )
{
   /* Read the FPU C3..C0 condition bits from the guest state. */
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}
   4584 
static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   /* Write the FPU C3..C0 condition bits; 'e' must be Ity_I64. */
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}
   4590 
   4591 /* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   /* FPROUND is held as 64 bits in the guest state but is used here
      as a 32-bit value. */
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}
   4596 
static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   /* Store the 32-bit rounding value zero-extended to 64 bits. */
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}
   4602 
   4603 
   4604 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
   4605 /* Produces a value in 0 .. 3, which is encoded as per the type
   4606    IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   4607    per IRRoundingMode, we merely need to get it and mask it for
   4608    safety.
   4609 */
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   /* Mask to the low 2 bits for safety; guest_FPROUND is already
      encoded as per IRRoundingMode (see comment above). */
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}
   4614 
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   /* Constant round-to-nearest, used at sites that knowingly ignore
      the real rounding mode (marked XXXROUNDINGFIXME). */
   return mkU32(Irrm_NEAREST);
}
   4619 
   4620 
   4621 /* --------- Get/set FP register tag bytes. --------- */
   4622 
   4623 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
   4624 
static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   /* FPTAGS is an 8-entry array accessed circularly, indexed
      relative to the current FTOP. */
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
}
   4632 
   4633 /* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   4634    zero to indicate "Empty" and nonzero to indicate "NonEmpty".  */
   4635 
static IRExpr* get_ST_TAG ( Int i )
{
   /* Read tag i, relative to FTOP, from the circular FPTAGS array. */
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
   4641 
   4642 
   4643 /* --------- Get/set FP registers. --------- */
   4644 
   4645 /* Given i, and some expression e, emit 'ST(i) = e' and set the
   4646    register's tag to indicate the register is full.  The previous
   4647    state of the register is not checked. */
   4648 
static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   /* FPREGS is an 8-entry array accessed circularly, indexed
      relative to the current FTOP. */
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}
   4658 
   4659 /* Given i, and some expression e, emit
   4660       ST(i) = is_full(i) ? NaN : e
   4661    and set the tag accordingly.
   4662 */
   4663 
static void put_ST ( Int i, IRExpr* value )
{
   /* If ST(i) is already marked full, write a QNaN instead of
      'value'; either way the slot ends up tagged as full. */
   put_ST_UNCHECKED( i,
                     IRExpr_Mux0X( get_ST_TAG(i),
                                   /* 0 means empty */
                                   value,
                                   /* non-0 means full */
                                   mkQNaN64()
                   )
   );
}
   4675 
   4676 
   4677 /* Given i, generate an expression yielding 'ST(i)'. */
   4678 
static IRExpr* get_ST_UNCHECKED ( Int i )
{
   /* Read ST(i), relative to FTOP, without consulting its tag. */
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
   4684 
   4685 
   4686 /* Given i, generate an expression yielding
   4687   is_full(i) ? ST(i) : NaN
   4688 */
   4689 
static IRExpr* get_ST ( Int i )
{
   /* Reading an empty register yields a QNaN rather than whatever
      value the slot happens to hold. */
   return
      IRExpr_Mux0X( get_ST_TAG(i),
                    /* 0 means empty */
                    mkQNaN64(),
                    /* non-0 means full */
                    get_ST_UNCHECKED(i));
}
   4699 
   4700 
   4701 /* Adjust FTOP downwards by one register. */
   4702 
static void fp_push ( void )
{
   /* A push moves FTOP downwards by one. */
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}
   4707 
   4708 /* Adjust FTOP upwards by one register, and mark the vacated register
   4709    as empty.  */
   4710 
   4711 static void fp_pop ( void )
   4712 {
   4713    put_ST_TAG(0, mkU8(0));
   4714    put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
   4715 }
   4716 
   4717 /* Clear the C2 bit of the FPU status register, for
   4718    sin/cos/tan/sincos. */
   4719 
   4720 static void clear_C2 ( void )
   4721 {
   4722    put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
   4723 }
   4724 
   4725 /* Invent a plausible-looking FPU status word value:
   4726       ((ftop & 7) << 11) | (c3210 & 0x4700)
   4727  */
   4728 static IRExpr* get_FPU_sw ( void )
   4729 {
   4730    return
   4731       unop(Iop_32to16,
   4732            binop(Iop_Or32,
   4733                  binop(Iop_Shl32,
   4734                        binop(Iop_And32, get_ftop(), mkU32(7)),
   4735                              mkU8(11)),
   4736                        binop(Iop_And32, unop(Iop_64to32, get_C3210()),
   4737                                         mkU32(0x4700))
   4738       ));
   4739 }
   4740 
   4741 
   4742 /* ------------------------------------------------------- */
   4743 /* Given all that stack-mangling junk, we can now go ahead
   4744    and describe FP instructions.
   4745 */
   4746 
   4747 /* ST(0) = ST(0) `op` mem64/32(addr)
   4748    Need to check ST(0)'s tag on read, but not on write.
   4749 */
   4750 static
   4751 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
   4752                          IROp op, Bool dbl )
   4753 {
   4754    DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   4755    if (dbl) {
   4756       put_ST_UNCHECKED(0,
   4757          triop( op,
   4758                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4759                 get_ST(0),
   4760                 loadLE(Ity_F64,mkexpr(addr))
   4761          ));
   4762    } else {
   4763       put_ST_UNCHECKED(0,
   4764          triop( op,
   4765                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4766                 get_ST(0),
   4767                 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
   4768          ));
   4769    }
   4770 }
   4771 
   4772 
   4773 /* ST(0) = mem64/32(addr) `op` ST(0)
   4774    Need to check ST(0)'s tag on read, but not on write.
   4775 */
   4776 static
   4777 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
   4778                             IROp op, Bool dbl )
   4779 {
   4780    DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   4781    if (dbl) {
   4782       put_ST_UNCHECKED(0,
   4783          triop( op,
   4784                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4785                 loadLE(Ity_F64,mkexpr(addr)),
   4786                 get_ST(0)
   4787          ));
   4788    } else {
   4789       put_ST_UNCHECKED(0,
   4790          triop( op,
   4791                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4792                 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
   4793                 get_ST(0)
   4794          ));
   4795    }
   4796 }
   4797 
   4798 
   4799 /* ST(dst) = ST(dst) `op` ST(src).
   4800    Check dst and src tags when reading but not on write.
   4801 */
   4802 static
   4803 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
   4804                       Bool pop_after )
   4805 {
   4806    DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   4807    put_ST_UNCHECKED(
   4808       st_dst,
   4809       triop( op,
   4810              get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4811              get_ST(st_dst),
   4812              get_ST(st_src) )
   4813    );
   4814    if (pop_after)
   4815       fp_pop();
   4816 }
   4817 
   4818 /* ST(dst) = ST(src) `op` ST(dst).
   4819    Check dst and src tags when reading but not on write.
   4820 */
   4821 static
   4822 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
   4823                          Bool pop_after )
   4824 {
   4825    DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   4826    put_ST_UNCHECKED(
   4827       st_dst,
   4828       triop( op,
   4829              get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4830              get_ST(st_src),
   4831              get_ST(st_dst) )
   4832    );
   4833    if (pop_after)
   4834       fp_pop();
   4835 }
   4836 
   4837 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
   4838 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
   4839 {
   4840    DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
   4841    /* This is a bit of a hack (and isn't really right).  It sets
   4842       Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
   4843       documentation implies A and S are unchanged.
   4844    */
   4845    /* It's also fishy in that it is used both for COMIP and
   4846       UCOMIP, and they aren't the same (although similar). */
   4847    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   4848    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   4849    stmt( IRStmt_Put(
   4850             OFFB_CC_DEP1,
   4851             binop( Iop_And64,
   4852                    unop( Iop_32Uto64,
   4853                          binop(Iop_CmpF64, get_ST(0), get_ST(i))),
   4854                    mkU64(0x45)
   4855         )));
   4856    if (pop_after)
   4857       fp_pop();
   4858 }
   4859 
   4860 
   4861 /* returns
   4862    32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
   4863 */
   4864 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
   4865 {
   4866    IRTemp t32 = newTemp(Ity_I32);
   4867    assign( t32, e32 );
   4868    return
   4869       IRExpr_Mux0X(
   4870          unop(Iop_1Uto8,
   4871               binop(Iop_CmpLT64U,
   4872                     unop(Iop_32Uto64,
   4873                          binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
   4874                     mkU64(65536))),
   4875          mkU16( 0x8000 ),
   4876          unop(Iop_32to16, mkexpr(t32)));
   4877 }
   4878 
   4879 
   4880 static
   4881 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
   4882                 VexAbiInfo* vbi, Prefix pfx, Long delta )
   4883 {
   4884    Int    len;
   4885    UInt   r_src, r_dst;
   4886    HChar  dis_buf[50];
   4887    IRTemp t1, t2;
   4888 
   4889    /* On entry, delta points at the second byte of the insn (the modrm
   4890       byte).*/
   4891    UChar first_opcode = getUChar(delta-1);
   4892    UChar modrm        = getUChar(delta+0);
   4893 
   4894    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
   4895 
   4896    if (first_opcode == 0xD8) {
   4897       if (modrm < 0xC0) {
   4898 
   4899          /* bits 5,4,3 are an opcode extension, and the modRM also
   4900            specifies an address. */
   4901          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   4902          delta += len;
   4903 
   4904          switch (gregLO3ofRM(modrm)) {
   4905 
   4906             case 0: /* FADD single-real */
   4907                fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
   4908                break;
   4909 
   4910             case 1: /* FMUL single-real */
   4911                fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
   4912                break;
   4913 
   4914 //..             case 2: /* FCOM single-real */
   4915 //..                DIP("fcoms %s\n", dis_buf);
   4916 //..                /* This forces C1 to zero, which isn't right. */
   4917 //..                put_C3210(
   4918 //..                    binop( Iop_And32,
   4919 //..                           binop(Iop_Shl32,
   4920 //..                                 binop(Iop_CmpF64,
   4921 //..                                       get_ST(0),
   4922 //..                                       unop(Iop_F32toF64,
   4923 //..                                            loadLE(Ity_F32,mkexpr(addr)))),
   4924 //..                                 mkU8(8)),
   4925 //..                           mkU32(0x4500)
   4926 //..                    ));
   4927 //..                break;
   4928 //..
   4929 //..             case 3: /* FCOMP single-real */
   4930 //..                DIP("fcomps %s\n", dis_buf);
   4931 //..                /* This forces C1 to zero, which isn't right. */
   4932 //..                put_C3210(
   4933 //..                    binop( Iop_And32,
   4934 //..                           binop(Iop_Shl32,
   4935 //..                                 binop(Iop_CmpF64,
   4936 //..                                       get_ST(0),
   4937 //..                                       unop(Iop_F32toF64,
   4938 //..                                            loadLE(Ity_F32,mkexpr(addr)))),
   4939 //..                                 mkU8(8)),
   4940 //..                           mkU32(0x4500)
   4941 //..                    ));
   4942 //..                fp_pop();
   4943 //..                break;
   4944 
   4945             case 4: /* FSUB single-real */
   4946                fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
   4947                break;
   4948 
   4949             case 5: /* FSUBR single-real */
   4950                fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
   4951                break;
   4952 
   4953             case 6: /* FDIV single-real */
   4954                fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
   4955                break;
   4956 
   4957             case 7: /* FDIVR single-real */
   4958                fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
   4959                break;
   4960 
   4961             default:
   4962                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   4963                vex_printf("first_opcode == 0xD8\n");
   4964                goto decode_fail;
   4965          }
   4966       } else {
   4967          delta++;
   4968          switch (modrm) {
   4969 
   4970             case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
   4971                fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
   4972                break;
   4973 
   4974             case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
   4975                fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
   4976                break;
   4977 
   4978             /* Dunno if this is right */
   4979             case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
   4980                r_dst = (UInt)modrm - 0xD0;
   4981                DIP("fcom %%st(0),%%st(%d)\n", r_dst);
   4982                /* This forces C1 to zero, which isn't right. */
   4983                put_C3210(
   4984                    unop(Iop_32Uto64,
   4985                    binop( Iop_And32,
   4986                           binop(Iop_Shl32,
   4987                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   4988                                 mkU8(8)),
   4989                           mkU32(0x4500)
   4990                    )));
   4991                break;
   4992 
   4993             /* Dunno if this is right */
   4994             case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
   4995                r_dst = (UInt)modrm - 0xD8;
   4996                DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
   4997                /* This forces C1 to zero, which isn't right. */
   4998                put_C3210(
   4999                    unop(Iop_32Uto64,
   5000                    binop( Iop_And32,
   5001                           binop(Iop_Shl32,
   5002                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   5003                                 mkU8(8)),
   5004                           mkU32(0x4500)
   5005                    )));
   5006                fp_pop();
   5007                break;
   5008 
   5009             case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
   5010                fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
   5011                break;
   5012 
   5013             case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
   5014                fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
   5015                break;
   5016 
   5017             case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
   5018                fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
   5019                break;
   5020 
   5021             case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
   5022                fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
   5023                break;
   5024 
   5025             default:
   5026                goto decode_fail;
   5027          }
   5028       }
   5029    }
   5030 
   5031    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
   5032    else
   5033    if (first_opcode == 0xD9) {
   5034       if (modrm < 0xC0) {
   5035 
   5036          /* bits 5,4,3 are an opcode extension, and the modRM also
   5037             specifies an address. */
   5038          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5039          delta += len;
   5040 
   5041          switch (gregLO3ofRM(modrm)) {
   5042 
   5043             case 0: /* FLD single-real */
   5044                DIP("flds %s\n", dis_buf);
   5045                fp_push();
   5046                put_ST(0, unop(Iop_F32toF64,
   5047                               loadLE(Ity_F32, mkexpr(addr))));
   5048                break;
   5049 
   5050             case 2: /* FST single-real */
   5051                DIP("fsts %s\n", dis_buf);
   5052                storeLE(mkexpr(addr),
   5053                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
   5054                break;
   5055 
   5056             case 3: /* FSTP single-real */
   5057                DIP("fstps %s\n", dis_buf);
   5058                storeLE(mkexpr(addr),
   5059                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
   5060                fp_pop();
   5061                break;
   5062 
   5063             case 4: { /* FLDENV m28 */
   5064                /* Uses dirty helper:
   5065                      VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
   5066                IRTemp    ew = newTemp(Ity_I32);
   5067                IRTemp   w64 = newTemp(Ity_I64);
   5068                IRDirty*   d = unsafeIRDirty_0_N (
   5069                                  0/*regparms*/,
   5070                                  "amd64g_dirtyhelper_FLDENV",
   5071                                  &amd64g_dirtyhelper_FLDENV,
   5072                                  mkIRExprVec_1( mkexpr(addr) )
   5073                               );
   5074                d->needsBBP = True;
   5075                d->tmp      = w64;
   5076                /* declare we're reading memory */
   5077                d->mFx   = Ifx_Read;
   5078                d->mAddr = mkexpr(addr);
   5079                d->mSize = 28;
   5080 
   5081                /* declare we're writing guest state */
   5082                d->nFxState = 4;
   5083 
   5084                d->fxState[0].fx     = Ifx_Write;
   5085                d->fxState[0].offset = OFFB_FTOP;
   5086                d->fxState[0].size   = sizeof(UInt);
   5087 
   5088                d->fxState[1].fx     = Ifx_Write;
   5089                d->fxState[1].offset = OFFB_FPTAGS;
   5090                d->fxState[1].size   = 8 * sizeof(UChar);
   5091 
   5092                d->fxState[2].fx     = Ifx_Write;
   5093                d->fxState[2].offset = OFFB_FPROUND;
   5094                d->fxState[2].size   = sizeof(ULong);
   5095 
   5096                d->fxState[3].fx     = Ifx_Write;
   5097                d->fxState[3].offset = OFFB_FC3210;
   5098                d->fxState[3].size   = sizeof(ULong);
   5099 
   5100                stmt( IRStmt_Dirty(d) );
   5101 
   5102                /* ew contains any emulation warning we may need to
   5103                   issue.  If needed, side-exit to the next insn,
   5104                   reporting the warning, so that Valgrind's dispatcher
   5105                   sees the warning. */
   5106 	       assign(ew, unop(Iop_64to32,mkexpr(w64)) );
   5107                put_emwarn( mkexpr(ew) );
   5108                stmt(
   5109                   IRStmt_Exit(
   5110                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
   5111                      Ijk_EmWarn,
   5112                      IRConst_U64( guest_RIP_bbstart+delta )
   5113                   )
   5114                );
   5115 
   5116                DIP("fldenv %s\n", dis_buf);
   5117                break;
   5118             }
   5119 
   5120             case 5: {/* FLDCW */
   5121                /* The only thing we observe in the control word is the
   5122                   rounding mode.  Therefore, pass the 16-bit value
   5123                   (x87 native-format control word) to a clean helper,
   5124                   getting back a 64-bit value, the lower half of which
   5125                   is the FPROUND value to store, and the upper half of
   5126                   which is the emulation-warning token which may be
   5127                   generated.
   5128                */
   5129                /* ULong amd64h_check_fldcw ( ULong ); */
   5130                IRTemp t64 = newTemp(Ity_I64);
   5131                IRTemp ew = newTemp(Ity_I32);
   5132                DIP("fldcw %s\n", dis_buf);
   5133                assign( t64, mkIRExprCCall(
   5134                                Ity_I64, 0/*regparms*/,
   5135                                "amd64g_check_fldcw",
   5136                                &amd64g_check_fldcw,
   5137                                mkIRExprVec_1(
   5138                                   unop( Iop_16Uto64,
   5139                                         loadLE(Ity_I16, mkexpr(addr)))
   5140                                )
   5141                             )
   5142                      );
   5143 
   5144                put_fpround( unop(Iop_64to32, mkexpr(t64)) );
   5145                assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
   5146                put_emwarn( mkexpr(ew) );
   5147                /* Finally, if an emulation warning was reported,
   5148                   side-exit to the next insn, reporting the warning,
   5149                   so that Valgrind's dispatcher sees the warning. */
   5150                stmt(
   5151                   IRStmt_Exit(
   5152                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
   5153                      Ijk_EmWarn,
   5154                      IRConst_U64( guest_RIP_bbstart+delta )
   5155                   )
   5156                );
   5157                break;
   5158             }
   5159 
   5160             case 6: { /* FNSTENV m28 */
   5161                /* Uses dirty helper:
   5162                      void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
   5163                IRDirty* d = unsafeIRDirty_0_N (
   5164                                0/*regparms*/,
   5165                                "amd64g_dirtyhelper_FSTENV",
   5166                                &amd64g_dirtyhelper_FSTENV,
   5167                                mkIRExprVec_1( mkexpr(addr) )
   5168                             );
   5169                d->needsBBP = True;
   5170                /* declare we're writing memory */
   5171                d->mFx   = Ifx_Write;
   5172                d->mAddr = mkexpr(addr);
   5173                d->mSize = 28;
   5174 
   5175                /* declare we're reading guest state */
   5176                d->nFxState = 4;
   5177 
   5178                d->fxState[0].fx     = Ifx_Read;
   5179                d->fxState[0].offset = OFFB_FTOP;
   5180                d->fxState[0].size   = sizeof(UInt);
   5181 
   5182                d->fxState[1].fx     = Ifx_Read;
   5183                d->fxState[1].offset = OFFB_FPTAGS;
   5184                d->fxState[1].size   = 8 * sizeof(UChar);
   5185 
   5186                d->fxState[2].fx     = Ifx_Read;
   5187                d->fxState[2].offset = OFFB_FPROUND;
   5188                d->fxState[2].size   = sizeof(ULong);
   5189 
   5190                d->fxState[3].fx     = Ifx_Read;
   5191                d->fxState[3].offset = OFFB_FC3210;
   5192                d->fxState[3].size   = sizeof(ULong);
   5193 
   5194                stmt( IRStmt_Dirty(d) );
   5195 
   5196                DIP("fnstenv %s\n", dis_buf);
   5197                break;
   5198             }
   5199 
   5200             case 7: /* FNSTCW */
   5201                /* Fake up a native x87 FPU control word.  The only
   5202                   thing it depends on is FPROUND[1:0], so call a clean
   5203                   helper to cook it up. */
   5204                /* ULong amd64g_create_fpucw ( ULong fpround ) */
   5205                DIP("fnstcw %s\n", dis_buf);
   5206                storeLE(
   5207                   mkexpr(addr),
   5208                   unop( Iop_64to16,
   5209                         mkIRExprCCall(
   5210                            Ity_I64, 0/*regp*/,
   5211                            "amd64g_create_fpucw", &amd64g_create_fpucw,
   5212                            mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
   5213                         )
   5214                   )
   5215                );
   5216                break;
   5217 
   5218             default:
   5219                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5220                vex_printf("first_opcode == 0xD9\n");
   5221                goto decode_fail;
   5222          }
   5223 
   5224       } else {
   5225          delta++;
   5226          switch (modrm) {
   5227 
   5228             case 0xC0 ... 0xC7: /* FLD %st(?) */
   5229                r_src = (UInt)modrm - 0xC0;
   5230                DIP("fld %%st(%u)\n", r_src);
   5231                t1 = newTemp(Ity_F64);
   5232                assign(t1, get_ST(r_src));
   5233                fp_push();
   5234                put_ST(0, mkexpr(t1));
   5235                break;
   5236 
   5237             case 0xC8 ... 0xCF: /* FXCH %st(?) */
   5238                r_src = (UInt)modrm - 0xC8;
   5239                DIP("fxch %%st(%u)\n", r_src);
   5240                t1 = newTemp(Ity_F64);
   5241                t2 = newTemp(Ity_F64);
   5242                assign(t1, get_ST(0));
   5243                assign(t2, get_ST(r_src));
   5244                put_ST_UNCHECKED(0, mkexpr(t2));
   5245                put_ST_UNCHECKED(r_src, mkexpr(t1));
   5246                break;
   5247 
   5248             case 0xE0: /* FCHS */
   5249                DIP("fchs\n");
   5250                put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
   5251                break;
   5252 
   5253             case 0xE1: /* FABS */
   5254                DIP("fabs\n");
   5255                put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
   5256                break;
   5257 
   5258             case 0xE5: { /* FXAM */
   5259                /* This is an interesting one.  It examines %st(0),
   5260                   regardless of whether the tag says it's empty or not.
   5261                   Here, just pass both the tag (in our format) and the
   5262                   value (as a double, actually a ULong) to a helper
   5263                   function. */
   5264                IRExpr** args
   5265                   = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
   5266                                    unop(Iop_ReinterpF64asI64,
   5267                                         get_ST_UNCHECKED(0)) );
   5268                put_C3210(mkIRExprCCall(
   5269                             Ity_I64,
   5270                             0/*regparm*/,
   5271                             "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
   5272                             args
   5273                         ));
   5274                DIP("fxam\n");
   5275                break;
   5276             }
   5277 
   5278             case 0xE8: /* FLD1 */
   5279                DIP("fld1\n");
   5280                fp_push();
   5281                /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
   5282                put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
   5283                break;
   5284 
   5285             case 0xE9: /* FLDL2T */
   5286                DIP("fldl2t\n");
   5287                fp_push();
   5288                /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
   5289                put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
   5290                break;
   5291 
   5292             case 0xEA: /* FLDL2E */
   5293                DIP("fldl2e\n");
   5294                fp_push();
   5295                /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
   5296                put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
   5297                break;
   5298 
   5299             case 0xEB: /* FLDPI */
   5300                DIP("fldpi\n");
   5301                fp_push();
   5302                /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
   5303                put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
   5304                break;
   5305 
   5306             case 0xEC: /* FLDLG2 */
   5307                DIP("fldlg2\n");
   5308                fp_push();
   5309                /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
   5310                put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
   5311                break;
   5312 
   5313             case 0xED: /* FLDLN2 */
   5314                DIP("fldln2\n");
   5315                fp_push();
   5316                /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
   5317                put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
   5318                break;
   5319 
   5320             case 0xEE: /* FLDZ */
   5321                DIP("fldz\n");
   5322                fp_push();
   5323                /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
   5324                put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
   5325                break;
   5326 
   5327             case 0xF0: /* F2XM1 */
   5328                DIP("f2xm1\n");
   5329                put_ST_UNCHECKED(0,
   5330                   binop(Iop_2xm1F64,
   5331                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5332                         get_ST(0)));
   5333                break;
   5334 
   5335             case 0xF1: /* FYL2X */
   5336                DIP("fyl2x\n");
   5337                put_ST_UNCHECKED(1,
   5338                   triop(Iop_Yl2xF64,
   5339                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5340                         get_ST(1),
   5341                         get_ST(0)));
   5342                fp_pop();
   5343                break;
   5344 
   5345             case 0xF2: /* FPTAN */
   5346                DIP("ftan\n");
   5347                put_ST_UNCHECKED(0,
   5348                   binop(Iop_TanF64,
   5349                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5350                         get_ST(0)));
   5351                fp_push();
   5352                put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
   5353                clear_C2(); /* HACK */
   5354                break;
   5355 
   5356             case 0xF3: /* FPATAN */
   5357                DIP("fpatan\n");
   5358                put_ST_UNCHECKED(1,
   5359                   triop(Iop_AtanF64,
   5360                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5361                         get_ST(1),
   5362                         get_ST(0)));
   5363                fp_pop();
   5364                break;
   5365 
   5366             case 0xF4: { /* FXTRACT */
   5367                IRTemp argF = newTemp(Ity_F64);
   5368                IRTemp sigF = newTemp(Ity_F64);
   5369                IRTemp expF = newTemp(Ity_F64);
   5370                IRTemp argI = newTemp(Ity_I64);
   5371                IRTemp sigI = newTemp(Ity_I64);
   5372                IRTemp expI = newTemp(Ity_I64);
   5373                DIP("fxtract\n");
   5374                assign( argF, get_ST(0) );
   5375                assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
   5376                assign( sigI,
   5377                        mkIRExprCCall(
   5378                           Ity_I64, 0/*regparms*/,
   5379                           "x86amd64g_calculate_FXTRACT",
   5380                           &x86amd64g_calculate_FXTRACT,
   5381                           mkIRExprVec_2( mkexpr(argI),
   5382                                          mkIRExpr_HWord(0)/*sig*/ ))
   5383                );
   5384                assign( expI,
   5385                        mkIRExprCCall(
   5386                           Ity_I64, 0/*regparms*/,
   5387                           "x86amd64g_calculate_FXTRACT",
   5388                           &x86amd64g_calculate_FXTRACT,
   5389                           mkIRExprVec_2( mkexpr(argI),
   5390                                          mkIRExpr_HWord(1)/*exp*/ ))
   5391                );
   5392                assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
   5393                assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
   5394                /* exponent */
   5395                put_ST_UNCHECKED(0, mkexpr(expF) );
   5396                fp_push();
   5397                /* significand */
   5398                put_ST(0, mkexpr(sigF) );
   5399                break;
   5400             }
   5401 
   5402             case 0xF5: { /* FPREM1 -- IEEE compliant */
   5403                IRTemp a1 = newTemp(Ity_F64);
   5404                IRTemp a2 = newTemp(Ity_F64);
   5405                DIP("fprem1\n");
   5406                /* Do FPREM1 twice, once to get the remainder, and once
   5407                   to get the C3210 flag values. */
   5408                assign( a1, get_ST(0) );
   5409                assign( a2, get_ST(1) );
   5410                put_ST_UNCHECKED(0,
   5411                   triop(Iop_PRem1F64,
   5412                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5413                         mkexpr(a1),
   5414                         mkexpr(a2)));
   5415                put_C3210(
   5416                   unop(Iop_32Uto64,
   5417                   triop(Iop_PRem1C3210F64,
   5418                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5419                         mkexpr(a1),
   5420                         mkexpr(a2)) ));
   5421                break;
   5422             }
   5423 
   5424             case 0xF7: /* FINCSTP */
   5425                DIP("fincstp\n");
   5426                put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
   5427                break;
   5428 
   5429             case 0xF8: { /* FPREM -- not IEEE compliant */
   5430                IRTemp a1 = newTemp(Ity_F64);
   5431                IRTemp a2 = newTemp(Ity_F64);
   5432                DIP("fprem\n");
   5433                /* Do FPREM twice, once to get the remainder, and once
   5434                   to get the C3210 flag values. */
   5435                assign( a1, get_ST(0) );
   5436                assign( a2, get_ST(1) );
   5437                put_ST_UNCHECKED(0,
   5438                   triop(Iop_PRemF64,
   5439                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5440                         mkexpr(a1),
   5441                         mkexpr(a2)));
   5442                put_C3210(
   5443                   unop(Iop_32Uto64,
   5444                   triop(Iop_PRemC3210F64,
   5445                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5446                         mkexpr(a1),
   5447                         mkexpr(a2)) ));
   5448                break;
   5449             }
   5450 
   5451             case 0xF9: /* FYL2XP1 */
   5452                DIP("fyl2xp1\n");
   5453                put_ST_UNCHECKED(1,
   5454                   triop(Iop_Yl2xp1F64,
   5455                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5456                         get_ST(1),
   5457                         get_ST(0)));
   5458                fp_pop();
   5459                break;
   5460 
   5461             case 0xFA: /* FSQRT */
   5462                DIP("fsqrt\n");
   5463                put_ST_UNCHECKED(0,
   5464                   binop(Iop_SqrtF64,
   5465                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5466                         get_ST(0)));
   5467                break;
   5468 
            case 0xFB: { /* FSINCOS */
               IRTemp a1 = newTemp(Ity_F64);
               /* Snapshot ST(0) first: the sin result overwrites it
                  before the cos of the original value is computed. */
               assign( a1, get_ST(0) );
               DIP("fsincos\n");
               /* ST(0) := sin(original ST(0)) */
               put_ST_UNCHECKED(0,
                  binop(Iop_SinF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1)));
               /* Push cos(original ST(0)); it becomes the new ST(0)
                  with sin left in ST(1), matching the hardware's
                  result layout. */
               fp_push();
               put_ST(0,
                  binop(Iop_CosF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1)));
               /* Real hardware sets C2 when |arg| is out of range;
                  we unconditionally clear it. */
               clear_C2(); /* HACK */
               break;
            }
   5485 
   5486             case 0xFC: /* FRNDINT */
   5487                DIP("frndint\n");
   5488                put_ST_UNCHECKED(0,
   5489                   binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
   5490                break;
   5491 
   5492             case 0xFD: /* FSCALE */
   5493                DIP("fscale\n");
   5494                put_ST_UNCHECKED(0,
   5495                   triop(Iop_ScaleF64,
   5496                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5497                         get_ST(0),
   5498                         get_ST(1)));
   5499                break;
   5500 
   5501             case 0xFE: /* FSIN */
   5502                DIP("fsin\n");
   5503                put_ST_UNCHECKED(0,
   5504                   binop(Iop_SinF64,
   5505                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5506                         get_ST(0)));
   5507                clear_C2(); /* HACK */
   5508                break;
   5509 
   5510             case 0xFF: /* FCOS */
   5511                DIP("fcos\n");
   5512                put_ST_UNCHECKED(0,
   5513                   binop(Iop_CosF64,
   5514                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5515                         get_ST(0)));
   5516                clear_C2(); /* HACK */
   5517                break;
   5518 
   5519             default:
   5520                goto decode_fail;
   5521          }
   5522       }
   5523    }
   5524 
   5525    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
   5526    else
   5527    if (first_opcode == 0xDA) {
   5528 
   5529       if (modrm < 0xC0) {
   5530 
   5531          /* bits 5,4,3 are an opcode extension, and the modRM also
   5532             specifies an address. */
   5533          IROp   fop;
   5534          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5535          delta += len;
   5536          switch (gregLO3ofRM(modrm)) {
   5537 
   5538             case 0: /* FIADD m32int */ /* ST(0) += m32int */
   5539                DIP("fiaddl %s\n", dis_buf);
   5540                fop = Iop_AddF64;
   5541                goto do_fop_m32;
   5542 
   5543             case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
   5544                DIP("fimull %s\n", dis_buf);
   5545                fop = Iop_MulF64;
   5546                goto do_fop_m32;
   5547 
   5548             case 4: /* FISUB m32int */ /* ST(0) -= m32int */
   5549                DIP("fisubl %s\n", dis_buf);
   5550                fop = Iop_SubF64;
   5551                goto do_fop_m32;
   5552 
   5553             case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
   5554                DIP("fisubrl %s\n", dis_buf);
   5555                fop = Iop_SubF64;
   5556                goto do_foprev_m32;
   5557 
   5558             case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
   5559                DIP("fisubl %s\n", dis_buf);
   5560                fop = Iop_DivF64;
   5561                goto do_fop_m32;
   5562 
   5563             case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
   5564                DIP("fidivrl %s\n", dis_buf);
   5565                fop = Iop_DivF64;
   5566                goto do_foprev_m32;
   5567 
   5568             do_fop_m32:
   5569                put_ST_UNCHECKED(0,
   5570                   triop(fop,
   5571                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5572                         get_ST(0),
   5573                         unop(Iop_I32StoF64,
   5574                              loadLE(Ity_I32, mkexpr(addr)))));
   5575                break;
   5576 
   5577             do_foprev_m32:
   5578                put_ST_UNCHECKED(0,
   5579                   triop(fop,
   5580                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   5581                         unop(Iop_I32StoF64,
   5582                              loadLE(Ity_I32, mkexpr(addr))),
   5583                         get_ST(0)));
   5584                break;
   5585 
   5586             default:
   5587                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5588                vex_printf("first_opcode == 0xDA\n");
   5589                goto decode_fail;
   5590          }
   5591 
   5592       } else {
   5593 
   5594          delta++;
   5595          switch (modrm) {
   5596 
   5597             case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
   5598                r_src = (UInt)modrm - 0xC0;
   5599                DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
   5600                put_ST_UNCHECKED(0,
   5601                                 IRExpr_Mux0X(
   5602                                     unop(Iop_1Uto8,
   5603                                          mk_amd64g_calculate_condition(AMD64CondB)),
   5604                                     get_ST(0), get_ST(r_src)) );
   5605                break;
   5606 
   5607             case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
   5608                r_src = (UInt)modrm - 0xC8;
   5609                DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
   5610                put_ST_UNCHECKED(0,
   5611                                 IRExpr_Mux0X(
   5612                                     unop(Iop_1Uto8,
   5613                                          mk_amd64g_calculate_condition(AMD64CondZ)),
   5614                                     get_ST(0), get_ST(r_src)) );
   5615                break;
   5616 
   5617             case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
   5618                r_src = (UInt)modrm - 0xD0;
   5619                DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
   5620                put_ST_UNCHECKED(0,
   5621                                 IRExpr_Mux0X(
   5622                                     unop(Iop_1Uto8,
   5623                                          mk_amd64g_calculate_condition(AMD64CondBE)),
   5624                                     get_ST(0), get_ST(r_src)) );
   5625                break;
   5626 
   5627             case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
   5628                r_src = (UInt)modrm - 0xD8;
   5629                DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
   5630                put_ST_UNCHECKED(0,
   5631                                 IRExpr_Mux0X(
   5632                                     unop(Iop_1Uto8,
   5633                                          mk_amd64g_calculate_condition(AMD64CondP)),
   5634                                     get_ST(0), get_ST(r_src)) );
   5635                break;
   5636 
            case 0xE9: /* FUCOMPP %st(0),%st(1) */
               DIP("fucompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
               /* Iop_CmpF64 yields its comparison result in the low
                  byte; shifting left by 8 and masking with 0x4500
                  deposits the bits at the C0 (0x100), C2 (0x400) and
                  C3 (0x4000) positions of the FPU status word. */
               put_C3210(
                   unop(Iop_32Uto64,
                   binop( Iop_And32,
                          binop(Iop_Shl32,
                                binop(Iop_CmpF64, get_ST(0), get_ST(1)),
                                mkU8(8)),
                          mkU32(0x4500)
                   )));
               /* FUCOMPP pops the register stack twice. */
               fp_pop();
               fp_pop();
               break;
   5651 
   5652             default:
   5653                goto decode_fail;
   5654          }
   5655 
   5656       }
   5657    }
   5658 
   5659    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
   5660    else
   5661    if (first_opcode == 0xDB) {
   5662       if (modrm < 0xC0) {
   5663 
   5664          /* bits 5,4,3 are an opcode extension, and the modRM also
   5665             specifies an address. */
   5666          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5667          delta += len;
   5668 
   5669          switch (gregLO3ofRM(modrm)) {
   5670 
   5671             case 0: /* FILD m32int */
   5672                DIP("fildl %s\n", dis_buf);
   5673                fp_push();
   5674                put_ST(0, unop(Iop_I32StoF64,
   5675                               loadLE(Ity_I32, mkexpr(addr))));
   5676                break;
   5677 
   5678             case 1: /* FISTTPL m32 (SSE3) */
   5679                DIP("fisttpl %s\n", dis_buf);
   5680                storeLE( mkexpr(addr),
   5681                         binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
   5682                fp_pop();
   5683                break;
   5684 
   5685             case 2: /* FIST m32 */
   5686                DIP("fistl %s\n", dis_buf);
   5687                storeLE( mkexpr(addr),
   5688                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
   5689                break;
   5690 
   5691             case 3: /* FISTP m32 */
   5692                DIP("fistpl %s\n", dis_buf);
   5693                storeLE( mkexpr(addr),
   5694                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
   5695                fp_pop();
   5696                break;
   5697 
   5698             case 5: { /* FLD extended-real */
   5699                /* Uses dirty helper:
   5700                      ULong amd64g_loadF80le ( ULong )
   5701                   addr holds the address.  First, do a dirty call to
   5702                   get hold of the data. */
   5703                IRTemp   val  = newTemp(Ity_I64);
   5704                IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
   5705 
   5706                IRDirty* d = unsafeIRDirty_1_N (
   5707                                val,
   5708                                0/*regparms*/,
   5709                                "amd64g_dirtyhelper_loadF80le",
   5710                                &amd64g_dirtyhelper_loadF80le,
   5711                                args
   5712                             );
   5713                /* declare that we're reading memory */
   5714                d->mFx   = Ifx_Read;
   5715                d->mAddr = mkexpr(addr);
   5716                d->mSize = 10;
   5717 
   5718                /* execute the dirty call, dumping the result in val. */
   5719                stmt( IRStmt_Dirty(d) );
   5720                fp_push();
   5721                put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
   5722 
   5723                DIP("fldt %s\n", dis_buf);
   5724                break;
   5725             }
   5726 
   5727             case 7: { /* FSTP extended-real */
   5728                /* Uses dirty helper:
   5729                      void amd64g_storeF80le ( ULong addr, ULong data )
   5730                */
   5731                IRExpr** args
   5732                   = mkIRExprVec_2( mkexpr(addr),
   5733                                    unop(Iop_ReinterpF64asI64, get_ST(0)) );
   5734 
   5735                IRDirty* d = unsafeIRDirty_0_N (
   5736                                0/*regparms*/,
   5737                                "amd64g_dirtyhelper_storeF80le",
   5738                                &amd64g_dirtyhelper_storeF80le,
   5739                                args
   5740                             );
   5741                /* declare we're writing memory */
   5742                d->mFx   = Ifx_Write;
   5743                d->mAddr = mkexpr(addr);
   5744                d->mSize = 10;
   5745 
   5746                /* execute the dirty call. */
   5747                stmt( IRStmt_Dirty(d) );
   5748                fp_pop();
   5749 
   5750                DIP("fstpt\n %s", dis_buf);
   5751                break;
   5752             }
   5753 
   5754             default:
   5755                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5756                vex_printf("first_opcode == 0xDB\n");
   5757                goto decode_fail;
   5758          }
   5759 
   5760       } else {
   5761 
   5762          delta++;
   5763          switch (modrm) {
   5764 
   5765             case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
   5766                r_src = (UInt)modrm - 0xC0;
   5767                DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
   5768                put_ST_UNCHECKED(0,
   5769                                 IRExpr_Mux0X(
   5770                                     unop(Iop_1Uto8,
   5771                                          mk_amd64g_calculate_condition(AMD64CondNB)),
   5772                                     get_ST(0), get_ST(r_src)) );
   5773                break;
   5774 
   5775             case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
   5776                r_src = (UInt)modrm - 0xC8;
   5777                DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
   5778                put_ST_UNCHECKED(
   5779                   0,
   5780                   IRExpr_Mux0X(
   5781                      unop(Iop_1Uto8,
   5782                           mk_amd64g_calculate_condition(AMD64CondNZ)),
   5783                      get_ST(0),
   5784                      get_ST(r_src)
   5785                   )
   5786                );
   5787                break;
   5788 
   5789             case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
   5790                r_src = (UInt)modrm - 0xD0;
   5791                DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
   5792                put_ST_UNCHECKED(
   5793                   0,
   5794                   IRExpr_Mux0X(
   5795                      unop(Iop_1Uto8,
   5796                           mk_amd64g_calculate_condition(AMD64CondNBE)),
   5797                      get_ST(0),
   5798                      get_ST(r_src)
   5799                   )
   5800                );
   5801                break;
   5802 
   5803             case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
   5804                r_src = (UInt)modrm - 0xD8;
   5805                DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
   5806                put_ST_UNCHECKED(
   5807                   0,
   5808                   IRExpr_Mux0X(
   5809                      unop(Iop_1Uto8,
   5810                           mk_amd64g_calculate_condition(AMD64CondNP)),
   5811                      get_ST(0),
   5812                      get_ST(r_src)
   5813                   )
   5814                );
   5815                break;
   5816 
   5817             case 0xE2:
   5818                DIP("fnclex\n");
   5819                break;
   5820 
   5821             case 0xE3: {
   5822                /* Uses dirty helper:
   5823                      void amd64g_do_FINIT ( VexGuestAMD64State* ) */
   5824                IRDirty* d  = unsafeIRDirty_0_N (
   5825                                 0/*regparms*/,
   5826                                 "amd64g_dirtyhelper_FINIT",
   5827                                 &amd64g_dirtyhelper_FINIT,
   5828                                 mkIRExprVec_0()
   5829                              );
   5830                d->needsBBP = True;
   5831 
   5832                /* declare we're writing guest state */
   5833                d->nFxState = 5;
   5834 
   5835                d->fxState[0].fx     = Ifx_Write;
   5836                d->fxState[0].offset = OFFB_FTOP;
   5837                d->fxState[0].size   = sizeof(UInt);
   5838 
   5839                d->fxState[1].fx     = Ifx_Write;
   5840                d->fxState[1].offset = OFFB_FPREGS;
   5841                d->fxState[1].size   = 8 * sizeof(ULong);
   5842 
   5843                d->fxState[2].fx     = Ifx_Write;
   5844                d->fxState[2].offset = OFFB_FPTAGS;
   5845                d->fxState[2].size   = 8 * sizeof(UChar);
   5846 
   5847                d->fxState[3].fx     = Ifx_Write;
   5848                d->fxState[3].offset = OFFB_FPROUND;
   5849                d->fxState[3].size   = sizeof(ULong);
   5850 
   5851                d->fxState[4].fx     = Ifx_Write;
   5852                d->fxState[4].offset = OFFB_FC3210;
   5853                d->fxState[4].size   = sizeof(ULong);
   5854 
   5855                stmt( IRStmt_Dirty(d) );
   5856 
   5857                DIP("fninit\n");
   5858                break;
   5859             }
   5860 
   5861             case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
   5862                fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
   5863                break;
   5864 
   5865             case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
   5866                fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
   5867                break;
   5868 
   5869             default:
   5870                goto decode_fail;
   5871          }
   5872       }
   5873    }
   5874 
   5875    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
   5876    else
   5877    if (first_opcode == 0xDC) {
   5878       if (modrm < 0xC0) {
   5879 
   5880          /* bits 5,4,3 are an opcode extension, and the modRM also
   5881             specifies an address. */
   5882          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5883          delta += len;
   5884 
   5885          switch (gregLO3ofRM(modrm)) {
   5886 
   5887             case 0: /* FADD double-real */
   5888                fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
   5889                break;
   5890 
   5891             case 1: /* FMUL double-real */
   5892                fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
   5893                break;
   5894 
   5895 //..             case 2: /* FCOM double-real */
   5896 //..                DIP("fcoml %s\n", dis_buf);
   5897 //..                /* This forces C1 to zero, which isn't right. */
   5898 //..                put_C3210(
   5899 //..                    binop( Iop_And32,
   5900 //..                           binop(Iop_Shl32,
   5901 //..                                 binop(Iop_CmpF64,
   5902 //..                                       get_ST(0),
   5903 //..                                       loadLE(Ity_F64,mkexpr(addr))),
   5904 //..                                 mkU8(8)),
   5905 //..                           mkU32(0x4500)
   5906 //..                    ));
   5907 //..                break;
   5908 
   5909             case 3: /* FCOMP double-real */
   5910                DIP("fcompl %s\n", dis_buf);
   5911                /* This forces C1 to zero, which isn't right. */
   5912                put_C3210(
   5913                    unop(Iop_32Uto64,
   5914                    binop( Iop_And32,
   5915                           binop(Iop_Shl32,
   5916                                 binop(Iop_CmpF64,
   5917                                       get_ST(0),
   5918                                       loadLE(Ity_F64,mkexpr(addr))),
   5919                                 mkU8(8)),
   5920                           mkU32(0x4500)
   5921                    )));
   5922                fp_pop();
   5923                break;
   5924 
   5925             case 4: /* FSUB double-real */
   5926                fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
   5927                break;
   5928 
   5929             case 5: /* FSUBR double-real */
   5930                fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
   5931                break;
   5932 
   5933             case 6: /* FDIV double-real */
   5934                fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
   5935                break;
   5936 
   5937             case 7: /* FDIVR double-real */
   5938                fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
   5939                break;
   5940 
   5941             default:
   5942                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5943                vex_printf("first_opcode == 0xDC\n");
   5944                goto decode_fail;
   5945          }
   5946 
   5947       } else {
   5948 
   5949          delta++;
   5950          switch (modrm) {
   5951 
   5952             case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
   5953                fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
   5954                break;
   5955 
   5956             case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
   5957                fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
   5958                break;
   5959 
   5960             case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
   5961                fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
   5962                break;
   5963 
   5964             case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
   5965                fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
   5966                break;
   5967 
   5968             case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
   5969                fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
   5970                break;
   5971 
   5972             case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
   5973                fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
   5974                break;
   5975 
   5976             default:
   5977                goto decode_fail;
   5978          }
   5979 
   5980       }
   5981    }
   5982 
   5983    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
   5984    else
   5985    if (first_opcode == 0xDD) {
   5986 
   5987       if (modrm < 0xC0) {
   5988 
   5989          /* bits 5,4,3 are an opcode extension, and the modRM also
   5990             specifies an address. */
   5991          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5992          delta += len;
   5993 
   5994          switch (gregLO3ofRM(modrm)) {
   5995 
   5996             case 0: /* FLD double-real */
   5997                DIP("fldl %s\n", dis_buf);
   5998                fp_push();
   5999                put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
   6000                break;
   6001 
   6002             case 1: /* FISTTPQ m64 (SSE3) */
   6003                DIP("fistppll %s\n", dis_buf);
   6004                storeLE( mkexpr(addr),
   6005                         binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
   6006                fp_pop();
   6007                break;
   6008 
   6009             case 2: /* FST double-real */
   6010                DIP("fstl %s\n", dis_buf);
   6011                storeLE(mkexpr(addr), get_ST(0));
   6012                break;
   6013 
   6014             case 3: /* FSTP double-real */
   6015                DIP("fstpl %s\n", dis_buf);
   6016                storeLE(mkexpr(addr), get_ST(0));
   6017                fp_pop();
   6018                break;
   6019 
   6020 //..             case 4: { /* FRSTOR m108 */
   6021 //..                /* Uses dirty helper:
   6022 //..                      VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
   6023 //..                IRTemp   ew = newTemp(Ity_I32);
   6024 //..                IRDirty* d  = unsafeIRDirty_0_N (
   6025 //..                                 0/*regparms*/,
   6026 //..                                 "x86g_dirtyhelper_FRSTOR",
   6027 //..                                 &x86g_dirtyhelper_FRSTOR,
   6028 //..                                 mkIRExprVec_1( mkexpr(addr) )
   6029 //..                              );
   6030 //..                d->needsBBP = True;
   6031 //..                d->tmp      = ew;
   6032 //..                /* declare we're reading memory */
   6033 //..                d->mFx   = Ifx_Read;
   6034 //..                d->mAddr = mkexpr(addr);
   6035 //..                d->mSize = 108;
   6036 //..
   6037 //..                /* declare we're writing guest state */
   6038 //..                d->nFxState = 5;
   6039 //..
   6040 //..                d->fxState[0].fx     = Ifx_Write;
   6041 //..                d->fxState[0].offset = OFFB_FTOP;
   6042 //..                d->fxState[0].size   = sizeof(UInt);
   6043 //..
   6044 //..                d->fxState[1].fx     = Ifx_Write;
   6045 //..                d->fxState[1].offset = OFFB_FPREGS;
   6046 //..                d->fxState[1].size   = 8 * sizeof(ULong);
   6047 //..
   6048 //..                d->fxState[2].fx     = Ifx_Write;
   6049 //..                d->fxState[2].offset = OFFB_FPTAGS;
   6050 //..                d->fxState[2].size   = 8 * sizeof(UChar);
   6051 //..
   6052 //..                d->fxState[3].fx     = Ifx_Write;
   6053 //..                d->fxState[3].offset = OFFB_FPROUND;
   6054 //..                d->fxState[3].size   = sizeof(UInt);
   6055 //..
   6056 //..                d->fxState[4].fx     = Ifx_Write;
   6057 //..                d->fxState[4].offset = OFFB_FC3210;
   6058 //..                d->fxState[4].size   = sizeof(UInt);
   6059 //..
   6060 //..                stmt( IRStmt_Dirty(d) );
   6061 //..
   6062 //..                /* ew contains any emulation warning we may need to
   6063 //..                   issue.  If needed, side-exit to the next insn,
   6064 //..                   reporting the warning, so that Valgrind's dispatcher
   6065 //..                   sees the warning. */
   6066 //..                put_emwarn( mkexpr(ew) );
   6067 //..                stmt(
   6068 //..                   IRStmt_Exit(
   6069 //..                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
   6070 //..                      Ijk_EmWarn,
   6071 //..                      IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
   6072 //..                   )
   6073 //..                );
   6074 //..
   6075 //..                DIP("frstor %s\n", dis_buf);
   6076 //..                break;
   6077 //..             }
   6078 //..
   6079 //..             case 6: { /* FNSAVE m108 */
   6080 //..                /* Uses dirty helper:
   6081 //..                      void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
   6082 //..                IRDirty* d = unsafeIRDirty_0_N (
   6083 //..                                0/*regparms*/,
   6084 //..                                "x86g_dirtyhelper_FSAVE",
   6085 //..                                &x86g_dirtyhelper_FSAVE,
   6086 //..                                mkIRExprVec_1( mkexpr(addr) )
   6087 //..                             );
   6088 //..                d->needsBBP = True;
   6089 //..                /* declare we're writing memory */
   6090 //..                d->mFx   = Ifx_Write;
   6091 //..                d->mAddr = mkexpr(addr);
   6092 //..                d->mSize = 108;
   6093 //..
   6094 //..                /* declare we're reading guest state */
   6095 //..                d->nFxState = 5;
   6096 //..
   6097 //..                d->fxState[0].fx     = Ifx_Read;
   6098 //..                d->fxState[0].offset = OFFB_FTOP;
   6099 //..                d->fxState[0].size   = sizeof(UInt);
   6100 //..
   6101 //..                d->fxState[1].fx     = Ifx_Read;
   6102 //..                d->fxState[1].offset = OFFB_FPREGS;
   6103 //..                d->fxState[1].size   = 8 * sizeof(ULong);
   6104 //..
   6105 //..                d->fxState[2].fx     = Ifx_Read;
   6106 //..                d->fxState[2].offset = OFFB_FPTAGS;
   6107 //..                d->fxState[2].size   = 8 * sizeof(UChar);
   6108 //..
   6109 //..                d->fxState[3].fx     = Ifx_Read;
   6110 //..                d->fxState[3].offset = OFFB_FPROUND;
   6111 //..                d->fxState[3].size   = sizeof(UInt);
   6112 //..
   6113 //..                d->fxState[4].fx     = Ifx_Read;
   6114 //..                d->fxState[4].offset = OFFB_FC3210;
   6115 //..                d->fxState[4].size   = sizeof(UInt);
   6116 //..
   6117 //..                stmt( IRStmt_Dirty(d) );
   6118 //..
   6119 //..                DIP("fnsave %s\n", dis_buf);
   6120 //..                break;
   6121 //..             }
   6122 
   6123             case 7: { /* FNSTSW m16 */
   6124                IRExpr* sw = get_FPU_sw();
   6125                vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
   6126                storeLE( mkexpr(addr), sw );
   6127                DIP("fnstsw %s\n", dis_buf);
   6128                break;
   6129             }
   6130 
   6131             default:
   6132                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   6133                vex_printf("first_opcode == 0xDD\n");
   6134                goto decode_fail;
   6135          }
   6136       } else {
   6137          delta++;
   6138          switch (modrm) {
   6139 
   6140             case 0xC0 ... 0xC7: /* FFREE %st(?) */
   6141                r_dst = (UInt)modrm - 0xC0;
   6142                DIP("ffree %%st(%u)\n", r_dst);
   6143                put_ST_TAG ( r_dst, mkU8(0) );
   6144                break;
   6145 
   6146             case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
   6147                r_dst = (UInt)modrm - 0xD0;
   6148                DIP("fst %%st(0),%%st(%u)\n", r_dst);
   6149                /* P4 manual says: "If the destination operand is a
   6150                   non-empty register, the invalid-operation exception
   6151                   is not generated.  Hence put_ST_UNCHECKED. */
   6152                put_ST_UNCHECKED(r_dst, get_ST(0));
   6153                break;
   6154 
   6155             case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
   6156                r_dst = (UInt)modrm - 0xD8;
   6157                DIP("fstp %%st(0),%%st(%u)\n", r_dst);
   6158                /* P4 manual says: "If the destination operand is a
   6159                   non-empty register, the invalid-operation exception
   6160                   is not generated.  Hence put_ST_UNCHECKED. */
   6161                put_ST_UNCHECKED(r_dst, get_ST(0));
   6162                fp_pop();
   6163                break;
   6164 
   6165             case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
   6166                r_dst = (UInt)modrm - 0xE0;
   6167                DIP("fucom %%st(0),%%st(%u)\n", r_dst);
   6168                /* This forces C1 to zero, which isn't right. */
   6169                put_C3210(
   6170                    unop(Iop_32Uto64,
   6171                    binop( Iop_And32,
   6172                           binop(Iop_Shl32,
   6173                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   6174                                 mkU8(8)),
   6175                           mkU32(0x4500)
   6176                    )));
   6177                break;
   6178 
   6179             case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
   6180                r_dst = (UInt)modrm - 0xE8;
   6181                DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
   6182                /* This forces C1 to zero, which isn't right. */
   6183                put_C3210(
   6184                    unop(Iop_32Uto64,
   6185                    binop( Iop_And32,
   6186                           binop(Iop_Shl32,
   6187                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   6188                                 mkU8(8)),
   6189                           mkU32(0x4500)
   6190                    )));
   6191                fp_pop();
   6192                break;
   6193 
   6194             default:
   6195                goto decode_fail;
   6196          }
   6197       }
   6198    }
   6199 
   6200    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
   6201    else
   6202    if (first_opcode == 0xDE) {
   6203 
   6204       if (modrm < 0xC0) {
   6205 
   6206          /* bits 5,4,3 are an opcode extension, and the modRM also
   6207             specifies an address. */
   6208          IROp   fop;
   6209          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6210          delta += len;
   6211 
   6212          switch (gregLO3ofRM(modrm)) {
   6213 
   6214             case 0: /* FIADD m16int */ /* ST(0) += m16int */
   6215                DIP("fiaddw %s\n", dis_buf);
   6216                fop = Iop_AddF64;
   6217                goto do_fop_m16;
   6218 
   6219             case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
   6220                DIP("fimulw %s\n", dis_buf);
   6221                fop = Iop_MulF64;
   6222                goto do_fop_m16;
   6223 
   6224             case 4: /* FISUB m16int */ /* ST(0) -= m16int */
   6225                DIP("fisubw %s\n", dis_buf);
   6226                fop = Iop_SubF64;
   6227                goto do_fop_m16;
   6228 
   6229             case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
   6230                DIP("fisubrw %s\n", dis_buf);
   6231                fop = Iop_SubF64;
   6232                goto do_foprev_m16;
   6233 
   6234             case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
   6235                DIP("fisubw %s\n", dis_buf);
   6236                fop = Iop_DivF64;
   6237                goto do_fop_m16;
   6238 
   6239             case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
   6240                DIP("fidivrw %s\n", dis_buf);
   6241                fop = Iop_DivF64;
   6242                goto do_foprev_m16;
   6243 
   6244             do_fop_m16:
   6245                put_ST_UNCHECKED(0,
   6246                   triop(fop,
   6247                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   6248                         get_ST(0),
   6249                         unop(Iop_I32StoF64,
   6250                              unop(Iop_16Sto32,
   6251                                   loadLE(Ity_I16, mkexpr(addr))))));
   6252                break;
   6253 
   6254             do_foprev_m16:
   6255                put_ST_UNCHECKED(0,
   6256                   triop(fop,
   6257                         get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   6258                         unop(Iop_I32StoF64,
   6259                              unop(Iop_16Sto32,
   6260                                   loadLE(Ity_I16, mkexpr(addr)))),
   6261                         get_ST(0)));
   6262                break;
   6263 
   6264             default:
   6265                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   6266                vex_printf("first_opcode == 0xDE\n");
   6267                goto decode_fail;
   6268          }
   6269 
   6270       } else {
   6271 
   6272          delta++;
   6273          switch (modrm) {
   6274 
   6275             case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
   6276                fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
   6277                break;
   6278 
   6279             case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
   6280                fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
   6281                break;
   6282 
   6283             case 0xD9: /* FCOMPP %st(0),%st(1) */
   6284                DIP("fcompp %%st(0),%%st(1)\n");
   6285                /* This forces C1 to zero, which isn't right. */
   6286                put_C3210(
   6287                    unop(Iop_32Uto64,
   6288                    binop( Iop_And32,
   6289                           binop(Iop_Shl32,
   6290                                 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
   6291                                 mkU8(8)),
   6292                           mkU32(0x4500)
   6293                    )));
   6294                fp_pop();
   6295                fp_pop();
   6296                break;
   6297 
   6298             case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
   6299                fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0,  modrm - 0xE0, True );
   6300                break;
   6301 
   6302             case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
   6303                fp_do_op_ST_ST ( "sub", Iop_SubF64, 0,  modrm - 0xE8, True );
   6304                break;
   6305 
   6306             case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
   6307                fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
   6308                break;
   6309 
   6310             case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
   6311                fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
   6312                break;
   6313 
   6314             default:
   6315                goto decode_fail;
   6316          }
   6317 
   6318       }
   6319    }
   6320 
   6321    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
   6322    else
   6323    if (first_opcode == 0xDF) {
   6324 
   6325       if (modrm < 0xC0) {
   6326 
   6327          /* bits 5,4,3 are an opcode extension, and the modRM also
   6328             specifies an address. */
   6329          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6330          delta += len;
   6331 
   6332          switch (gregLO3ofRM(modrm)) {
   6333 
   6334             case 0: /* FILD m16int */
   6335                DIP("fildw %s\n", dis_buf);
   6336                fp_push();
   6337                put_ST(0, unop(Iop_I32StoF64,
   6338                               unop(Iop_16Sto32,
   6339                                    loadLE(Ity_I16, mkexpr(addr)))));
   6340                break;
   6341 
   6342             case 1: /* FISTTPS m16 (SSE3) */
   6343                DIP("fisttps %s\n", dis_buf);
   6344                storeLE( mkexpr(addr),
   6345                         x87ishly_qnarrow_32_to_16(
   6346                         binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
   6347                fp_pop();
   6348                break;
   6349 
   6350             case 2: /* FIST m16 */
   6351                DIP("fists %s\n", dis_buf);
   6352                storeLE( mkexpr(addr),
   6353                         x87ishly_qnarrow_32_to_16(
   6354                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
   6355                break;
   6356 
   6357             case 3: /* FISTP m16 */
   6358                DIP("fistps %s\n", dis_buf);
   6359                storeLE( mkexpr(addr),
   6360                         x87ishly_qnarrow_32_to_16(
   6361                         binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
   6362                fp_pop();
   6363                break;
   6364 
   6365             case 5: /* FILD m64 */
   6366                DIP("fildll %s\n", dis_buf);
   6367                fp_push();
   6368                put_ST(0, binop(Iop_I64StoF64,
   6369                                get_roundingmode(),
   6370                                loadLE(Ity_I64, mkexpr(addr))));
   6371                break;
   6372 
   6373             case 7: /* FISTP m64 */
   6374                DIP("fistpll %s\n", dis_buf);
   6375                storeLE( mkexpr(addr),
   6376                         binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
   6377                fp_pop();
   6378                break;
   6379 
   6380             default:
   6381                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   6382                vex_printf("first_opcode == 0xDF\n");
   6383                goto decode_fail;
   6384          }
   6385 
   6386       } else {
   6387 
   6388          delta++;
   6389          switch (modrm) {
   6390 
   6391             case 0xC0: /* FFREEP %st(0) */
   6392                DIP("ffreep %%st(%d)\n", 0);
   6393                put_ST_TAG ( 0, mkU8(0) );
   6394                fp_pop();
   6395                break;
   6396 
   6397             case 0xE0: /* FNSTSW %ax */
   6398                DIP("fnstsw %%ax\n");
   6399                /* Invent a plausible-looking FPU status word value and
   6400                   dump it in %AX:
   6401                      ((ftop & 7) << 11) | (c3210 & 0x4700)
   6402                */
   6403                putIRegRAX(
   6404                   2,
   6405                   unop(Iop_32to16,
   6406                        binop(Iop_Or32,
   6407                              binop(Iop_Shl32,
   6408                                    binop(Iop_And32, get_ftop(), mkU32(7)),
   6409                                    mkU8(11)),
   6410                              binop(Iop_And32,
   6411                                    unop(Iop_64to32, get_C3210()),
   6412                                    mkU32(0x4700))
   6413                )));
   6414                break;
   6415 
   6416             case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
   6417                fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
   6418                break;
   6419 
   6420             case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
   6421                /* not really right since COMIP != UCOMIP */
   6422                fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
   6423                break;
   6424 
   6425             default:
   6426                goto decode_fail;
   6427          }
   6428       }
   6429 
   6430    }
   6431 
   6432    else
   6433       goto decode_fail;
   6434 
   6435    *decode_ok = True;
   6436    return delta;
   6437 
   6438   decode_fail:
   6439    *decode_ok = False;
   6440    return delta;
   6441 }
   6442 
   6443 
   6444 /*------------------------------------------------------------*/
   6445 /*---                                                      ---*/
   6446 /*--- MMX INSTRUCTIONS                                     ---*/
   6447 /*---                                                      ---*/
   6448 /*------------------------------------------------------------*/
   6449 
   6450 /* Effect of MMX insns on x87 FPU state (table 11-2 of
   6451    IA32 arch manual, volume 3):
   6452 
   6453    Read from, or write to MMX register (viz, any insn except EMMS):
   6454    * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   6455    * FP stack pointer set to zero
   6456 
   6457    EMMS:
   6458    * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   6459    * FP stack pointer set to zero
   6460 */
   6461 
   6462 static void do_MMX_preamble ( void )
   6463 {
   6464    Int         i;
   6465    IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   6466    IRExpr*     zero  = mkU32(0);
   6467    IRExpr*     tag1  = mkU8(1);
   6468    put_ftop(zero);
   6469    for (i = 0; i < 8; i++)
   6470       stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
   6471 }
   6472 
   6473 static void do_EMMS_preamble ( void )
   6474 {
   6475    Int         i;
   6476    IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   6477    IRExpr*     zero  = mkU32(0);
   6478    IRExpr*     tag0  = mkU8(0);
   6479    put_ftop(zero);
   6480    for (i = 0; i < 8; i++)
   6481       stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
   6482 }
   6483 
   6484 
   6485 static IRExpr* getMMXReg ( UInt archreg )
   6486 {
   6487    vassert(archreg < 8);
   6488    return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
   6489 }
   6490 
   6491 
   6492 static void putMMXReg ( UInt archreg, IRExpr* e )
   6493 {
   6494    vassert(archreg < 8);
   6495    vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   6496    stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
   6497 }
   6498 
   6499 
/* Helper for non-shift MMX insns of the form "op E, G", where E is
   an mmx register or 64-bit memory operand and G is the destination
   mmx register.  'opc' selects the operation; 'name' is the mnemonic
   root printed for disassembly, and if 'show_granularity' the
   b/w/d/q lane-size suffix (derived from opc's low 2 bits) is
   appended to it.  Returns the updated instruction offset ('delta').
   Note this is incomplete in the sense that it does not first call
   do_MMX_preamble() -- that is the responsibility of its caller. */

static
ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
                                Prefix      pfx,
                                Long        delta,
                                UChar       opc,
                                HChar*      name,
                                Bool        show_granularity )
{
   HChar   dis_buf[50];
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   Bool    invG  = False;        /* complement G before use (PANDN) */
   IROp    op    = Iop_INVALID;  /* IR op, when directly expressible */
   void*   hAddr = NULL;         /* else: clean-helper address ... */
   HChar*  hName = NULL;         /* ... and its name, for the CCall */
   Bool    eLeft = False;        /* E supplies the left (first) arg */

   /* Route this opcode to a clean helper function instead of an
      IROp. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   switch (opc) {
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* Pack ops take E as the left argument. */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      /* Likewise the unpack (interleave) ops. */
      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;   /* PANDN */
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (Int)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   }

#  undef XXX

   /* Fetch the G argument, complemented for PANDN. */
   argG = getMMXReg(gregLO3ofRM(modrm));
   if (invG)
      argG = unop(Iop_Not64, argG);

   /* Fetch the E argument: mmx register, or a 64-bit load. */
   if (isReg) {
      delta++;
      argE = getMMXReg(eregLO3ofRM(modrm));
   } else {
      Int    len;
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      delta += len;
      argE = loadLE(Ity_I64, mkexpr(addr));
   }

   /* Order the arguments as the chosen op requires (pack/unpack ops
      set eLeft above). */
   if (eLeft) {
      argL = argE;
      argR = argG;
   } else {
      argL = argG;
      argR = argE;
   }

   /* Compute the result either directly as an IR binop, or via a
      call to the selected clean helper. */
   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   } else {
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      assign( res,
              mkIRExprCCall(
                 Ity_I64,
                 0/*regparms*/, hName, hAddr,
                 mkIRExprVec_2( argL, argR )
              )
            );
   }

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );

   return delta;
}
   6650 
   6651 
   6652 /* Vector by scalar shift of G by the amount specified at the bottom
   6653    of E.  This is a straight copy of dis_SSE_shiftG_byE. */
   6654 
   6655 static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
   6656                                   Prefix pfx, Long delta,
   6657                                   HChar* opname, IROp op )
   6658 {
   6659    HChar   dis_buf[50];
   6660    Int     alen, size;
   6661    IRTemp  addr;
   6662    Bool    shl, shr, sar;
   6663    UChar   rm   = getUChar(delta);
   6664    IRTemp  g0   = newTemp(Ity_I64);
   6665    IRTemp  g1   = newTemp(Ity_I64);
   6666    IRTemp  amt  = newTemp(Ity_I64);
   6667    IRTemp  amt8 = newTemp(Ity_I8);
   6668 
   6669    if (epartIsReg(rm)) {
   6670       assign( amt, getMMXReg(eregLO3ofRM(rm)) );
   6671       DIP("%s %s,%s\n", opname,
   6672                         nameMMXReg(eregLO3ofRM(rm)),
   6673                         nameMMXReg(gregLO3ofRM(rm)) );
   6674       delta++;
   6675    } else {
   6676       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   6677       assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
   6678       DIP("%s %s,%s\n", opname,
   6679                         dis_buf,
   6680                         nameMMXReg(gregLO3ofRM(rm)) );
   6681       delta += alen;
   6682    }
   6683    assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   6684    assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
   6685 
   6686    shl = shr = sar = False;
   6687    size = 0;
   6688    switch (op) {
   6689       case Iop_ShlN16x4: shl = True; size = 32; break;
   6690       case Iop_ShlN32x2: shl = True; size = 32; break;
   6691       case Iop_Shl64:    shl = True; size = 64; break;
   6692       case Iop_ShrN16x4: shr = True; size = 16; break;
   6693       case Iop_ShrN32x2: shr = True; size = 32; break;
   6694       case Iop_Shr64:    shr = True; size = 64; break;
   6695       case Iop_SarN16x4: sar = True; size = 16; break;
   6696       case Iop_SarN32x2: sar = True; size = 32; break;
   6697       default: vassert(0);
   6698    }
   6699 
   6700    if (shl || shr) {
   6701      assign(
   6702         g1,
   6703         IRExpr_Mux0X(
   6704            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
   6705            mkU64(0),
   6706            binop(op, mkexpr(g0), mkexpr(amt8))
   6707         )
   6708      );
   6709    } else
   6710    if (sar) {
   6711      assign(
   6712         g1,
   6713         IRExpr_Mux0X(
   6714            unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
   6715            binop(op, mkexpr(g0), mkU8(size-1)),
   6716            binop(op, mkexpr(g0), mkexpr(amt8))
   6717         )
   6718      );
   6719    } else {
   6720       vassert(0);
   6721    }
   6722 
   6723    putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   6724    return delta;
   6725 }
   6726 
   6727 
   6728 /* Vector by scalar shift of E by an immediate byte.  This is a
   6729    straight copy of dis_SSE_shiftE_imm. */
   6730 
   6731 static
   6732 ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
   6733 {
   6734    Bool    shl, shr, sar;
   6735    UChar   rm   = getUChar(delta);
   6736    IRTemp  e0   = newTemp(Ity_I64);
   6737    IRTemp  e1   = newTemp(Ity_I64);
   6738    UChar   amt, size;
   6739    vassert(epartIsReg(rm));
   6740    vassert(gregLO3ofRM(rm) == 2
   6741            || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   6742    amt = getUChar(delta+1);
   6743    delta += 2;
   6744    DIP("%s $%d,%s\n", opname,
   6745                       (Int)amt,
   6746                       nameMMXReg(eregLO3ofRM(rm)) );
   6747 
   6748    assign( e0, getMMXReg(eregLO3ofRM(rm)) );
   6749 
   6750    shl = shr = sar = False;
   6751    size = 0;
   6752    switch (op) {
   6753       case Iop_ShlN16x4: shl = True; size = 16; break;
   6754       case Iop_ShlN32x2: shl = True; size = 32; break;
   6755       case Iop_Shl64:    shl = True; size = 64; break;
   6756       case Iop_SarN16x4: sar = True; size = 16; break;
   6757       case Iop_SarN32x2: sar = True; size = 32; break;
   6758       case Iop_ShrN16x4: shr = True; size = 16; break;
   6759       case Iop_ShrN32x2: shr = True; size = 32; break;
   6760       case Iop_Shr64:    shr = True; size = 64; break;
   6761       default: vassert(0);
   6762    }
   6763 
   6764    if (shl || shr) {
   6765      assign( e1, amt >= size
   6766                     ? mkU64(0)
   6767                     : binop(op, mkexpr(e0), mkU8(amt))
   6768      );
   6769    } else
   6770    if (sar) {
   6771      assign( e1, amt >= size
   6772                     ? binop(op, mkexpr(e0), mkU8(size-1))
   6773                     : binop(op, mkexpr(e0), mkU8(amt))
   6774      );
   6775    } else {
   6776       vassert(0);
   6777    }
   6778 
   6779    putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   6780    return delta;
   6781 }
   6782 
   6783 
   6784 /* Completely handle all MMX instructions except emms. */
   6785 
   6786 static
   6787 ULong dis_MMX ( Bool* decode_ok,
   6788                 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
   6789 {
   6790    Int   len;
   6791    UChar modrm;
   6792    HChar dis_buf[50];
   6793    UChar opc = getUChar(delta);
   6794    delta++;
   6795 
   6796    /* dis_MMX handles all insns except emms. */
   6797    do_MMX_preamble();
   6798 
   6799    switch (opc) {
   6800 
   6801       case 0x6E:
   6802          if (sz == 4) {
   6803             /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
   6804             modrm = getUChar(delta);
   6805             if (epartIsReg(modrm)) {
   6806                delta++;
   6807                putMMXReg(
   6808                   gregLO3ofRM(modrm),
   6809                   binop( Iop_32HLto64,
   6810                          mkU32(0),
   6811                          getIReg32(eregOfRexRM(pfx,modrm)) ) );
   6812                DIP("movd %s, %s\n",
   6813                    nameIReg32(eregOfRexRM(pfx,modrm)),
   6814                    nameMMXReg(gregLO3ofRM(modrm)));
   6815             } else {
   6816                IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6817                delta += len;
   6818                putMMXReg(
   6819                   gregLO3ofRM(modrm),
   6820                   binop( Iop_32HLto64,
   6821                          mkU32(0),
   6822                          loadLE(Ity_I32, mkexpr(addr)) ) );
   6823                DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
   6824             }
   6825          }
   6826          else
   6827          if (sz == 8) {
   6828             /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
   6829             modrm = getUChar(delta);
   6830             if (epartIsReg(modrm)) {
   6831                delta++;
   6832                putMMXReg( gregLO3ofRM(modrm),
   6833                           getIReg64(eregOfRexRM(pfx,modrm)) );
   6834                DIP("movd %s, %s\n",
   6835                    nameIReg64(eregOfRexRM(pfx,modrm)),
   6836                    nameMMXReg(gregLO3ofRM(modrm)));
   6837             } else {
   6838                IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6839                delta += len;
   6840                putMMXReg( gregLO3ofRM(modrm),
   6841                           loadLE(Ity_I64, mkexpr(addr)) );
   6842                DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
   6843             }
   6844          }
   6845          else {
   6846             goto mmx_decode_failure;
   6847          }
   6848          break;
   6849 
   6850       case 0x7E:
   6851          if (sz == 4) {
   6852             /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
   6853             modrm = getUChar(delta);
   6854             if (epartIsReg(modrm)) {
   6855                delta++;
   6856                putIReg32( eregOfRexRM(pfx,modrm),
   6857                           unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
   6858                DIP("movd %s, %s\n",
   6859                    nameMMXReg(gregLO3ofRM(modrm)),
   6860                    nameIReg32(eregOfRexRM(pfx,modrm)));
   6861             } else {
   6862                IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6863                delta += len;
   6864                storeLE( mkexpr(addr),
   6865                         unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
   6866                DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
   6867             }
   6868          }
   6869          else
   6870          if (sz == 8) {
   6871             /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
   6872             modrm = getUChar(delta);
   6873             if (epartIsReg(modrm)) {
   6874                delta++;
   6875                putIReg64( eregOfRexRM(pfx,modrm),
   6876                           getMMXReg(gregLO3ofRM(modrm)) );
   6877                DIP("movd %s, %s\n",
   6878                    nameMMXReg(gregLO3ofRM(modrm)),
   6879                    nameIReg64(eregOfRexRM(pfx,modrm)));
   6880             } else {
   6881                IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6882                delta += len;
   6883                storeLE( mkexpr(addr),
   6884                        getMMXReg(gregLO3ofRM(modrm)) );
   6885                DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
   6886             }
   6887          } else {
   6888             goto mmx_decode_failure;
   6889          }
   6890          break;
   6891 
   6892       case 0x6F:
   6893          /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
   6894          if (sz != 4
   6895              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
   6896             goto mmx_decode_failure;
   6897          modrm = getUChar(delta);
   6898          if (epartIsReg(modrm)) {
   6899             delta++;
   6900             putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
   6901             DIP("movq %s, %s\n",
   6902                 nameMMXReg(eregLO3ofRM(modrm)),
   6903                 nameMMXReg(gregLO3ofRM(modrm)));
   6904          } else {
   6905             IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6906             delta += len;
   6907             putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
   6908             DIP("movq %s, %s\n",
   6909                 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
   6910          }
   6911          break;
   6912 
   6913       case 0x7F:
   6914          /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
   6915          if (sz != 4
   6916              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
   6917             goto mmx_decode_failure;
   6918          modrm = getUChar(delta);
   6919          if (epartIsReg(modrm)) {
   6920             /* Fall through.  The assembler doesn't appear to generate
   6921                these. */
   6922             goto mmx_decode_failure;
   6923          } else {
   6924             IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   6925             delta += len;
   6926             storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
   6927             DIP("mov(nt)q %s, %s\n",
   6928                 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
   6929          }
   6930          break;
   6931 
   6932       case 0xFC:
   6933       case 0xFD:
   6934       case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
   6935          if (sz != 4)
   6936             goto mmx_decode_failure;
   6937          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
   6938          break;
   6939 
   6940       case 0xEC:
   6941       case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
   6942          if (sz != 4
   6943              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
   6944             goto mmx_decode_failure;
   6945          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
   6946          break;
   6947 
   6948       case 0xDC:
   6949       case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
   6950          if (sz != 4)
   6951             goto mmx_decode_failure;
   6952          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
   6953          break;
   6954 
   6955       case 0xF8:
   6956       case 0xF9:
   6957       case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
   6958          if (sz != 4)
   6959             goto mmx_decode_failure;
   6960          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
   6961          break;
   6962 
   6963       case 0xE8:
   6964       case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
   6965          if (sz != 4)
   6966             goto mmx_decode_failure;
   6967          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
   6968          break;
   6969 
   6970       case 0xD8:
   6971       case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
   6972          if (sz != 4)
   6973             goto mmx_decode_failure;
   6974          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
   6975          break;
   6976 
   6977       case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
   6978          if (sz != 4)
   6979             goto mmx_decode_failure;
   6980          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
   6981          break;
   6982 
   6983       case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
   6984          if (sz != 4)
   6985             goto mmx_decode_failure;
   6986          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
   6987          break;
   6988 
   6989       case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
   6990          vassert(sz == 4);
   6991          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
   6992          break;
   6993 
   6994       case 0x74:
   6995       case 0x75:
   6996       case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
   6997          if (sz != 4)
   6998             goto mmx_decode_failure;
   6999          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
   7000          break;
   7001 
   7002       case 0x64:
   7003       case 0x65:
   7004       case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
   7005          if (sz != 4)
   7006             goto mmx_decode_failure;
   7007          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
   7008          break;
   7009 
   7010       case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
   7011          if (sz != 4)
   7012             goto mmx_decode_failure;
   7013          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
   7014          break;
   7015 
   7016       case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
   7017          if (sz != 4)
   7018             goto mmx_decode_failure;
   7019          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
   7020          break;
   7021 
   7022       case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
   7023          if (sz != 4)
   7024             goto mmx_decode_failure;
   7025          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
   7026          break;
   7027 
   7028       case 0x68:
   7029       case 0x69:
   7030       case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
   7031          if (sz != 4
   7032              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
   7033             goto mmx_decode_failure;
   7034          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
   7035          break;
   7036 
   7037       case 0x60:
   7038       case 0x61:
   7039       case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
   7040          if (sz != 4
   7041              && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
   7042             goto mmx_decode_failure;
   7043          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
   7044          break;
   7045 
   7046       case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
   7047          if (sz != 4)
   7048             goto mmx_decode_failure;
   7049          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
   7050          break;
   7051 
   7052       case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
   7053          if (sz != 4)
   7054             goto mmx_decode_failure;
   7055          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
   7056          break;
   7057 
   7058       case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
   7059          if (sz != 4)
   7060             goto mmx_decode_failure;
   7061          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
   7062          break;
   7063 
   7064       case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
   7065          if (sz != 4)
   7066             goto mmx_decode_failure;
   7067          delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
   7068          break;
   7069 
   7070 #     define SHIFT_BY_REG(_name,_op)                                     \
   7071                 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
   7072                 break;
   7073 
   7074       /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
   7075       case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
   7076       case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
   7077       case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
   7078 
   7079       /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
   7080       case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
   7081       case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
   7082       case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
   7083 
   7084       /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
   7085       case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
   7086       case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
   7087 
   7088 #     undef SHIFT_BY_REG
   7089 
   7090       case 0x71:
   7091       case 0x72:
   7092       case 0x73: {
   7093          /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
   7094          UChar byte2, subopc;
   7095          if (sz != 4)
   7096             goto mmx_decode_failure;
   7097          byte2  = getUChar(delta);      /* amode / sub-opcode */
   7098          subopc = toUChar( (byte2 >> 3) & 7 );
   7099 
   7100 #        define SHIFT_BY_IMM(_name,_op)                        \
   7101             do { delta = dis_MMX_shiftE_imm(delta,_name,_op);  \
   7102             } while (0)
   7103 
   7104               if (subopc == 2 /*SRL*/ && opc == 0x71)
   7105                   SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
   7106          else if (subopc == 2 /*SRL*/ && opc == 0x72)
   7107                  SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
   7108          else if (subopc == 2 /*SRL*/ && opc == 0x73)
   7109                  SHIFT_BY_IMM("psrlq", Iop_Shr64);
   7110 
   7111          else if (subopc == 4 /*SAR*/ && opc == 0x71)
   7112                  SHIFT_BY_IMM("psraw", Iop_SarN16x4);
   7113          else if (subopc == 4 /*SAR*/ && opc == 0x72)
   7114                  SHIFT_BY_IMM("psrad", Iop_SarN32x2);
   7115 
   7116          else if (subopc == 6 /*SHL*/ && opc == 0x71)
   7117                  SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
   7118          else if (subopc == 6 /*SHL*/ && opc == 0x72)
   7119                   SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
   7120          else if (subopc == 6 /*SHL*/ && opc == 0x73)
   7121                  SHIFT_BY_IMM("psllq", Iop_Shl64);
   7122 
   7123          else goto mmx_decode_failure;
   7124 
   7125 #        undef SHIFT_BY_IMM
   7126          break;
   7127       }
   7128 
   7129       case 0xF7: {
   7130          IRTemp addr    = newTemp(Ity_I64);
   7131          IRTemp regD    = newTemp(Ity_I64);
   7132          IRTemp regM    = newTemp(Ity_I64);
   7133          IRTemp mask    = newTemp(Ity_I64);
   7134          IRTemp olddata = newTemp(Ity_I64);
   7135          IRTemp newdata = newTemp(Ity_I64);
   7136 
   7137          modrm = getUChar(delta);
   7138          if (sz != 4 || (!epartIsReg(modrm)))
   7139             goto mmx_decode_failure;
   7140          delta++;
   7141 
   7142          assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
   7143          assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
   7144          assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
   7145          assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
   7146          assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
   7147          assign( newdata,
   7148                  binop(Iop_Or64,
   7149                        binop(Iop_And64,
   7150                              mkexpr(regD),
   7151                              mkexpr(mask) ),
   7152                        binop(Iop_And64,
   7153                              mkexpr(olddata),
   7154                              unop(Iop_Not64, mkexpr(mask)))) );
   7155          storeLE( mkexpr(addr), mkexpr(newdata) );
   7156          DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
   7157                                  nameMMXReg( gregLO3ofRM(modrm) ) );
   7158          break;
   7159       }
   7160 
   7161       /* --- MMX decode failure --- */
   7162       default:
   7163       mmx_decode_failure:
   7164          *decode_ok = False;
   7165          return delta; /* ignored */
   7166 
   7167    }
   7168 
   7169    *decode_ok = True;
   7170    return delta;
   7171 }
   7172 
   7173 
   7174 /*------------------------------------------------------------*/
   7175 /*--- More misc arithmetic and other obscure insns.        ---*/
   7176 /*------------------------------------------------------------*/
   7177 
   7178 /* Generate base << amt with vacated places filled with stuff
   7179    from xtra.  amt guaranteed in 0 .. 63. */
   7180 static
   7181 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
   7182 {
   7183    /* if   amt == 0
   7184       then base
   7185       else (base << amt) | (xtra >>u (64-amt))
   7186    */
   7187    return
   7188       IRExpr_Mux0X(
   7189          mkexpr(amt),
   7190          mkexpr(base),
   7191          binop(Iop_Or64,
   7192                binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
   7193                binop(Iop_Shr64, mkexpr(xtra),
   7194                                 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
   7195          )
   7196       );
   7197 }
   7198 
   7199 /* Generate base >>u amt with vacated places filled with stuff
   7200    from xtra.  amt guaranteed in 0 .. 63. */
   7201 static
   7202 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
   7203 {
   7204    /* if   amt == 0
   7205       then base
   7206       else (base >>u amt) | (xtra << (64-amt))
   7207    */
   7208    return
   7209       IRExpr_Mux0X(
   7210          mkexpr(amt),
   7211          mkexpr(base),
   7212          binop(Iop_Or64,
   7213                binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
   7214                binop(Iop_Shl64, mkexpr(xtra),
   7215                                 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
   7216          )
   7217       );
   7218 }
   7219 
   7220 /* Double length left and right shifts.  Apparently only required in
   7221    v-size (no b- variant). */
static
ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
                        Prefix pfx,
                        Long delta, UChar modrm,
                        Int sz,
                        IRExpr* shift_amt,
                        Bool amt_is_literal,
                        HChar* shift_amt_txt,
                        Bool left_shift )
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.   And eip on entry points at the modrm byte. */
   /* Returns the updated delta, ie the guest-code offset just past the
      bytes decoded here.  If amt_is_literal, the shift amount is an
      imm8 following the amode, and one extra byte is consumed at the
      very end.  sz (2, 4 or 8) is the operand size in bytes. */
   Int len;
   HChar dis_buf[50];

   IRType ty     = szToITy(sz);
   IRTemp gsrc   = newTemp(ty);
   IRTemp esrc   = newTemp(ty);
   IRTemp addr   = IRTemp_INVALID;
   IRTemp tmpSH  = newTemp(Ity_I8);
   IRTemp tmpSS  = newTemp(Ity_I8);
   IRTemp tmp64  = IRTemp_INVALID;
   IRTemp res64  = IRTemp_INVALID;
   IRTemp rss64  = IRTemp_INVALID;
   IRTemp resTy  = IRTemp_INVALID;
   IRTemp rssTy  = IRTemp_INVALID;
   /* Hardware masks the shift count mod 64 for 64-bit operands, mod 32
      otherwise. */
   Int    mask   = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      at the bottom.  */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      /* Register destination: consume only the modrm byte. */
      delta++;
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
   } else {
      /* Memory destination: decode the amode, telling disAMode how
         many bytes (the optional imm8) follow it. */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      delta += len;
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          shift_amt_txt,
          nameIRegG(sz, pfx, modrm), dis_buf);
   }

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
      value (rss64). */
   /* tmpSS = (tmpSH - 1) & mask: the "subshift" amount, used to
      produce the result of shifting by one less, which the flags
      thunk needs to recover CF/OF lazily. */

   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8,
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
                        mkU8(mask)));

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      /* Narrow cases: widen E:G (or G:E) into a single 64-bit value,
         shift that, then pull the wanted lane back out. */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(32)) );
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
                       mkU8(32)) );
      }
      else
      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
      }
      else
      if (sz == 2 && left_shift) {
         /* 16-bit left: replicate gsrc so counts up to 31 still pull
            in defined bits. */
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         ));
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
         assign( res64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                       mkU8(48)) );
         /* subshift formed by shifting [esrc'0000'0000'0000] */
         assign( rss64,
                 binop(Iop_Shr64,
                       binop(Iop_Shl64,
                             binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
                                              mkU8(48)),
                             mkexpr(tmpSS)),
                       mkU8(48)) );
      }
      else
      if (sz == 2 && !left_shift) {
         /* 16-bit right: likewise replicate gsrc above esrc. */
         assign( tmp64,
                 binop(Iop_32HLto64,
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                       binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         ));
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64,
                              unop(Iop_16Uto64, mkexpr(esrc)),
                              mkexpr(tmpSS)) );
      }

   } else {

      /* 64-bit case: no room to widen, so use the mux-based helpers
         that merge in bits from the other operand. */
      vassert(sz == 8);
      if (left_shift) {
         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
      } else {
         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
      }

   }

   /* Narrow the 64-bit results back to the operand size. */
   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   /* NOTE(review): Iop_Sar64 (not Shr64) is passed for right shifts —
      presumably the flags helper treats them identically here; confirm
      against setFlags_DEP1_DEP2_shift's definition. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));
   } else {
      storeLE( mkexpr(addr), mkexpr(resTy) );
   }

   /* Account for the trailing imm8 shift amount, if any. */
   if (amt_is_literal) delta++;
   return delta;
}
   7388 
   7389 
   7390 /* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   7391    required. */
   7392 
   7393 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
   7394 
   7395 static HChar* nameBtOp ( BtOp op )
   7396 {
   7397    switch (op) {
   7398       case BtOpNone:  return "";
   7399       case BtOpSet:   return "s";
   7400       case BtOpReset: return "r";
   7401       case BtOpComp:  return "c";
   7402       default: vpanic("nameBtOp(amd64)");
   7403    }
   7404 }
   7405 
   7406 
   7407 static
   7408 ULong dis_bt_G_E ( VexAbiInfo* vbi,
   7409                    Prefix pfx, Int sz, Long delta, BtOp op )
   7410 {
   7411    HChar  dis_buf[50];
   7412    UChar  modrm;
   7413    Int    len;
   7414    IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
   7415      t_addr1, t_rsp, t_mask, t_new;
   7416 
   7417    vassert(sz == 2 || sz == 4 || sz == 8);
   7418 
   7419    t_fetched = t_bitno0 = t_bitno1 = t_bitno2
   7420              = t_addr0 = t_addr1 = t_rsp
   7421              = t_mask = t_new = IRTemp_INVALID;
   7422 
   7423    t_fetched = newTemp(Ity_I8);
   7424    t_new     = newTemp(Ity_I8);
   7425    t_bitno0  = newTemp(Ity_I64);
   7426    t_bitno1  = newTemp(Ity_I64);
   7427    t_bitno2  = newTemp(