Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                     guest_amd64_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 /* Translates AMD64 code to IR. */
     37 
     38 /* TODO:
     39 
     40    All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
     41    to ensure a 64-bit value is being written.
     42 
     43    x87 FP Limitations:
     44 
     45    * all arithmetic done at 64 bits
     46 
     47    * no FP exceptions, except for handling stack over/underflow
     48 
     49    * FP rounding mode observed only for float->int conversions and
     50      int->float conversions which could lose accuracy, and for
     51      float-to-float rounding.  For all other operations,
     52      round-to-nearest is used, regardless.
     53 
     54    * FP sin/cos/tan/sincos: C2 flag is always cleared.  IOW the
     55      simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
     56      even when it isn't.
     57 
     58    * some of the FCOM cases could do with testing -- not convinced
     59      that the args are the right way round.
     60 
     61    * FSAVE does not re-initialise the FPU; it should do
     62 
     63    * FINIT not only initialises the FPU environment, it also zeroes
     64      all the FP registers.  It should leave the registers unchanged.
     65 
     66     RDTSC returns zero, always.
     67 
     68     SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
     69     per Intel docs this bit has no meaning anyway.  Since PUSHF is the
     70     only way to observe eflags[1], a proper fix would be to make that
     71     bit be set by PUSHF.
     72 
     73     This module uses global variables and so is not MT-safe (if that
     74     should ever become relevant).
     75 */
     76 
     77 /* Notes re address size overrides (0x67).
     78 
     79    According to the AMD documentation (24594 Rev 3.09, Sept 2003,
     80    "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
     81    and System Instructions"), Section 1.2.3 ("Address-Size Override
     82    Prefix"):
     83 
     84    0x67 applies to all explicit memory references, causing the top
     85    32 bits of the effective address to become zero.
     86 
     87    0x67 has no effect on stack references (push/pop); these always
     88    use a 64-bit address.
     89 
     90    0x67 changes the interpretation of instructions which implicitly
     91    reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
     92    instead.  These are:
     93 
     94       cmp{s,sb,sw,sd,sq}
     95       in{s,sb,sw,sd}
     96       jcxz, jecxz, jrcxz
     97       lod{s,sb,sw,sd,sq}
     98       loop{,e,bz,be,z}
     99       mov{s,sb,sw,sd,sq}
    100       out{s,sb,sw,sd}
    101       rep{,e,ne,nz}
    102       sca{s,sb,sw,sd,sq}
    103       sto{s,sb,sw,sd,sq}
    104       xlat{,b} */
    105 
    106 /* "Special" instructions.
    107 
    108    This instruction decoder can decode three special instructions
    109    which mean nothing natively (are no-ops as far as regs/mem are
    110    concerned) but have meaning for supporting Valgrind.  A special
    111    instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
    112    48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
    113    $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
    114    Following that, one of the following 3 are allowed (standard
    115    interpretation in parentheses):
    116 
    117       4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
    118       4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
    119       4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
    120 
    121    Any other bytes following the 16-byte preamble are illegal and
    122    constitute a failure in instruction decoding.  This all assumes
    123    that the preamble will never occur except in specific code
    124    fragments designed for Valgrind to catch.
    125 
    126    No prefixes may precede a "Special" instruction.
    127 */
    128 
    129 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
    130    insns: the side-exit back to the start of the insn is done with
    131    Ijk_Boring.  This is quite wrong, it should be done with
    132    Ijk_NoRedir, since otherwise the side exit, which is intended to
    133    restart the instruction for whatever reason, could go somewhere
    134    entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
    135    no-redir jumps performance critical, at least for rep-prefixed
    136    instructions, since all iterations thereof would involve such a
    137    jump.  It's not such a big deal with casLE since the side exit is
    138    only taken if the CAS fails, that is, the location is contended,
    139    which is relatively unlikely.
    140 
    141    Note also, the test for CAS success vs failure is done using
    142    Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
    143    Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
    144    shouldn't definedness-check these comparisons.  See
    145    COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
    146    background/rationale.
    147 */
    148 
    149 /* LOCK prefixed instructions.  These are translated using IR-level
    150    CAS statements (IRCAS) and are believed to preserve atomicity, even
    151    from the point of view of some other process racing against a
    152    simulated one (presumably they communicate via a shared memory
    153    segment).
    154 
    155    Handlers which are aware of LOCK prefixes are:
    156       dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
    157       dis_cmpxchg_G_E  (cmpxchg)
    158       dis_Grp1         (add, or, adc, sbb, and, sub, xor)
    159       dis_Grp3         (not, neg)
    160       dis_Grp4         (inc, dec)
    161       dis_Grp5         (inc, dec)
    162       dis_Grp8_Imm     (bts, btc, btr)
    163       dis_bt_G_E       (bts, btc, btr)
    164       dis_xadd_G_E     (xadd)
    165 */
    166 
    167 
    168 #include "libvex_basictypes.h"
    169 #include "libvex_ir.h"
    170 #include "libvex.h"
    171 #include "libvex_guest_amd64.h"
    172 
    173 #include "main_util.h"
    174 #include "main_globals.h"
    175 #include "guest_generic_bb_to_IR.h"
    176 #include "guest_generic_x87.h"
    177 #include "guest_amd64_defs.h"
    178 
    179 
    180 /*------------------------------------------------------------*/
    181 /*--- Globals                                              ---*/
    182 /*------------------------------------------------------------*/
    183 
    184 /* These are set at the start of the translation of an insn, right
    185    down in disInstr_AMD64, so that we don't have to pass them around
    186    endlessly.  They are all constant during the translation of any
    187    given insn. */
    188 
    189 /* These are set at the start of the translation of a BB, so
    190    that we don't have to pass them around endlessly. */
    191 
    192 /* We need to know this to do sub-register accesses correctly. */
    193 static Bool host_is_bigendian;
    194 
    195 /* Pointer to the guest code area (points to start of BB, not to the
    196    insn being processed). */
    197 static UChar* guest_code;
    198 
    199 /* The guest address corresponding to guest_code[0]. */
    200 static Addr64 guest_RIP_bbstart;
    201 
    202 /* The guest address for the instruction currently being
    203    translated. */
    204 static Addr64 guest_RIP_curr_instr;
    205 
    206 /* The IRSB* into which we're generating code. */
    207 static IRSB* irsb;
    208 
    209 /* For ensuring that %rip-relative addressing is done right.  A read
    210    of %rip generates the address of the next instruction.  It may be
    211    that we don't conveniently know that inside disAMode().  For sanity
    212    checking, if the next insn %rip is needed, we make a guess at what
    213    it is, record that guess here, and set the accompanying Bool to
    214    indicate that -- after this insn's decode is finished -- that guess
    215    needs to be checked.  */
    216 
    217 /* At the start of each insn decode, is set to (0, False).
    218    After the decode, if _mustcheck is now True, _assumed is
    219    checked. */
    220 
    221 static Addr64 guest_RIP_next_assumed;
    222 static Bool   guest_RIP_next_mustcheck;
    223 
    224 
    225 /*------------------------------------------------------------*/
    226 /*--- Helpers for constructing IR.                         ---*/
    227 /*------------------------------------------------------------*/
    228 
    229 /* Generate a new temporary of the given type. */
    230 static IRTemp newTemp ( IRType ty )
    231 {
    232    vassert(isPlausibleIRType(ty));
    233    return newIRTemp( irsb->tyenv, ty );
    234 }
    235 
    236 /* Add a statement to the list held by "irsb". */
    237 static void stmt ( IRStmt* st )
    238 {
    239    addStmtToIRSB( irsb, st );
    240 }
    241 
    242 /* Generate a statement "dst := e". */
    243 static void assign ( IRTemp dst, IRExpr* e )
    244 {
    245    stmt( IRStmt_WrTmp(dst, e) );
    246 }
    247 
    248 static IRExpr* unop ( IROp op, IRExpr* a )
    249 {
    250    return IRExpr_Unop(op, a);
    251 }
    252 
    253 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    254 {
    255    return IRExpr_Binop(op, a1, a2);
    256 }
    257 
    258 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    259 {
    260    return IRExpr_Triop(op, a1, a2, a3);
    261 }
    262 
    263 static IRExpr* mkexpr ( IRTemp tmp )
    264 {
    265    return IRExpr_RdTmp(tmp);
    266 }
    267 
    268 static IRExpr* mkU8 ( ULong i )
    269 {
    270    vassert(i < 256);
    271    return IRExpr_Const(IRConst_U8( (UChar)i ));
    272 }
    273 
    274 static IRExpr* mkU16 ( ULong i )
    275 {
    276    vassert(i < 0x10000ULL);
    277    return IRExpr_Const(IRConst_U16( (UShort)i ));
    278 }
    279 
    280 static IRExpr* mkU32 ( ULong i )
    281 {
    282    vassert(i < 0x100000000ULL);
    283    return IRExpr_Const(IRConst_U32( (UInt)i ));
    284 }
    285 
    286 static IRExpr* mkU64 ( ULong i )
    287 {
    288    return IRExpr_Const(IRConst_U64(i));
    289 }
    290 
    291 static IRExpr* mkU ( IRType ty, ULong i )
    292 {
    293    switch (ty) {
    294       case Ity_I8:  return mkU8(i);
    295       case Ity_I16: return mkU16(i);
    296       case Ity_I32: return mkU32(i);
    297       case Ity_I64: return mkU64(i);
    298       default: vpanic("mkU(amd64)");
    299    }
    300 }
    301 
    302 static void storeLE ( IRExpr* addr, IRExpr* data )
    303 {
    304    stmt( IRStmt_Store(Iend_LE, addr, data) );
    305 }
    306 
    307 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    308 {
    309    return IRExpr_Load(Iend_LE, ty, addr);
    310 }
    311 
/* Widen an 8-bit IROp to the operation of the same kind at the width
   implied by |ty|.  This relies on the IROp enumeration laying out
   the 8/16/32/64-bit variants of each accepted operation
   consecutively, in that order, so the widened op is obtained by
   adding a small offset to |op8|.  Only the ops listed in the
   assertion are known to have that layout. */
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Mul8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8 );
   /* Offset 0/1/2/3 selects the 8/16/32/64-bit variant respectively. */
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}
    329 
    330 static
    331 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
    332 {
    333    if (szSmall == 1 && szBig == 4) {
    334       return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
    335    }
    336    if (szSmall == 1 && szBig == 2) {
    337       return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
    338    }
    339    if (szSmall == 2 && szBig == 4) {
    340       return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
    341    }
    342    if (szSmall == 1 && szBig == 8 && !signd) {
    343       return unop(Iop_8Uto64, src);
    344    }
    345    if (szSmall == 1 && szBig == 8 && signd) {
    346       return unop(Iop_8Sto64, src);
    347    }
    348    if (szSmall == 2 && szBig == 8 && !signd) {
    349       return unop(Iop_16Uto64, src);
    350    }
    351    if (szSmall == 2 && szBig == 8 && signd) {
    352       return unop(Iop_16Sto64, src);
    353    }
    354    vpanic("doScalarWidening(amd64)");
    355 }
    356 
    357 
    358 
    359 /*------------------------------------------------------------*/
    360 /*--- Debugging output                                     ---*/
    361 /*------------------------------------------------------------*/
    362 
/* Bomb out if we can't handle something.  |str| names the missing
   feature and is handed to vpanic, so it must be a NUL-terminated
   string.  Never returns. */
__attribute__ ((noreturn))
static void unimplemented ( HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vpanic(str);
}
    370 
    371 #define DIP(format, args...)           \
    372    if (vex_traceflags & VEX_TRACE_FE)  \
    373       vex_printf(format, ## args)
    374 
    375 #define DIS(buf, format, args...)      \
    376    if (vex_traceflags & VEX_TRACE_FE)  \
    377       vex_sprintf(buf, format, ## args)
    378 
    379 
    380 /*------------------------------------------------------------*/
    381 /*--- Offsets of various parts of the amd64 guest state.   ---*/
    382 /*------------------------------------------------------------*/
    383 
    384 #define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
    385 #define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
    386 #define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
    387 #define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
    388 #define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
    389 #define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
    390 #define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
    391 #define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
    392 #define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
    393 #define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
    394 #define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
    395 #define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
    396 #define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
    397 #define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
    398 #define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
    399 #define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)
    400 
    401 #define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)
    402 
    403 #define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
    404 #define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)
    405 
    406 #define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
    407 #define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
    408 #define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
    409 #define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)
    410 
    411 #define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
    412 #define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
    413 #define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
    414 #define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
    415 #define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
    416 #define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
    417 #define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
    418 #define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)
    419 
    420 #define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
    421 #define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
    422 #define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
    423 #define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
    424 #define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
    425 #define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
    426 #define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
    427 #define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
    428 #define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
    429 #define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
    430 #define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
    431 #define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
    432 #define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
    433 #define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
    434 #define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
    435 #define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
    436 #define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
    437 #define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)
    438 
    439 #define OFFB_EMWARN    offsetof(VexGuestAMD64State,guest_EMWARN)
    440 #define OFFB_TISTART   offsetof(VexGuestAMD64State,guest_TISTART)
    441 #define OFFB_TILEN     offsetof(VexGuestAMD64State,guest_TILEN)
    442 
    443 #define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
    444 
    445 
    446 /*------------------------------------------------------------*/
    447 /*--- Helper bits and pieces for deconstructing the        ---*/
    448 /*--- amd64 insn stream.                                   ---*/
    449 /*------------------------------------------------------------*/
    450 
    451 /* This is the AMD64 register encoding -- integer regs. */
    452 #define R_RAX 0
    453 #define R_RCX 1
    454 #define R_RDX 2
    455 #define R_RBX 3
    456 #define R_RSP 4
    457 #define R_RBP 5
    458 #define R_RSI 6
    459 #define R_RDI 7
    460 #define R_R8  8
    461 #define R_R9  9
    462 #define R_R10 10
    463 #define R_R11 11
    464 #define R_R12 12
    465 #define R_R13 13
    466 #define R_R14 14
    467 #define R_R15 15
    468 
    469 /* This is the Intel register encoding -- segment regs. */
    470 #define R_ES 0
    471 #define R_CS 1
    472 #define R_SS 2
    473 #define R_DS 3
    474 #define R_FS 4
    475 #define R_GS 5
    476 
    477 
    478 /* Various simple conversions */
    479 
    480 static ULong extend_s_8to64 ( UChar x )
    481 {
    482    return (ULong)((((Long)x) << 56) >> 56);
    483 }
    484 
    485 static ULong extend_s_16to64 ( UShort x )
    486 {
    487    return (ULong)((((Long)x) << 48) >> 48);
    488 }
    489 
    490 static ULong extend_s_32to64 ( UInt x )
    491 {
    492    return (ULong)((((Long)x) << 32) >> 32);
    493 }
    494 
    495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
    496    register or memory.  If so, the byte will have the form 11XXXYYY,
    497    where YYY is the register number. */
    498 inline
    499 static Bool epartIsReg ( UChar mod_reg_rm )
    500 {
    501    return toBool(0xC0 == (mod_reg_rm & 0xC0));
    502 }
    503 
    504 /* Extract the 'g' field from a modRM byte.  This only produces 3
    505    bits, which is not a complete register number.  You should avoid
    506    this function if at all possible. */
    507 inline
    508 static Int gregLO3ofRM ( UChar mod_reg_rm )
    509 {
    510    return (Int)( (mod_reg_rm >> 3) & 7 );
    511 }
    512 
    513 /* Ditto the 'e' field of a modRM byte. */
    514 inline
    515 static Int eregLO3ofRM ( UChar mod_reg_rm )
    516 {
    517    return (Int)(mod_reg_rm & 0x7);
    518 }
    519 
    520 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
    521 
    522 static inline UChar getUChar ( Long delta )
    523 {
    524    UChar v = guest_code[delta+0];
    525    return v;
    526 }
    527 
    528 static UInt getUDisp16 ( Long delta )
    529 {
    530    UInt v = guest_code[delta+1]; v <<= 8;
    531    v |= guest_code[delta+0];
    532    return v & 0xFFFF;
    533 }
    534 
    535 //.. static UInt getUDisp ( Int size, Long delta )
    536 //.. {
    537 //..    switch (size) {
    538 //..       case 4: return getUDisp32(delta);
    539 //..       case 2: return getUDisp16(delta);
    540 //..       case 1: return getUChar(delta);
    541 //..       default: vpanic("getUDisp(x86)");
    542 //..    }
    543 //..    return 0; /*notreached*/
    544 //.. }
    545 
    546 
    547 /* Get a byte value out of the insn stream and sign-extend to 64
    548    bits. */
    549 static Long getSDisp8 ( Long delta )
    550 {
    551    return extend_s_8to64( guest_code[delta] );
    552 }
    553 
    554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
    555    bits. */
    556 static Long getSDisp16 ( Long delta )
    557 {
    558    UInt v = guest_code[delta+1]; v <<= 8;
    559    v |= guest_code[delta+0];
    560    return extend_s_16to64( (UShort)v );
    561 }
    562 
    563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
    564    bits. */
    565 static Long getSDisp32 ( Long delta )
    566 {
    567    UInt v = guest_code[delta+3]; v <<= 8;
    568    v |= guest_code[delta+2]; v <<= 8;
    569    v |= guest_code[delta+1]; v <<= 8;
    570    v |= guest_code[delta+0];
    571    return extend_s_32to64( v );
    572 }
    573 
    574 /* Get a 64-bit value out of the insn stream. */
    575 static Long getDisp64 ( Long delta )
    576 {
    577    ULong v = 0;
    578    v |= guest_code[delta+7]; v <<= 8;
    579    v |= guest_code[delta+6]; v <<= 8;
    580    v |= guest_code[delta+5]; v <<= 8;
    581    v |= guest_code[delta+4]; v <<= 8;
    582    v |= guest_code[delta+3]; v <<= 8;
    583    v |= guest_code[delta+2]; v <<= 8;
    584    v |= guest_code[delta+1]; v <<= 8;
    585    v |= guest_code[delta+0];
    586    return v;
    587 }
    588 
    589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
    590    if this is called with size==8.  Should not happen. */
    591 static Long getSDisp ( Int size, Long delta )
    592 {
    593    switch (size) {
    594       case 4: return getSDisp32(delta);
    595       case 2: return getSDisp16(delta);
    596       case 1: return getSDisp8(delta);
    597       default: vpanic("getSDisp(amd64)");
    598   }
    599 }
    600 
    601 static ULong mkSizeMask ( Int sz )
    602 {
    603    switch (sz) {
    604       case 1: return 0x00000000000000FFULL;
    605       case 2: return 0x000000000000FFFFULL;
    606       case 4: return 0x00000000FFFFFFFFULL;
    607       case 8: return 0xFFFFFFFFFFFFFFFFULL;
    608       default: vpanic("mkSzMask(amd64)");
    609    }
    610 }
    611 
    612 static Int imin ( Int a, Int b )
    613 {
    614    return (a < b) ? a : b;
    615 }
    616 
    617 static IRType szToITy ( Int n )
    618 {
    619    switch (n) {
    620       case 1: return Ity_I8;
    621       case 2: return Ity_I16;
    622       case 4: return Ity_I32;
    623       case 8: return Ity_I64;
    624       default: vex_printf("\nszToITy(%d)\n", n);
    625                vpanic("szToITy(amd64)");
    626    }
    627 }
    628 
    629 
    630 /*------------------------------------------------------------*/
    631 /*--- For dealing with prefixes.                           ---*/
    632 /*------------------------------------------------------------*/
    633 
    634 /* The idea is to pass around an int holding a bitmask summarising
    635    info from the prefixes seen on the current instruction, including
    636    info from the REX byte.  This info is used in various places, but
    637    most especially when making sense of register fields in
    638    instructions.
    639 
    640    The top 8 bits of the prefix are 0x55, just as a hacky way to
    641    ensure it really is a valid prefix.
    642 
    643    Things you can safely assume about a well-formed prefix:
    644    * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
    645    * if REX is not present then REXW,REXR,REXX,REXB will read
    646      as zero.
    647    * F2 and F3 will not both be 1.
    648 */
    649 
    650 typedef UInt  Prefix;
    651 
    652 #define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
    653 #define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
    654 #define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
    655 #define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
    656 #define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
    657 #define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
    658 #define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
    659 #define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
    662 #define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
    663 #define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
    664 #define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
    665 #define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
    666 #define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
    667 #define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
    668 #define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
    669 #define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
    670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
    671    PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
    672    positions. */
    673 #define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
    674 #define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
    675 #define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
    676 #define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */
    677 
    678 
    679 #define PFX_EMPTY 0x55000000
    680 
    681 static Bool IS_VALID_PFX ( Prefix pfx ) {
    682    return toBool((pfx & 0xFF000000) == PFX_EMPTY);
    683 }
    684 
    685 static Bool haveREX ( Prefix pfx ) {
    686    return toBool(pfx & PFX_REX);
    687 }
    688 
    689 static Int getRexW ( Prefix pfx ) {
    690    return (pfx & PFX_REXW) ? 1 : 0;
    691 }
    692 static Int getRexR ( Prefix pfx ) {
    693    return (pfx & PFX_REXR) ? 1 : 0;
    694 }
    695 static Int getRexX ( Prefix pfx ) {
    696    return (pfx & PFX_REXX) ? 1 : 0;
    697 }
    698 static Int getRexB ( Prefix pfx ) {
    699    return (pfx & PFX_REXB) ? 1 : 0;
    700 }
    701 
    702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
    703    completely changes what instruction it really is. */
    704 static Bool haveF2orF3 ( Prefix pfx ) {
    705    return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
    706 }
    707 static Bool haveF2 ( Prefix pfx ) {
    708    return toBool((pfx & PFX_F2) > 0);
    709 }
    710 static Bool haveF3 ( Prefix pfx ) {
    711    return toBool((pfx & PFX_F3) > 0);
    712 }
    713 
    714 static Bool have66 ( Prefix pfx ) {
    715    return toBool((pfx & PFX_66) > 0);
    716 }
    717 static Bool haveASO ( Prefix pfx ) {
    718    return toBool((pfx & PFX_ASO) > 0);
    719 }
    720 
    721 /* Return True iff pfx has 66 set and F2 and F3 clear */
    722 static Bool have66noF2noF3 ( Prefix pfx )
    723 {
    724   return
    725      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
    726 }
    727 
    728 /* Return True iff pfx has F2 set and 66 and F3 clear */
    729 static Bool haveF2no66noF3 ( Prefix pfx )
    730 {
    731   return
    732      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
    733 }
    734 
    735 /* Return True iff pfx has F3 set and 66 and F2 clear */
    736 static Bool haveF3no66noF2 ( Prefix pfx )
    737 {
    738   return
    739      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
    740 }
    741 
    742 /* Return True iff pfx has F3 set and F2 clear */
    743 static Bool haveF3noF2 ( Prefix pfx )
    744 {
    745   return
    746      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
    747 }
    748 
    749 /* Return True iff pfx has F2 set and F3 clear */
    750 static Bool haveF2noF3 ( Prefix pfx )
    751 {
    752   return
    753      toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
    754 }
    755 
    756 /* Return True iff pfx has 66, F2 and F3 clear */
    757 static Bool haveNo66noF2noF3 ( Prefix pfx )
    758 {
    759   return
    760      toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
    761 }
    762 
    763 /* Return True iff pfx has any of 66, F2 and F3 set */
    764 static Bool have66orF2orF3 ( Prefix pfx )
    765 {
    766   return toBool( ! haveNo66noF2noF3(pfx) );
    767 }
    768 
    769 /* Return True iff pfx has 66 or F2 set */
    770 static Bool have66orF2 ( Prefix pfx )
    771 {
    772    return toBool((pfx & (PFX_66|PFX_F2)) > 0);
    773 }
    774 
    775 /* Clear all the segment-override bits in a prefix. */
    776 static Prefix clearSegBits ( Prefix p )
    777 {
    778    return
    779       p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
    780 }
    781 
    782 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
    783 static UInt getVexNvvvv ( Prefix pfx ) {
    784    UInt r = (UInt)pfx;
    785    r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
    786    return r & 0xF;
    787 }
    788 
    789 static Bool haveVEX ( Prefix pfx ) {
    790    return toBool(pfx & PFX_VEX);
    791 }
    792 
    793 static Int getVexL ( Prefix pfx ) {
    794    return (pfx & PFX_VEXL) ? 1 : 0;
    795 }
    796 
    797 
    798 /*------------------------------------------------------------*/
    799 /*--- For dealing with escapes                             ---*/
    800 /*------------------------------------------------------------*/
    801 
    802 
    803 /* Escapes come after the prefixes, but before the primary opcode
    804    byte.  They escape the primary opcode byte into a bigger space.
    805    The 0xF0000000 isn't significant, except so as to make it not
    806    overlap valid Prefix values, for sanity checking.
    807 */
    808 
/* Escape state decoded from the bytes between the prefixes and the
   primary opcode; selects which opcode table the primary byte indexes. */
typedef
   enum {
      ESC_NONE=0xF0000000, // no escape byte seen
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;
    817 
    818 
    819 /*------------------------------------------------------------*/
    820 /*--- For dealing with integer registers                   ---*/
    821 /*------------------------------------------------------------*/
    822 
    823 /* This is somewhat complex.  The rules are:
    824 
    825    For 64, 32 and 16 bit register references, the e or g fields in the
    826    modrm bytes supply the low 3 bits of the register number.  The
    827    fourth (most-significant) bit of the register number is supplied by
    828    the REX byte, if it is present; else that bit is taken to be zero.
    829 
    830    The REX.R bit supplies the high bit corresponding to the g register
    831    field, and the REX.B bit supplies the high bit corresponding to the
    832    e register field (when the mod part of modrm indicates that modrm's
    833    e component refers to a register and not to memory).
    834 
    835    The REX.X bit supplies a high register bit for certain registers
    836    in SIB address modes, and is generally rarely used.
    837 
    838    For 8 bit register references, the presence of the REX byte itself
    839    has significance.  If there is no REX present, then the 3-bit
    840    number extracted from the modrm e or g field is treated as an index
    841    into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
    842    old x86 encoding scheme.
    843 
    844    But if there is a REX present, the register reference is
    845    interpreted in the same way as for 64/32/16-bit references: a high
    846    bit is extracted from REX, giving a 4-bit number, and the denoted
    847    register is the lowest 8 bits of the 16 integer registers denoted
    848    by the number.  In particular, values 3 through 7 of this sequence
    849    do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
    850    %rsp %rbp %rsi %rdi.
    851 
    852    The REX.W bit has no bearing at all on register numbers.  Instead
    853    its presence indicates that the operand size is to be overridden
    854    from its default value (32 bits) to 64 bits instead.  This is in
    855    the same fashion that an 0x66 prefix indicates the operand size is
    856    to be overridden from 32 bits down to 16 bits.  When both REX.W and
    857    0x66 are present there is a conflict, and REX.W takes precedence.
    858 
    859    Rather than try to handle this complexity using a single huge
    860    function, several smaller ones are provided.  The aim is to make it
    861    as difficult as possible to screw up register decoding in a subtle
    862    and hard-to-track-down way.
    863 
    864    Because these routines fish around in the host's memory (that is,
    865    in the guest state area) for sub-parts of guest registers, their
    866    correctness depends on the host's endianness.  So far these
    867    routines only work for little-endian hosts.  Those for which
    868    endianness is important have assertions to ensure sanity.
    869 */
    870 
    871 
    872 /* About the simplest question you can ask: where do the 64-bit
    873    integer registers live (in the guest state) ? */
    874 
static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      /* Encoded register numbers are 4 bits; anything else here is a
         decoder bug. */
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}
    897 
    898 
    899 /* Produce the name of an integer register, for printing purposes.
    900    reg is a number in the range 0 .. 15 that has been generated from a
    901    3-bit reg-field number and a REX extension bit.  irregular denotes
    902    the case where sz==1 and no REX byte is present. */
    903 
static
HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   /* All names are static constants; callers must not modify or free
      the returned string. */
   static HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   /* Used when sz == 1 and no REX byte is present: the old x86
      byte-register naming, where encodings 4 .. 7 are %ah .. %bh. */
   static HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   /* 'irregular' only makes sense for byte-sized accesses, and then
      only for the first 8 encodings. */
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}
    942 
    943 /* Using the same argument conventions as nameIReg, produce the
    944    guest state offset of an integer register. */
    945 
static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   /* 'irregular' is only meaningful for sz == 1 (see nameIReg). */
   if (sz == 1) {
      if (irregular)
         vassert(reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      /* %ah %ch %dh %bh live at byte 1 of RAX/RCX/RDX/RBX in the
         (little-endian) guest state layout. */
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}
    971 
    972 
    973 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
    974 
static IRExpr* getIRegCL ( void )
{
   /* %cl is the low byte of RCX in the little-endian guest layout. */
   vassert(!host_is_bigendian);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}


/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   /* %ah is byte 1 of RAX in the little-endian guest layout. */
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}
    990 
    991 
    992 /* Read/write various widths of %RAX, as it has various
    993    special-purpose uses. */
    994 
static HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

/* Read the given width of %RAX.  The 32-bit read is done as a 64-bit
   Get followed by a narrow, since the guest state holds the full
   64-bit register. */
static IRExpr* getIRegRAX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}
   1017 
   1018 static void putIRegRAX ( Int sz, IRExpr* e )
   1019 {
   1020    IRType ty = typeOfIRExpr(irsb->tyenv, e);
   1021    vassert(!host_is_bigendian);
   1022    switch (sz) {
   1023       case 8: vassert(ty == Ity_I64);
   1024               stmt( IRStmt_Put( OFFB_RAX, e ));
   1025               break;
   1026       case 4: vassert(ty == Ity_I32);
   1027               stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
   1028               break;
   1029       case 2: vassert(ty == Ity_I16);
   1030               stmt( IRStmt_Put( OFFB_RAX, e ));
   1031               break;
   1032       case 1: vassert(ty == Ity_I8);
   1033               stmt( IRStmt_Put( OFFB_RAX, e ));
   1034               break;
   1035       default: vpanic("putIRegRAX(amd64)");
   1036    }
   1037 }
   1038 
   1039 
   1040 /* Read/write various widths of %RDX, as it has various
   1041    special-purpose uses. */
   1042 
static HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

/* Read the given width of %RDX.  The 32-bit read is a 64-bit Get
   followed by a narrow, since the guest state holds all 64 bits. */
static IRExpr* getIRegRDX ( Int sz )
{
   vassert(!host_is_bigendian);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

/* Write the given width of %RDX; a 32-bit write is zero-extended to
   the full 64 bits. */
static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(!host_is_bigendian);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}
   1082 
   1083 
   1084 /* Simplistic functions to deal with the integer registers as a
   1085    straightforward bank of 16 64-bit regs. */
   1086 
/* Read an integer register as a full 64-bit value. */
static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

/* Write a full 64-bit value to an integer register. */
static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

/* Name of the 64-bit view of an integer register. */
static HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}
   1103 
   1104 
   1105 /* Simplistic functions to deal with the lower halves of integer
   1106    registers as a straightforward bank of 16 32-bit regs. */
   1107 
/* Read the low 32 bits of an integer register (64-bit Get followed
   by a narrow). */
static IRExpr* getIReg32 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

/* Write the low 32 bits of an integer register, zero-extending into
   the full 64 bits. */
static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

/* Name of the 32-bit view of an integer register. */
static HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}
   1127 
   1128 
   1129 /* Simplistic functions to deal with the lower quarters of integer
   1130    registers as a straightforward bank of 16 16-bit regs. */
   1131 
/* Read the low 16 bits of an integer register. */
static IRExpr* getIReg16 ( UInt regno )
{
   vassert(!host_is_bigendian);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

/* Write 16 bits to an integer register.
   NOTE(review): the Iop_16Uto64 widen makes this a full 64-bit Put,
   zeroing the upper 48 bits -- unlike e.g. putIRegRDX's sz==2 case,
   which does a partial 16-bit Put that preserves the upper bits.
   Presumably callers only use this where zeroing is acceptable or
   intended -- verify at call sites. */
static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

/* Name of the 16-bit view of an integer register. */
static HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}
   1150 
   1151 
   1152 /* Sometimes what we know is a 3-bit register number, a REX byte, and
   1153    which field of the REX byte is to be used to extend to a 4-bit
   1154    number.  These functions cater for that situation.
   1155 */
/* Read the 64-bit register denoted by lo3bits extended by REX.X. */
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

/* Name of the 64-bit register denoted by lo3bits extended by REX.X. */
static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}

/* Name the sz-wide view of the register denoted by lo3bits extended
   by REX.B.  The irregular flag is set when sz == 1 and no REX byte
   is present, selecting the old %ah .. %bh naming. */
static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}
   1178 
   1179 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
   1180 {
   1181    vassert(lo3bits < 8);
   1182    vassert(IS_VALID_PFX(pfx));
   1183    vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   1184    if (sz == 4) {
   1185       sz = 8;
   1186       return unop(Iop_64to32,
   1187                   IRExpr_Get(
   1188                      offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
   1189                                      toBool(sz==1 && !haveREX(pfx)) ),
   1190                      szToITy(sz)
   1191                  )
   1192              );
   1193    } else {
   1194       return IRExpr_Get(
   1195                 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
   1196                                 toBool(sz==1 && !haveREX(pfx)) ),
   1197                 szToITy(sz)
   1198              );
   1199    }
   1200 }
   1201 
/* Write the sz-wide view of the register denoted by lo3bits extended
   by REX.B; a 32-bit write is zero-extended to the full 64 bits. */
static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}
   1214 
   1215 
   1216 /* Functions for getting register numbers from modrm bytes and REX
   1217    when we don't have to consider the complexities of integer subreg
   1218    accesses.
   1219 */
   1220 /* Extract the g reg field from a modRM byte, and augment it using the
   1221    REX.R bit from the supplied REX byte.  The R bit usually is
   1222    associated with the g register field.
   1223 */
   1224 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
   1225 {
   1226    Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   1227    reg += (pfx & PFX_REXR) ? 8 : 0;
   1228    return reg;
   1229 }
   1230 
   1231 /* Extract the e reg field from a modRM byte, and augment it using the
   1232    REX.B bit from the supplied REX byte.  The B bit usually is
   1233    associated with the e register field (when modrm indicates e is a
   1234    register, that is).
   1235 */
   1236 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
   1237 {
   1238    Int rm;
   1239    vassert(epartIsReg(mod_reg_rm));
   1240    rm = (Int)(mod_reg_rm & 0x7);
   1241    rm += (pfx & PFX_REXB) ? 8 : 0;
   1242    return rm;
   1243 }
   1244 
   1245 
   1246 /* General functions for dealing with integer register access. */
   1247 
   1248 /* Produce the guest state offset for a reference to the 'g' register
   1249    field in a modrm byte, taking into account REX (or its absence),
   1250    and the size of the access.
   1251 */
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

/* Read the 'g' register.  A 32-bit read is a 64-bit Get followed by
   a narrow, since the guest state holds the full 64-bit register. */
static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

/* Write the 'g' register; a 32-bit write is zero-extended to 64
   bits. */
static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

/* Name of the 'g' register at the given access width. */
static
HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
   1292 
   1293 
   1294 /* Produce the guest state offset for a reference to the 'e' register
   1295    field in a modrm byte, taking into account REX (or its absence),
   1296    and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   1297    denotes a memory access rather than a register access.
   1298 */
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(!host_is_bigendian);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   /* eregOfRexRM asserts that modrm denotes a register, not memory. */
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

/* Read the 'e' register.  A 32-bit read is a 64-bit Get followed by
   a narrow, since the guest state holds the full 64-bit register. */
static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

/* Write the 'e' register; a 32-bit write is zero-extended to 64
   bits. */
static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

/* Name of the 'e' register at the given access width. */
static
HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
   1339 
   1340 
   1341 /*------------------------------------------------------------*/
   1342 /*--- For dealing with XMM registers                       ---*/
   1343 /*------------------------------------------------------------*/
   1344 
/* Guest-state offset of a whole YMM register (0 .. 15). */
static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}
   1367 
/* Guest-state offset of an XMM register -- the low 128 bits of the
   corresponding YMM register, so on a little-endian host the two
   offsets coincide. */
static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   return ymmGuestRegOffset( xmmreg );
}
   1374 
   1375 /* Lanes of vector registers are always numbered from zero being the
   1376    least significant lane (rightmost in the register).  */
   1377 
/* Offset of 16-bit lane 'laneno' (0 = least significant) of an XMM
   register. */
static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

/* Offset of 32-bit lane 'laneno' of an XMM register. */
static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

/* Offset of 64-bit lane 'laneno' of an XMM register. */
static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

/* Offset of 128-bit lane 'laneno' of a YMM register. */
static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

/* Offset of 64-bit lane 'laneno' of a YMM register. */
static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

/* Offset of 32-bit lane 'laneno' of a YMM register. */
static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(!host_is_bigendian);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}
   1425 
/* Read a whole XMM register. */
static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

/* Read one 64-bit lane of an XMM register, as an integer. */
static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

/* As getXMMRegLane64, but viewed as a 64-bit float. */
static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

/* Read one 32-bit lane of an XMM register, as an integer. */
static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

/* As getXMMRegLane32, but viewed as a 32-bit float. */
static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

/* Read one 16-bit lane of an XMM register. */
static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
  return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}
   1455 
/* Write a whole XMM register.  e :: Ity_V128. */
static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

/* Write one 64-bit lane of an XMM register.  e :: Ity_I64. */
static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

/* As putXMMRegLane64, but e :: Ity_F64. */
static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

/* Write one 32-bit lane of an XMM register.  e :: Ity_F32. */
static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

/* As putXMMRegLane32F, but e :: Ity_I32. */
static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}
   1485 
/* Read a whole YMM register. */
static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

/* Read one 128-bit lane of a YMM register. */
static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

/* Read one 64-bit lane of a YMM register. */
static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

/* Read one 32-bit lane of a YMM register. */
static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}
   1505 
/* Write a whole YMM register.  e :: Ity_V256. */
static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

/* Write one 128-bit lane of a YMM register.  e :: Ity_V128. */
static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}

/* Write one 64-bit lane of a YMM register.  e :: Ity_F64. */
static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

/* As putYMMRegLane64F, but e :: Ity_I64. */
static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

/* Write one 32-bit lane of a YMM register.  e :: Ity_F32. */
static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

/* As putYMMRegLane32F, but e :: Ity_I32. */
static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}
   1541 
/* Build a 128-bit vector constant from a 16-bit descriptor, with the
   bit-per-byte expansion defined by IRConst_V128 -- so mkV128(0)
   yields all zeroes. */
static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}
   1546 
/* Write the low half of a YMM reg and zero out the upper half.
   e :: Ity_V128. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}
   1553 
/* 1-bit logical AND: both bits are widened to 64 bits, And64-ed,
   and the result narrowed back -- presumably because no 1-bit And
   operation is available at the IR level. */
static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}
   1563 
   1564 /* Generate a compare-and-swap operation, operating on memory at
   1565    'addr'.  The expected value is 'expVal' and the new value is
   1566    'newVal'.  If the operation fails, then transfer control (with a
   1567    no-redir jump (XXX no -- see comment at top of this file)) to
   1568    'restart_point', which is presumably the address of the guest
   1569    instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   /* Expected and new values must have the same integer type. */
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   /* Snapshot the expected value so it can be compared against the
      old memory contents after the CAS. */
   assign(expTmp, expVal);
   /* Single (IRTemp_INVALID / NULL hi parts) little-endian CAS;
      oldTmp receives the prior memory contents. */
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   /* If memory did not hold the expected value, the CAS failed:
      transfer control back to restart_point to retry. */
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}
   1593 
   1594 
   1595 /*------------------------------------------------------------*/
   1596 /*--- Helpers for %rflags.                                 ---*/
   1597 /*------------------------------------------------------------*/
   1598 
   1599 /* -------------- Evaluating the flags-thunk. -------------- */
   1600 
   1601 /* Build IR to calculate all the eflags from stored
   1602    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1603    Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   /* Argument order matches the amd64g_calculate_rflags_all helper:
      OP, DEP1, DEP2, NDEP. */
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2.  (Bit i of mcx_mask corresponds
      to args[i]: here args[0] is OP and args[3] is NDEP.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1623 
   1624 /* Build IR to calculate some particular condition from stored
   1625    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1626    Ity_Bit. */
   1627 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
   1628 {
   1629    IRExpr** args
   1630       = mkIRExprVec_5( mkU64(cond),
   1631                        IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1632                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1633                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1634                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1635    IRExpr* call
   1636       = mkIRExprCCall(
   1637            Ity_I64,
   1638            0/*regparm*/,
   1639            "amd64g_calculate_condition", &amd64g_calculate_condition,
   1640            args
   1641         );
   1642    /* Exclude the requested condition, OP and NDEP from definedness
   1643       checking.  We're only interested in DEP1 and DEP2. */
   1644    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   1645    return unop(Iop_64to1, call);
   1646 }
   1647 
   1648 /* Build IR to calculate just the carry flag from stored
   1649    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
   1650 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
   1651 {
   1652    IRExpr** args
   1653       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1654                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1655                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1656                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1657    IRExpr* call
   1658       = mkIRExprCCall(
   1659            Ity_I64,
   1660            0/*regparm*/,
   1661            "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
   1662            args
   1663         );
   1664    /* Exclude OP and NDEP from definedness checking.  We're only
   1665       interested in DEP1 and DEP2. */
   1666    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1667    return call;
   1668 }
   1669 
   1670 
   1671 /* -------------- Building the flags-thunk. -------------- */
   1672 
   1673 /* The machinery in this section builds the flag-thunk following a
   1674    flag-setting operation.  Hence the various setFlags_* functions.
   1675 */
   1676 
   1677 static Bool isAddSub ( IROp op8 )
   1678 {
   1679    return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
   1680 }
   1681 
   1682 static Bool isLogic ( IROp op8 )
   1683 {
   1684    return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
   1685 }
   1686 
   1687 /* U-widen 8/16/32/64 bit int expr to 64. */
   1688 static IRExpr* widenUto64 ( IRExpr* e )
   1689 {
   1690    switch (typeOfIRExpr(irsb->tyenv,e)) {
   1691       case Ity_I64: return e;
   1692       case Ity_I32: return unop(Iop_32Uto64, e);
   1693       case Ity_I16: return unop(Iop_16Uto64, e);
   1694       case Ity_I8:  return unop(Iop_8Uto64, e);
   1695       default: vpanic("widenUto64");
   1696    }
   1697 }
   1698 
   1699 /* S-widen 8/16/32/64 bit int expr to 32. */
   1700 static IRExpr* widenSto64 ( IRExpr* e )
   1701 {
   1702    switch (typeOfIRExpr(irsb->tyenv,e)) {
   1703       case Ity_I64: return e;
   1704       case Ity_I32: return unop(Iop_32Sto64, e);
   1705       case Ity_I16: return unop(Iop_16Sto64, e);
   1706       case Ity_I8:  return unop(Iop_8Sto64, e);
   1707       default: vpanic("widenSto64");
   1708    }
   1709 }
   1710 
   1711 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   1712    of these combinations make sense. */
   1713 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
   1714 {
   1715    IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   1716    if (src_ty == dst_ty)
   1717       return e;
   1718    if (src_ty == Ity_I32 && dst_ty == Ity_I16)
   1719       return unop(Iop_32to16, e);
   1720    if (src_ty == Ity_I32 && dst_ty == Ity_I8)
   1721       return unop(Iop_32to8, e);
   1722    if (src_ty == Ity_I64 && dst_ty == Ity_I32)
   1723       return unop(Iop_64to32, e);
   1724    if (src_ty == Ity_I64 && dst_ty == Ity_I16)
   1725       return unop(Iop_64to16, e);
   1726    if (src_ty == Ity_I64 && dst_ty == Ity_I8)
   1727       return unop(Iop_64to8, e);
   1728 
   1729    vex_printf("\nsrc, dst tys are: ");
   1730    ppIRType(src_ty);
   1731    vex_printf(", ");
   1732    ppIRType(dst_ty);
   1733    vex_printf("\n");
   1734    vpanic("narrowTo(amd64)");
   1735 }
   1736 
   1737 
   1738 /* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   1739    auto-sized up to the real op. */
   1740 
   1741 static
   1742 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
   1743 {
   1744    Int ccOp = 0;
   1745    switch (ty) {
   1746       case Ity_I8:  ccOp = 0; break;
   1747       case Ity_I16: ccOp = 1; break;
   1748       case Ity_I32: ccOp = 2; break;
   1749       case Ity_I64: ccOp = 3; break;
   1750       default: vassert(0);
   1751    }
   1752    switch (op8) {
   1753       case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB;   break;
   1754       case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB;   break;
   1755       default:       ppIROp(op8);
   1756                      vpanic("setFlags_DEP1_DEP2(amd64)");
   1757    }
   1758    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   1759    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   1760    stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
   1761 }
   1762 
   1763 
   1764 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
   1765 
   1766 static
   1767 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
   1768 {
   1769    Int ccOp = 0;
   1770    switch (ty) {
   1771       case Ity_I8:  ccOp = 0; break;
   1772       case Ity_I16: ccOp = 1; break;
   1773       case Ity_I32: ccOp = 2; break;
   1774       case Ity_I64: ccOp = 3; break;
   1775       default: vassert(0);
   1776    }
   1777    switch (op8) {
   1778       case Iop_Or8:
   1779       case Iop_And8:
   1780       case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
   1781       default:       ppIROp(op8);
   1782                      vpanic("setFlags_DEP1(amd64)");
   1783    }
   1784    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   1785    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   1786    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
   1787 }
   1788 
   1789 
   1790 /* For shift operations, we put in the result and the undershifted
   1791    result.  Except if the shift amount is zero, the thunk is left
   1792    unchanged. */
   1793 
static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   /* 0..3 selects the B/W/L/Q variant of the base thunk op. */
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   /* Each field is written via Mux0X on 'guard': when guard is 0 the
      field's current value is re-Put, so the thunk is left unchanged
      (the zero-shift-amount case noted in the comment above). */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                   mkU64(ccOp))) );
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                   widenUto64(mkexpr(res)))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_Mux0X( mkexpr(guard),
                                   IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                   widenUto64(mkexpr(resUS)))) );
}
   1835 
   1836 
   1837 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
   1838    the former value of the carry flag, which unfortunately we have to
   1839    compute. */
   1840 
   1841 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
   1842 {
   1843    Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
   1844 
   1845    switch (ty) {
   1846       case Ity_I8:  ccOp += 0; break;
   1847       case Ity_I16: ccOp += 1; break;
   1848       case Ity_I32: ccOp += 2; break;
   1849       case Ity_I64: ccOp += 3; break;
   1850       default: vassert(0);
   1851    }
   1852 
   1853    /* This has to come first, because calculating the C flag
   1854       may require reading all four thunk fields. */
   1855    stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   1856    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   1857    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   1858    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
   1859 }
   1860 
   1861 
   1862 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   1863    two arguments. */
   1864 
   1865 static
   1866 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
   1867 {
   1868    switch (ty) {
   1869       case Ity_I8:
   1870          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
   1871          break;
   1872       case Ity_I16:
   1873          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
   1874          break;
   1875       case Ity_I32:
   1876          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
   1877          break;
   1878       case Ity_I64:
   1879          stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
   1880          break;
   1881       default:
   1882          vpanic("setFlags_MUL(amd64)");
   1883    }
   1884    stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   1885    stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
   1886 }
   1887 
   1888 
   1889 /* -------------- Condition codes. -------------- */
   1890 
   1891 /* Condition codes, using the AMD encoding.  */
   1892 
/* Return a printable mnemonic for 'cond'.  Where two spellings exist
   for the same encoding, the preferred one is returned and the
   AMD-encoding spelling is kept alongside in a comment. */
static HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}
   1916 
   1917 static
   1918 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
   1919                                           /*OUT*/Bool*   needInvert )
   1920 {
   1921    vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   1922    if (cond & 1) {
   1923       *needInvert = True;
   1924       return cond-1;
   1925    } else {
   1926       *needInvert = False;
   1927       return cond;
   1928    }
   1929 }
   1930 
   1931 
   1932 /* -------------- Helpers for ADD/SUB with carry. -------------- */
   1933 
   1934 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   1935    appropriately.
   1936 
   1937    Optionally, generate a store for the 'tres' value.  This can either
   1938    be a normal store, or it can be a cas-with-possible-failure style
   1939    store:
   1940 
   1941    if taddr is IRTemp_INVALID, then no store is generated.
   1942 
   1943    if taddr is not IRTemp_INVALID, then a store (using taddr as
   1944    the address) is generated:
   1945 
   1946      if texpVal is IRTemp_INVALID then a normal store is
   1947      generated, and restart_point must be zero (it is irrelevant).
   1948 
   1949      if texpVal is not IRTemp_INVALID then a cas-style store is
   1950      generated.  texpVal is the expected value, restart_point
   1951      is the restart point if the store fails, and texpVal must
   1952      have the same type as tres.
   1953 
   1954 */
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);   /* old carry, as a 64-bit value */
   IRTemp  oldcn = newTemp(ty);        /* old carry, narrowed to 'ty' */
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the size-specific ADC thunk opcode. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc,  binop(Iop_And64,
                        mk_amd64g_calculate_rflags_c(),
                        mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 + ta2 + old carry */
   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Fill the thunk: DEP1 = first arg, DEP2 = second arg XORed with
      the old (narrowed) carry, NDEP = the old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1))  ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
   2008 
   2009 
   2010 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   2011    appropriately.  As with helper_ADC, possibly generate a store of
   2012    the result -- see comments on helper_ADC for details.
   2013 */
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
{
   UInt    thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);   /* old carry, as a 64-bit value */
   IRTemp  oldcn = newTemp(ty);        /* old carry, narrowed to 'ty' */
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   /* Select the size-specific SBB thunk opcode. */
   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   /* tres = ta1 - ta2 - old carry */
   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   /* Fill the thunk: DEP1 = first arg, DEP2 = second arg XORed with
      the old (narrowed) carry, NDEP = the old carry itself. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
   2067 
   2068 
   2069 /* -------------- Helpers for disassembly printing. -------------- */
   2070 
   2071 static HChar* nameGrp1 ( Int opc_aux )
   2072 {
   2073    static HChar* grp1_names[8]
   2074      = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   2075    if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   2076    return grp1_names[opc_aux];
   2077 }
   2078 
   2079 static HChar* nameGrp2 ( Int opc_aux )
   2080 {
   2081    static HChar* grp2_names[8]
   2082      = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   2083    if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   2084    return grp2_names[opc_aux];
   2085 }
   2086 
   2087 static HChar* nameGrp4 ( Int opc_aux )
   2088 {
   2089    static HChar* grp4_names[8]
   2090      = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   2091    if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   2092    return grp4_names[opc_aux];
   2093 }
   2094 
   2095 static HChar* nameGrp5 ( Int opc_aux )
   2096 {
   2097    static HChar* grp5_names[8]
   2098      = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   2099    if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   2100    return grp5_names[opc_aux];
   2101 }
   2102 
   2103 static HChar* nameGrp8 ( Int opc_aux )
   2104 {
   2105    static HChar* grp8_names[8]
   2106       = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   2107    if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   2108    return grp8_names[opc_aux];
   2109 }
   2110 
   2111 //.. static HChar* nameSReg ( UInt sreg )
   2112 //.. {
   2113 //..    switch (sreg) {
   2114 //..       case R_ES: return "%es";
   2115 //..       case R_CS: return "%cs";
   2116 //..       case R_SS: return "%ss";
   2117 //..       case R_DS: return "%ds";
   2118 //..       case R_FS: return "%fs";
   2119 //..       case R_GS: return "%gs";
   2120 //..       default: vpanic("nameSReg(x86)");
   2121 //..    }
   2122 //.. }
   2123 
   2124 static HChar* nameMMXReg ( Int mmxreg )
   2125 {
   2126    static HChar* mmx_names[8]
   2127      = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   2128    if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   2129    return mmx_names[mmxreg];
   2130 }
   2131 
   2132 static HChar* nameXMMReg ( Int xmmreg )
   2133 {
   2134    static HChar* xmm_names[16]
   2135      = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
   2136          "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
   2137          "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
   2138          "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   2139    if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   2140    return xmm_names[xmmreg];
   2141 }
   2142 
   2143 static HChar* nameMMXGran ( Int gran )
   2144 {
   2145    switch (gran) {
   2146       case 0: return "b";
   2147       case 1: return "w";
   2148       case 2: return "d";
   2149       case 3: return "q";
   2150       default: vpanic("nameMMXGran(amd64,guest)");
   2151    }
   2152 }
   2153 
   2154 static HChar nameISize ( Int size )
   2155 {
   2156    switch (size) {
   2157       case 8: return 'q';
   2158       case 4: return 'l';
   2159       case 2: return 'w';
   2160       case 1: return 'b';
   2161       default: vpanic("nameISize(amd64)");
   2162    }
   2163 }
   2164 
   2165 static HChar* nameYMMReg ( Int ymmreg )
   2166 {
   2167    static HChar* ymm_names[16]
   2168      = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3",
   2169          "%ymm4",  "%ymm5",  "%ymm6",  "%ymm7",
   2170          "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
   2171          "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   2172    if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   2173    return ymm_names[ymmreg];
   2174 }
   2175 
   2176 
   2177 /*------------------------------------------------------------*/
   2178 /*--- JMP helpers                                          ---*/
   2179 /*------------------------------------------------------------*/
   2180 
/* End the translation unit with an unconditional jump of kind 'kind'
   to the literal guest address 'd64': mark 'dres' as stopped-here and
   write the destination to the guest RIP. */
static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr64 d64 )
{
   /* The caller must not already have finalised this instruction. */
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}
   2192 
/* End the translation unit with an unconditional jump of kind 'kind'
   to the guest address held in temporary 't': mark 'dres' as
   stopped-here and write the destination to the guest RIP. */
static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   /* The caller must not already have finalised this instruction. */
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}
   2204 
/* Generate a two-way conditional branch: control goes to 'd64_true'
   if 'cond' holds and to 'd64_false' otherwise.  The condition is
   first reduced to its positive form so that the side exit always
   tests a positive condition; if that reduction required an
   inversion, the two destinations are swapped to compensate. */
static
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      /* condPos is the negation of 'cond': exit to the false arm when
         it holds, fall through to the true arm. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      /* condPos is 'cond' itself: exit to the true arm when it holds,
         fall through to the false arm. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
   }
}
   2232 
   2233 /* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   2234    guest address of the next instruction to be executed.
   2235 
   2236    This function generates an AbiHint to say that -128(%rsp)
   2237    .. -1(%rsp) should now be regarded as uninitialised.
   2238 */
static
void make_redzone_AbiHint ( VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;
   vassert(szB >= 0);

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);
   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
   if (szB > 0)
      /* Declare [new_rsp - szB, new_rsp) -- the redzone -- to be
         undefined, and 'nia' as the address of the next instruction. */
      stmt( IRStmt_AbiHint(
               binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
               szB,
               mkexpr(nia)
            ));
}
   2261 
   2262 
   2263 /*------------------------------------------------------------*/
   2264 /*--- Disassembling addressing modes                       ---*/
   2265 /*------------------------------------------------------------*/
   2266 
   2267 static
   2268 HChar* segRegTxt ( Prefix pfx )
   2269 {
   2270    if (pfx & PFX_CS) return "%cs:";
   2271    if (pfx & PFX_DS) return "%ds:";
   2272    if (pfx & PFX_ES) return "%es:";
   2273    if (pfx & PFX_FS) return "%fs:";
   2274    if (pfx & PFX_GS) return "%gs:";
   2275    if (pfx & PFX_SS) return "%ss:";
   2276    return ""; /* no override */
   2277 }
   2278 
   2279 
   2280 /* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   2281    linear address by adding any required segment override as indicated
   2282    by sorb, and also dealing with any address size override
   2283    present. */
static
IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_zero) {
         /* Note that this is a linux-kernel specific hack that relies
            on the assumption that %fs is always zero. */
         /* return virtual + guest_FS_ZERO. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_0x60) {
         /* Note that this is a darwin-kernel specific hack that relies
            on the assumption that %gs is always 0x60. */
         /* return virtual + guest_GS_0x60. */
         virtual = binop(Iop_Add64, virtual,
                                    IRExpr_Get(OFFB_GS_0x60, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   /* --- address size override --- */
   if (haveASO(pfx))
      /* Clip to the low 32 bits, then zero-extend back to 64. */
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   return virtual;
}
   2321 
   2322 //.. {
   2323 //..    Int    sreg;
   2324 //..    IRType hWordTy;
   2325 //..    IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
   2326 //..
   2327 //..    if (sorb == 0)
   2328 //..       /* the common case - no override */
   2329 //..       return virtual;
   2330 //..
   2331 //..    switch (sorb) {
   2332 //..       case 0x3E: sreg = R_DS; break;
   2333 //..       case 0x26: sreg = R_ES; break;
   2334 //..       case 0x64: sreg = R_FS; break;
   2335 //..       case 0x65: sreg = R_GS; break;
   2336 //..       default: vpanic("handleAddrOverrides(x86,guest)");
   2337 //..    }
   2338 //..
   2339 //..    hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
   2340 //..
   2341 //..    seg_selector = newTemp(Ity_I32);
   2342 //..    ldt_ptr      = newTemp(hWordTy);
   2343 //..    gdt_ptr      = newTemp(hWordTy);
   2344 //..    r64          = newTemp(Ity_I64);
   2345 //..
   2346 //..    assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
   2347 //..    assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
   2348 //..    assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
   2349 //..
   2350 //..    /*
   2351 //..    Call this to do the translation and limit checks:
   2352 //..    ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
   2353 //..                                  UInt seg_selector, UInt virtual_addr )
   2354 //..    */
   2355 //..    assign(
   2356 //..       r64,
   2357 //..       mkIRExprCCall(
   2358 //..          Ity_I64,
   2359 //..          0/*regparms*/,
   2360 //..          "x86g_use_seg_selector",
   2361 //..          &x86g_use_seg_selector,
   2362 //..          mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
   2363 //..                         mkexpr(seg_selector), virtual)
   2364 //..       )
   2365 //..    );
   2366 //..
   2367 //..    /* If the high 32 of the result are non-zero, there was a
   2368 //..       failure in address translation.  In which case, make a
   2369 //..       quick exit.
   2370 //..    */
   2371 //..    stmt(
   2372 //..       IRStmt_Exit(
   2373 //..          binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
   2374 //..          Ijk_MapFail,
   2375 //..          IRConst_U32( guest_eip_curr_instr )
   2376 //..       )
   2377 //..    );
   2378 //..
   2379 //..    /* otherwise, here's the translated result. */
   2380 //..    return unop(Iop_64to32, mkexpr(r64));
   2381 //.. }
   2382 
   2383 
   2384 /* Generate IR to calculate an address indicated by a ModRM and
   2385    following SIB bytes.  The expression, and the number of bytes in
   2386    the address mode, are returned (the latter in *len).  Note that
   2387    this fn should not be called if the R/M part of the address denotes
   2388    a register instead of memory.  If print_codegen is true, text of
   2389    the addressing mode is placed in buf.
   2390 
   2391    The computed address is stored in a new tempreg, and the
   2392    identity of the tempreg is returned.
   2393 
   2394    extra_bytes holds the number of bytes after the amode, as supplied
   2395    by the caller.  This is needed to make sense of %rip-relative
   2396    addresses.  Note that the value that *len is set to is only the
   2397    length of the amode itself and does not include the value supplied
   2398    in extra_bytes.
   2399  */
   2400 
   2401 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
   2402 {
   2403    IRTemp tmp = newTemp(Ity_I64);
   2404    assign( tmp, addr64 );
   2405    return tmp;
   2406 }
   2407 
static
IRTemp disAMode ( /*OUT*/Int* len,
                  VexAbiInfo* vbi, Prefix pfx, Long delta,
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);
   delta++;   /* step past the modRM byte itself */

   /* Disassembly text accumulates into the caller's buffer. */
   buf[0] = (UChar)0;
   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive.  The result is (mod << 3) | rm,
      i.e. a 5-bit selector.
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           *len = 1;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long d   = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           *len = 2;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           *len = 5;
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05:
         { Long d = getSDisp32(delta);
           *len = 5;
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded.  delta currently points at the d32; the guessed
              next-insn address is 4 bytes further on, plus whatever
              trailing bytes the caller told us about. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed = guest_RIP_bbstart
                                    + delta+4 + extra_bytes;
           return disAMode_copy2tmp(
                     handleAddrOverrides(vbi, pfx,
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      case 0x04: {
         /* SIB, with no displacement.  Special cases:
            -- %rsp cannot act as an index value.
               If index_r indicates %rsp, zero is used for the index.
            -- when mod is zero and base indicates RBP or R13, base is
               instead a 32-bit sign-extended literal.
            It's all madness, I tell you.  Extract %index, %base and
            scale from the SIB byte.  The value denoted is then:
               | %index == %RSP && (%base == %RBP || %base == %R13)
               = d32 following SIB byte
               | %index == %RSP && !(%base == %RBP || %base == %R13)
               = %base
               | %index != %RSP && (%base == %RBP || %base == %R13)
               = d32 following SIB byte + (%index << scale)
               | %index != %RSP && !(%base == %RBP || %base == %R13)
               = %base + (%index << scale)
         */
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         /* correct since #(R13) == 8 + #(RBP) */
         Bool  base_is_BPor13 = toBool(base_r == R_RBP);
         /* index==%rsp counts as "no index" only when REX.X==0;
            with REX.X==1 it denotes %r12, which is a valid index. */
         Bool  index_is_SP    = toBool(index_r == R_RSP && 0==getRexX(pfx));
         delta++;

         if ((!index_is_SP) && (!base_is_BPor13)) {
            if (scale == 0) {
               DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 2;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        getIRegRexB(8,pfx,base_r),
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                              mkU8(scale)))));
         }

         if ((!index_is_SP) && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
                      nameIReg64rexX(pfx,index_r), 1<<scale);
            *len = 6;
            return
               disAMode_copy2tmp(
               handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
                                         mkU8(scale)),
                        mkU64(d))));
         }

         if (index_is_SP && (!base_is_BPor13)) {
            DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
            *len = 2;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
         }

         if (index_is_SP && base_is_BPor13) {
            Long d = getSDisp32(delta);
            DIS(buf, "%s%lld", segRegTxt(pfx), d);
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx, mkU64(d)));
         }

         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 8-bit displacement.  Special cases:
         -- %esp cannot act as an index value.
            If index_r indicates %esp, zero is used for the index.
         Denoted value is:
            | %index == %ESP
            = d8 + %base
            | %index != %ESP
            = d8 + %base + (%index << scale)
      */
      case 0x0C: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp8(delta+1);

         /* index==%rsp with REX.X==0 encodes "no index register". */
         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                                   d, nameIRegRexB(8,pfx,base_r));
            *len = 3;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 3;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         Denoted value is:
            | %index == %RSP
            = d32 + %base
            | %index != %RSP
            = d32 + %base + (%index << scale)
      */
      case 0x14: {
         UChar sib     = getUChar(delta);
         UChar scale   = toUChar((sib >> 6) & 3);
         UChar index_r = toUChar((sib >> 3) & 7);
         UChar base_r  = toUChar(sib & 7);
         Long d        = getSDisp32(delta+1);

         /* index==%rsp with REX.X==0 encodes "no index register". */
         if (index_r == R_RSP && 0==getRexX(pfx)) {
            DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
                                   d, nameIRegRexB(8,pfx,base_r));
            *len = 6;
            return disAMode_copy2tmp(
                   handleAddrOverrides(vbi, pfx,
                      binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
         } else {
            if (scale == 0) {
               DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r));
            } else {
               DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
                         nameIRegRexB(8,pfx,base_r),
                         nameIReg64rexX(pfx,index_r), 1<<scale);
            }
            *len = 6;
            return
                disAMode_copy2tmp(
                handleAddrOverrides(vbi, pfx,
                  binop(Iop_Add64,
                        binop(Iop_Add64,
                              getIRegRexB(8,pfx,base_r),
                              binop(Iop_Shl64,
                                    getIReg64rexX(pfx,index_r), mkU8(scale))),
                        mkU64(d))));
         }
         vassert(0); /*NOTREACHED*/
      }

      default:
         vpanic("disAMode(amd64)");
         return 0; /*notreached*/
   }
}
   2675 
   2676 
   2677 /* Figure out the number of (insn-stream) bytes constituting the amode
   2678    beginning at delta.  Is useful for getting hold of literals beyond
   2679    the end of the amode before it has been disassembled.  */
   2680 
   2681 static UInt lengthAMode ( Prefix pfx, Long delta )
   2682 {
   2683    UChar mod_reg_rm = getUChar(delta);
   2684    delta++;
   2685 
   2686    /* squeeze out the reg field from mod_reg_rm, since a 256-entry
   2687       jump table seems a bit excessive.
   2688    */
   2689    mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   2690    mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
   2691                                                /* is now XX0XXYYY */
   2692    mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   2693    switch (mod_reg_rm) {
   2694 
   2695       /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
   2696          REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
   2697       */
   2698       case 0x00: case 0x01: case 0x02: case 0x03:
   2699       /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
   2700          return 1;
   2701 
   2702       /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
   2703          REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
   2704       */
   2705       case 0x08: case 0x09: case 0x0A: case 0x0B:
   2706       /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
   2707          return 2;
   2708 
   2709       /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
   2710          REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
   2711       */
   2712       case 0x10: case 0x11: case 0x12: case 0x13:
   2713       /* ! 14 */ case 0x15: case 0x16: case 0x17:
   2714          return 5;
   2715 
   2716       /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
   2717       /* REX.B==1: a register, %r8  .. %r16.  This shouldn't happen. */
   2718       /* Not an address, but still handled. */
   2719       case 0x18: case 0x19: case 0x1A: case 0x1B:
   2720       case 0x1C: case 0x1D: case 0x1E: case 0x1F:
   2721          return 1;
   2722 
   2723       /* RIP + disp32. */
   2724       case 0x05:
   2725          return 5;
   2726 
   2727       case 0x04: {
   2728          /* SIB, with no displacement. */
   2729          UChar sib     = getUChar(delta);
   2730          UChar base_r  = toUChar(sib & 7);
   2731          /* correct since #(R13) == 8 + #(RBP) */
   2732          Bool  base_is_BPor13 = toBool(base_r == R_RBP);
   2733 
   2734          if (base_is_BPor13) {
   2735             return 6;
   2736          } else {
   2737             return 2;
   2738          }
   2739       }
   2740 
   2741       /* SIB, with 8-bit displacement. */
   2742       case 0x0C:
   2743          return 3;
   2744 
   2745       /* SIB, with 32-bit displacement. */
   2746       case 0x14:
   2747          return 6;
   2748 
   2749       default:
   2750          vpanic("lengthAMode(amd64)");
   2751          return 0; /*notreached*/
   2752    }
   2753 }
   2754 
   2755 
   2756 /*------------------------------------------------------------*/
   2757 /*--- Disassembling common idioms                          ---*/
   2758 /*------------------------------------------------------------*/
   2759 
   2760 /* Handle binary integer instructions of the form
   2761       op E, G  meaning
   2762       op reg-or-mem, reg
   2763    Is passed the a ptr to the modRM byte, the actual operation, and the
   2764    data size.  Returns the address advanced completely over this
   2765    instruction.
   2766 
   2767    E(src) is reg-or-mem
   2768    G(dst) is reg.
   2769 
   2770    If E is reg, -->    GET %G,  tmp
   2771                        OP %E,   tmp
   2772                        PUT tmp, %G
   2773 
   2774    If E is mem and OP is not reversible,
   2775                 -->    (getAddr E) -> tmpa
   2776                        LD (tmpa), tmpa
   2777                        GET %G, tmp2
   2778                        OP tmpa, tmp2
   2779                        PUT tmp2, %G
   2780 
   2781    If E is mem and OP is reversible
   2782                 -->    (getAddr E) -> tmpa
   2783                        LD (tmpa), tmpa
   2784                        OP %G, tmpa
   2785                        PUT tmpa, %G
   2786 */
static
ULong dis_op2_E_G ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* the operation's result */
   IRTemp  src  = newTemp(ty);   /* the E operand */
   IRTemp  dst0 = newTemp(ty);   /* original value of G */
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  The register is zeroed up front, so the reads
         below observe a defined zero and the result/flags are then
         computed from 0 op 0, as required. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         if (False && op8 == Iop_Sub8)
            vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
	 putIRegG(size,pfx,rm, mkU(ty,0));
      }

      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  getIRegE(size,pfx,rm) );

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: the helper computes dst1 and sets the flag thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: likewise via the subtract-with-borrow helper. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* keep==False means the result is discarded (flags-only
            forms, e.g. CMP); the flags above are still set. */
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegE(size,pfx,rm),
                          nameIRegG(size,pfx,rm));
      return 1+delta0;
   } else {
      /* E refers to memory */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dst0, getIRegG(size,pfx,rm) );
      assign( src,  loadLE(szToITy(size), mkexpr(addr)) );

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegG(size, pfx, rm, mkexpr(dst1));
      } else {
         assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIRegG(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          dis_buf, nameIRegG(size, pfx, rm));
      return len+delta0;
   }
}
   2881 
   2882 
   2883 
   2884 /* Handle binary integer instructions of the form
   2885       op G, E  meaning
   2886       op reg, reg-or-mem
   2887    Is passed the a ptr to the modRM byte, the actual operation, and the
   2888    data size.  Returns the address advanced completely over this
   2889    instruction.
   2890 
   2891    G(src) is reg.
   2892    E(dst) is reg-or-mem
   2893 
   2894    If E is reg, -->    GET %E,  tmp
   2895                        OP %G,   tmp
   2896                        PUT tmp, %E
   2897 
   2898    If E is mem, -->    (getAddr E) -> tmpa
   2899                        LD (tmpa), tmpv
   2900                        OP %G, tmpv
   2901                        ST tmpv, (tmpa)
   2902 */
static
ULong dis_op2_G_E ( VexAbiInfo* vbi,
                    Prefix      pfx,
                    Bool        addSubCarry,
                    IROp        op8,
                    Bool        keep,
                    Int         size,
                    Long        delta0,
                    HChar*      t_amd64opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);   /* the operation's result */
   IRTemp  src  = newTemp(ty);   /* the G operand */
   IRTemp  dst0 = newTemp(ty);   /* original value of E */
   UChar   rm   = getUChar(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.  Zeroing the register first
         means the reads below see a defined zero. */
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
         putIRegE(size,pfx,rm, mkU(ty,0));
      }

      assign(dst0, getIRegE(size,pfx,rm));
      assign(src,  getIRegG(size,pfx,rm));

      if (addSubCarry && op8 == Iop_Add8) {
         /* ADC: the helper computes dst1 and sets the flag thunk. */
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         /* SBB: likewise via the subtract-with-borrow helper. */
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIRegE(size, pfx, rm, mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         /* keep==False means the result is discarded (flags-only
            forms, e.g. CMP); the flags above are still set. */
         if (keep)
            putIRegE(size, pfx, rm, mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm),
                          nameIRegE(size,pfx,rm));
      return 1+delta0;
   }

   /* E refers to memory */
   {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIRegG(size,pfx,rm));

      /* When a LOCK prefix is present, the store back to memory is
         done compare-and-swap style, so the read-modify-write appears
         atomic. */
      if (addSubCarry && op8 == Iop_Add8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (pfx & PFX_LOCK) {
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/,
                      mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
                          nameIRegG(size,pfx,rm), dis_buf);
      return len+delta0;
   }
}
   3018 
   3019 
   3020 /* Handle move instructions of the form
   3021       mov E, G  meaning
   3022       mov reg-or-mem, reg
   3023    Is passed the a ptr to the modRM byte, and the data size.  Returns
   3024    the address advanced completely over this instruction.
   3025 
   3026    E(src) is reg-or-mem
   3027    G(dst) is reg.
   3028 
   3029    If E is reg, -->    GET %E,  tmpv
   3030                        PUT tmpv, %G
   3031 
   3032    If E is mem  -->    (getAddr E) -> tmpa
   3033                        LD (tmpa), tmpb
   3034                        PUT tmpb, %G
   3035 */
   3036 static
   3037 ULong dis_mov_E_G ( VexAbiInfo* vbi,
   3038                     Prefix      pfx,
   3039                     Int         size,
   3040                     Long        delta0 )
   3041 {
   3042    Int len;
   3043    UChar rm = getUChar(delta0);
   3044    HChar dis_buf[50];
   3045 
   3046    if (epartIsReg(rm)) {
   3047       putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
   3048       DIP("mov%c %s,%s\n", nameISize(size),
   3049                            nameIRegE(size,pfx,rm),
   3050                            nameIRegG(size,pfx,rm));
   3051       return 1+delta0;
   3052    }
   3053 
   3054    /* E refers to memory */
   3055    {
   3056       IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
   3057       putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
   3058       DIP("mov%c %s,%s\n", nameISize(size),
   3059                            dis_buf,
   3060                            nameIRegG(size,pfx,rm));
   3061       return delta0+len;
   3062    }
   3063 }
   3064 
   3065 
   3066 /* Handle move instructions of the form
   3067       mov G, E  meaning
   3068       mov reg, reg-or-mem
   3069    Is passed the a ptr to the modRM byte, and the data size.  Returns
   3070    the address advanced completely over this instruction.
   3071 
   3072    G(src) is reg.
   3073    E(dst) is reg-or-mem
   3074 
   3075    If E is reg, -->    GET %G,  tmp
   3076                        PUT tmp, %E
   3077 
   3078    If E is mem, -->    (getAddr E) -> tmpa
   3079                        GET %G, tmpv
   3080                        ST tmpv, (tmpa)
   3081 */
   3082 static
   3083 ULong dis_mov_G_E ( VexAbiInfo* vbi,
   3084                     Prefix      pfx,
   3085                     Int         size,
   3086                     Long        delta0 )
   3087 {
   3088    Int len;
   3089    UChar rm = getUChar(delta0);
   3090    HChar dis_buf[50];
   3091 
   3092    if (epartIsReg(rm)) {
   3093       putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
   3094       DIP("mov%c %s,%s\n", nameISize(size),
   3095                            nameIRegG(size,pfx,rm),
   3096                            nameIRegE(size,pfx,rm));
   3097       return 1+delta0;
   3098    }
   3099 
   3100    /* E refers to memory */
   3101    {
   3102       IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
   3103       storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
   3104       DIP("mov%c %s,%s\n", nameISize(size),
   3105                            nameIRegG(size,pfx,rm),
   3106                            dis_buf);
   3107       return len+delta0;
   3108    }
   3109 }
   3110 
   3111 
   3112 /* op $immediate, AL/AX/EAX/RAX. */
static
ULong dis_op_imm_A ( Int    size,
                     Bool   carrying,
                     IROp   op8,
                     Bool   keep,
                     Long   delta,
                     HChar* t_amd64opc )
{
   /* The immediate occupies at most 4 insn-stream bytes: 8-byte-sized
      operations take a (sign-extended) 32-bit immediate, hence the
      imin below. */
   Int    size4 = imin(size,4);
   IRType ty    = szToITy(size);
   IRTemp dst0  = newTemp(ty);   /* original RAX (or sub-register) */
   IRTemp src   = newTemp(ty);   /* the immediate */
   IRTemp dst1  = newTemp(ty);   /* the operation's result */
   Long  lit    = getSDisp(size4,delta);
   assign(dst0, getIRegRAX(size));
   /* Truncate the literal to the operation size. */
   assign(src,  mkU(ty,lit & mkSizeMask(size)));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   }
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   }
   else
   if (op8 == Iop_Add8 && carrying) {
      /* ADC: the helper computes dst1 and sets the flag thunk. */
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      /* SBB: likewise via the subtract-with-borrow helper. */
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(amd64,guest)");

   /* keep==False means the result is discarded (flags-only forms,
      e.g. CMP/TEST); the flags above are still set. */
   if (keep)
      putIRegRAX(size, mkexpr(dst1));

   DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
                           lit, nameIRegRAX(size));
   return delta+size4;
}
   3160 
   3161 
   3162 /* Sign- and Zero-extending moves. */
   3163 static
   3164 ULong dis_movx_E_G ( VexAbiInfo* vbi,
   3165                      Prefix pfx,
   3166                      Long delta, Int szs, Int szd, Bool sign_extend )
   3167 {
   3168    UChar rm = getUChar(delta);
   3169    if (epartIsReg(rm)) {
   3170       putIRegG(szd, pfx, rm,
   3171                     doScalarWidening(
   3172                        szs,szd,sign_extend,
   3173                        getIRegE(szs,pfx,rm)));
   3174       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
   3175                                nameISize(szs),
   3176                                nameISize(szd),
   3177                                nameIRegE(szs,pfx,rm),
   3178                                nameIRegG(szd,pfx,rm));
   3179       return 1+delta;
   3180    }
   3181 
   3182    /* E refers to memory */
   3183    {
   3184       Int    len;
   3185       HChar  dis_buf[50];
   3186       IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
   3187       putIRegG(szd, pfx, rm,
   3188                     doScalarWidening(
   3189                        szs,szd,sign_extend,
   3190                        loadLE(szToITy(szs),mkexpr(addr))));
   3191       DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
   3192                                nameISize(szs),
   3193                                nameISize(szd),
   3194                                dis_buf,
   3195                                nameIRegG(szd,pfx,rm));
   3196       return len+delta;
   3197    }
   3198 }
   3199 
   3200 
   3201 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
   3202    the 64 / 32 / 16 / 8 bit quantity in the given IRTemp.  */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   /* special-case the 64-bit case */
   if (sz == 8) {
      /* RDX:RAX / t -> quotient in RAX, remainder in RDX.  The
         DivMod ops return the remainder in the high half and the
         quotient in the low half of the 128-bit result. */
      IROp   op     = signed_divide ? Iop_DivModS128to64
                                    : Iop_DivModU128to64;
      IRTemp src128 = newTemp(Ity_I128);
      IRTemp dst128 = newTemp(Ity_I128);
      assign( src128, binop(Iop_64HLto128,
                            getIReg64(R_RDX),
                            getIReg64(R_RAX)) );
      assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
      putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
      putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
   } else {
      /* 8/16/32-bit cases are all done via a 64/32 divide, widening
         the dividend (and, for the narrow cases, the divisor) first. */
      IROp   op    = signed_divide ? Iop_DivModS64to32
                                   : Iop_DivModU64to32;
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp dst64 = newTemp(Ity_I64);
      switch (sz) {
      case 4:
         /* EDX:EAX / t -> quotient EAX, remainder EDX. */
         assign( src64,
                 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
         assign( dst64,
                 binop(op, mkexpr(src64), mkexpr(t)) );
         putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
         putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
         break;
      case 2: {
         /* DX:AX / t -> quotient AX, remainder DX.  Widen the 32-bit
            dividend and the 16-bit divisor up to the op's widths. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         assign( src64, unop(widen3264,
                             binop(Iop_16HLto32,
                                   getIRegRDX(2),
                                   getIRegRAX(2))) );
         assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
         putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
         putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
         break;
      }
      case 1: {
         /* AX / t -> quotient AL, remainder AH. */
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
         assign( src64, unop(widen3264,
                        unop(widen1632, getIRegRAX(2))) );
         assign( dst64,
                 binop(op, mkexpr(src64),
                           unop(widen1632, unop(widen816, mkexpr(t)))) );
         putIRegRAX( 1, unop(Iop_16to8,
                        unop(Iop_32to16,
                        unop(Iop_64to32,mkexpr(dst64)))) );
         putIRegAH( unop(Iop_16to8,
                    unop(Iop_32to16,
                    unop(Iop_64HIto32,mkexpr(dst64)))) );
         break;
      }
      default:
         vpanic("codegen_div(amd64)");
      }
   }
}
   3266 
/* Group 1 extended opcodes: ADD/OR/ADC/SBB/AND/SUB/XOR/CMP with an
   immediate operand (d64, d_sz bytes).  delta points at the modrm
   byte (am_sz bytes of addressing mode follow).  gregLO3ofRM(modrm)
   selects the operation; 7 is CMP, which sets flags but does not
   write the result back.  Honours the LOCK prefix for the memory
   forms via compare-and-swap stores.  Returns the updated delta. */
static
ULong dis_Grp1 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, Long d64 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   ULong   mask = mkSizeMask(sz);

   /* ADC/SBB (cases 2 and 3) leave op8 INVALID; they are handled by
      helpers below rather than a plain binop. */
   switch (gregLO3ofRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1(amd64): unhandled case");
   }

   if (epartIsReg(modrm)) {
      /* Register destination: no store, so LOCK is irrelevant. */
      vassert(am_sz == 1);

      assign(dst0, getIRegE(sz,pfx,modrm));
      assign(src,  mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      /* 7 == CMP: flags only, no writeback. */
      if (gregLO3ofRM(modrm) < 7)
         putIRegE(sz, pfx, modrm, mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
          nameIRegE(sz,pfx,modrm));
   } else {
      /* Memory destination. */
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d64 & mask));

      if (gregLO3ofRM(modrm) == 2 /* ADC */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (gregLO3ofRM(modrm) == 3 /* SBB */) {
         if (pfx & PFX_LOCK) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                       /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         /* 7 == CMP: flags only, no store. */
         if (gregLO3ofRM(modrm) < 7) {
            if (pfx & PFX_LOCK) {
               /* LOCKed RMW: store via CAS so the access is atomic. */
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
                                    mkexpr(dst1)/*newVal*/,
                                    guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $%lld, %s\n",
          nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
          d64, dis_buf);
   }
   return delta;
}
   3372 
   3373 
   3374 /* Group 2 extended opcodes.  shift_expr must be an 8-bit typed
   3375    expression. */
   3376 
static
ULong dis_Grp2 ( VexAbiInfo* vbi,
                 Prefix pfx,
                 Long delta, UChar modrm,
                 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                 HChar* shift_expr_txt, Bool* decode_OK )
{
   /* Group 2: ROL/ROR/RCL/RCR/SHL/SHR/SAL/SAR of a register or memory
      operand by shift_expr (an 8-bit typed expression: CL, an
      immediate, or the constant 1).  delta on entry points at the
      modrm byte.  *decode_OK is currently always set True. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty    = szToITy(sz);
   IRTemp dst0  = newTemp(ty);   /* value before shifting/rotating */
   IRTemp dst1  = newTemp(ty);   /* value after */
   IRTemp addr  = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIRegE(sz, pfx, modrm));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   /* Classify the sub-opcode: 4..7 = SHL/SHR/SAL/SAR, 0..1 = ROL/ROR,
      2..3 = RCL/RCR.  Each switch sets a single flag, so the lack of
      break statements is harmless. */
   isShift = False;
   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(amd64)");
   }

   if (isRotateC) {
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
         Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
      Bool     left = toBool(gregLO3ofRM(modrm) == 2);
      IRExpr** argsVALUE;
      IRExpr** argsRFLAGS;

      IRTemp new_value  = newTemp(Ity_I64);
      IRTemp new_rflags = newTemp(Ity_I64);
      IRTemp old_rflags = newTemp(Ity_I64);

      assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );

      /* First call: positive sz requests the rotated value. */
      argsVALUE
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(sz) );
      assign( new_value,
                 mkIRExprCCall(
                    Ity_I64,
                    0/*regparm*/,
                    left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                    left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                    argsVALUE
                 )
            );

      /* Second call: negative sz requests the new rflags. */
      argsRFLAGS
         = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
                          widenUto64(shift_expr),   /* rotate amount */
                          mkexpr(old_rflags),
                          mkU64(-sz) );
      assign( new_rflags,
                 mkIRExprCCall(
                    Ity_I64,
                    0/*regparm*/,
                    left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
                    left ? &amd64g_calculate_RCL  : &amd64g_calculate_RCR,
                    argsRFLAGS
                 )
            );

      /* Install the new value and the helper-computed flags verbatim. */
      assign( dst1, narrowTo(ty, mkexpr(new_value)) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   }

   else
   if (isShift) {

      IRTemp pre64     = newTemp(Ity_I64);   /* value widened to 64 bits */
      IRTemp res64     = newTemp(Ity_I64);   /* shifted by shift_amt */
      IRTemp res64ss   = newTemp(Ity_I64);   /* shifted by shift_amt-1 */
      IRTemp shift_amt = newTemp(Ity_I8);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);
      IROp   op64;

      /* Note SAL (6) is the same operation as SHL (4). */
      switch (gregLO3ofRM(modrm)) {
         case 4: op64 = Iop_Shl64; break;
         case 5: op64 = Iop_Shr64; break;
         case 6: op64 = Iop_Shl64; break;
         case 7: op64 = Iop_Sar64; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
      /* shift_amt = shift_expr & MASK, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );

      /* suitably widen the value to be shifted to 64 bits. */
      assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
                                     : widenUto64(mkexpr(dst0)) );

      /* res64 = pre64 `shift` shift_amt */
      assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );

      /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
      /* (the "shifted one less" value, needed by the flags thunk to
         recover the last bit shifted out, i.e. the new carry) */
      assign( res64ss,
              binop(op64,
                    mkexpr(pre64),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(mask))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res64)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      /* ccOp indexes the size-specific CC_OP (B/W/L/Q variants). */
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
                                        : (ty==Ity_I32 ? 2 : 3));
      Bool   left      = toBool(gregLO3ofRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);   /* reduced mod the word size */
      IRTemp rot_amt64 = newTemp(Ity_I8);   /* masked with 63/31 only */
      IRTemp oldFlags  = newTemp(Ity_I64);
      UChar  mask      = toUChar(sz==8 ? 63 : 31);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));

      if (ty == Ity_I64)
         assign(rot_amt, mkexpr(rot_amt64));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1,
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8),
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8),
                          mkexpr(dst0),
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += AMD64G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_amd64g_calculate_rflags_all());

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before. */
      /* Each Mux0X keeps the old thunk field when rot_amt64 == 0,
         per the x86/amd64 rule that a zero count leaves flags alone. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_OP,Ity_I64),
                                      mkU64(ccOp))) );
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
                                      widenUto64(mkexpr(dst1)))) );
      stmt( IRStmt_Put( OFFB_CC_DEP2,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
                                      mkU64(0))) );
      stmt( IRStmt_Put( OFFB_CC_NDEP,
                        IRExpr_Mux0X( mkexpr(rot_amt64),
                                      IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
                                      mkexpr(oldFlags))) );
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
   3644 
   3645 
   3646 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
                     Prefix pfx,
                     Long delta, UChar modrm,
                     Int am_sz, Int sz, ULong src_val,
                     Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I64);   /* operand, widened to 64 bits */
   IRTemp t2m    = newTemp(Ity_I64);   /* modified operand (non-BT) */
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   ULong  mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2:  src_val &= 15; break;
      case 4:  src_val &= 31; break;
      case 8:  src_val &= 63; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */  mask = 0;                  break;
      case 5: /* BTS */ mask = 1ULL << src_val;    break;
      case 6: /* BTR */ mask = ~(1ULL << src_val); break;
      case 7: /* BTC */ mask = 1ULL << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      64-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, nameIRegE(sz,pfx,modrm));
   } else {
      Int len;
      t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
      delta  += (len+1);
      assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
                                nameISize(sz),
                                src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregLO3ofRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT. */
   if (gregLO3ofRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
      } else {
         if (pfx & PFX_LOCK) {
            /* LOCKed RMW: store via CAS so the update is atomic. */
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_RIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
                  mkU64(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}
   3759 
   3760 
   3761 /* Signed/unsigned widening multiply.  Generate IR to multiply the
   3762    value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
   3763    RDX:RAX/EDX:EAX/DX:AX/AX.
   3764 */
   3765 static void codegen_mulL_A_D ( Int sz, Bool syned,
   3766                                IRTemp tmp, HChar* tmp_txt )
   3767 {
   3768    IRType ty = szToITy(sz);
   3769    IRTemp t1 = newTemp(ty);
   3770 
   3771    assign( t1, getIRegRAX(sz) );
   3772 
   3773    switch (ty) {
   3774       case Ity_I64: {
   3775          IRTemp res128  = newTemp(Ity_I128);
   3776          IRTemp resHi   = newTemp(Ity_I64);
   3777          IRTemp resLo   = newTemp(Ity_I64);
   3778          IROp   mulOp   = syned ? Iop_MullS64 : Iop_MullU64;
   3779          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3780          setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
   3781          assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3782          assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
   3783          assign( resLo, unop(Iop_128to64,mkexpr(res128)));
   3784          putIReg64(R_RDX, mkexpr(resHi));
   3785          putIReg64(R_RAX, mkexpr(resLo));
   3786          break;
   3787       }
   3788       case Ity_I32: {
   3789          IRTemp res64   = newTemp(Ity_I64);
   3790          IRTemp resHi   = newTemp(Ity_I32);
   3791          IRTemp resLo   = newTemp(Ity_I32);
   3792          IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
   3793          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3794          setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
   3795          assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3796          assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
   3797          assign( resLo, unop(Iop_64to32,mkexpr(res64)));
   3798          putIRegRDX(4, mkexpr(resHi));
   3799          putIRegRAX(4, mkexpr(resLo));
   3800          break;
   3801       }
   3802       case Ity_I16: {
   3803          IRTemp res32   = newTemp(Ity_I32);
   3804          IRTemp resHi   = newTemp(Ity_I16);
   3805          IRTemp resLo   = newTemp(Ity_I16);
   3806          IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
   3807          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3808          setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
   3809          assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3810          assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
   3811          assign( resLo, unop(Iop_32to16,mkexpr(res32)));
   3812          putIRegRDX(2, mkexpr(resHi));
   3813          putIRegRAX(2, mkexpr(resLo));
   3814          break;
   3815       }
   3816       case Ity_I8: {
   3817          IRTemp res16   = newTemp(Ity_I16);
   3818          IRTemp resHi   = newTemp(Ity_I8);
   3819          IRTemp resLo   = newTemp(Ity_I8);
   3820          IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
   3821          UInt   tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
   3822          setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
   3823          assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
   3824          assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
   3825          assign( resLo, unop(Iop_16to8,mkexpr(res16)));
   3826          putIRegRAX(2, mkexpr(res16));
   3827          break;
   3828       }
   3829       default:
   3830          ppIRType(ty);
   3831          vpanic("codegen_mulL_A_D(amd64)");
   3832    }
   3833    DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
   3834 }
   3835 
   3836 
   3837 /* Group 3 extended opcodes. */
   3838 static
   3839 ULong dis_Grp3 ( VexAbiInfo* vbi,
   3840                  Prefix pfx, Int sz, Long delta, Bool* decode_OK )
   3841 {
   3842    Long    d64;
   3843    UChar   modrm;
   3844    HChar   dis_buf[50];
   3845    Int     len;
   3846    IRTemp  addr;
   3847    IRType  ty = szToITy(sz);
   3848    IRTemp  t1 = newTemp(ty);
   3849    IRTemp dst1, src, dst0;
   3850    *decode_OK = True;
   3851    modrm = getUChar(delta);
   3852    if (epartIsReg(modrm)) {
   3853       switch (gregLO3ofRM(modrm)) {
   3854          case 0: { /* TEST */
   3855             delta++;
   3856             d64 = getSDisp(imin(4,sz), delta);
   3857             delta += imin(4,sz);
   3858             dst1 = newTemp(ty);
   3859             assign(dst1, binop(mkSizedOp(ty,Iop_And8),
   3860                                getIRegE(sz,pfx,modrm),
   3861                                mkU(ty, d64 & mkSizeMask(sz))));
   3862             setFlags_DEP1( Iop_And8, dst1, ty );
   3863             DIP("test%c $%lld, %s\n",
   3864                 nameISize(sz), d64,
   3865                 nameIRegE(sz, pfx, modrm));
   3866             break;
   3867          }
   3868          case 1:
   3869             *decode_OK = False;
   3870             return delta;
   3871          case 2: /* NOT */
   3872             delta++;
   3873             putIRegE(sz, pfx, modrm,
   3874                               unop(mkSizedOp(ty,Iop_Not8),
   3875                                    getIRegE(sz, pfx, modrm)));
   3876             DIP("not%c %s\n", nameISize(sz),
   3877                               nameIRegE(sz, pfx, modrm));
   3878             break;
   3879          case 3: /* NEG */
   3880             delta++;
   3881             dst0 = newTemp(ty);
   3882             src  = newTemp(ty);
   3883             dst1 = newTemp(ty);
   3884             assign(dst0, mkU(ty,0));
   3885             assign(src,  getIRegE(sz, pfx, modrm));
   3886             assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
   3887                                                        mkexpr(src)));
   3888             setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
   3889             putIRegE(sz, pfx, modrm, mkexpr(dst1));
   3890             DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
   3891             break;
   3892          case 4: /* MUL (unsigned widening) */
   3893             delta++;
   3894             src = newTemp(ty);
   3895             assign(src, getIRegE(sz,pfx,modrm));
   3896             codegen_mulL_A_D ( sz, False, src,
   3897                                nameIRegE(sz,pfx,modrm) );
   3898             break;
   3899          case 5: /* IMUL (signed widening) */
   3900             delta++;
   3901             src = newTemp(ty);
   3902             assign(src, getIRegE(sz,pfx,modrm));
   3903             codegen_mulL_A_D ( sz, True, src,
   3904                                nameIRegE(sz,pfx,modrm) );
   3905             break;
   3906          case 6: /* DIV */
   3907             delta++;
   3908             assign( t1, getIRegE(sz, pfx, modrm) );
   3909             codegen_div ( sz, t1, False );
   3910             DIP("div%c %s\n", nameISize(sz),
   3911                               nameIRegE(sz, pfx, modrm));
   3912             break;
   3913          case 7: /* IDIV */
   3914             delta++;
   3915             assign( t1, getIRegE(sz, pfx, modrm) );
   3916             codegen_div ( sz, t1, True );
   3917             DIP("idiv%c %s\n", nameISize(sz),
   3918                                nameIRegE(sz, pfx, modrm));
   3919             break;
   3920          default:
   3921             /*NOTREACHED*/
   3922             vpanic("Grp3(amd64,R)");
   3923       }
   3924    } else {
   3925       addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
   3926                         /* we have to inform disAMode of any immediate
   3927 			   bytes used */
   3928                         gregLO3ofRM(modrm)==0/*TEST*/
   3929                            ? imin(4,sz)
   3930                            : 0
   3931                       );
   3932       t1   = newTemp(ty);
   3933       delta += len;
   3934       assign(t1, loadLE(ty,mkexpr(addr)));
   3935       switch (gregLO3ofRM(modrm)) {
   3936          case 0: { /* TEST */
   3937             d64 = getSDisp(imin(4,sz), delta);
   3938             delta += imin(4,sz);
   3939             dst1 = newTemp(ty);
   3940             assign(dst1, binop(mkSizedOp(ty,Iop_And8),
   3941                                mkexpr(t1),
   3942                                mkU(ty, d64 & mkSizeMask(sz))));
   3943             setFlags_DEP1( Iop_And8, dst1, ty );
   3944             DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
   3945             break;
   3946          }
   3947          case 1:
   3948             *decode_OK = False;
   3949             return delta;
   3950          case 2: /* NOT */
   3951             dst1 = newTemp(ty);
   3952             assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
   3953             if (pfx & PFX_LOCK) {
   3954                casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
   3955                                     guest_RIP_curr_instr );
   3956             } else {
   3957                storeLE( mkexpr(addr), mkexpr(dst1) );
   3958             }
   3959             DIP("not%c %s\n", nameISize(sz), dis_buf);
   3960             break;
   3961          case 3: /* NEG */
   3962             dst0 = newTemp(ty);
   3963             src  = newTemp(ty);
   3964             dst1 = newTemp(ty);
   3965             assign(dst0, mkU(ty,0));
   3966             assign(src,  mkexpr(t1));
   3967             assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
   3968                                                        mkexpr(src)));
   3969             if (pfx & PFX_LOCK) {
   3970                casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
   3971                                     guest_RIP_curr_instr );
   3972             } else {
   3973                storeLE( mkexpr(addr), mkexpr(dst1) );
   3974             }
   3975             setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
   3976             DIP("neg%c %s\n", nameISize(sz), dis_buf);
   3977             break;
   3978          case 4: /* MUL (unsigned widening) */
   3979             codegen_mulL_A_D ( sz, False, t1, dis_buf );
   3980             break;
   3981          case 5: /* IMUL */
   3982             codegen_mulL_A_D ( sz, True, t1, dis_buf );
   3983             break;
   3984          case 6: /* DIV */
   3985             codegen_div ( sz, t1, False );
   3986             DIP("div%c %s\n", nameISize(sz), dis_buf);
   3987             break;
   3988          case 7: /* IDIV */
   3989             codegen_div ( sz, t1, True );
   3990             DIP("idiv%c %s\n", nameISize(sz), dis_buf);
   3991             break;
   3992          default:
   3993             /*NOTREACHED*/
   3994             vpanic("Grp3(amd64,M)");
   3995       }
   3996    }
   3997    return delta;
   3998 }
   3999 
   4000 
/* Group 4 extended opcodes (0xFE): byte-sized INC/DEC on the E
   operand.  The modrm 'greg' field selects the operation: /0 = INC,
   /1 = DEC; anything else sets *decode_OK to False.  Returns the
   updated instruction offset 'delta'. */
static
ULong dis_Grp4 ( VexAbiInfo* vbi,
                 Prefix pfx, Long delta, Bool* decode_OK )
{
   Int   alen;
   UChar modrm;
   HChar dis_buf[50];
   IRType ty = Ity_I8;        /* Grp4 operations are byte-sized only */
   IRTemp t1 = newTemp(ty);   /* original operand value */
   IRTemp t2 = newTemp(ty);   /* updated operand value */

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      /* Register operand. */
      assign(t1, getIRegE(1, pfx, modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIRegE(1, pfx, modrm, mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
                      nameIRegE(1, pfx, modrm));
   } else {
      /* Memory operand.  With a LOCK prefix the read-modify-write is
         expressed as a compare-and-swap so it stays atomic. */
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
                      guest_RIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
   }
   return delta;
}
   4069 
   4070 
/* Group 5 extended opcodes (0xFF), selected by the modrm 'greg'
   field: /0 = INC Ev, /1 = DEC Ev, /2 = CALL Ev, /4 = JMP Ev,
   /6 = PUSH Ev.  Other encodings set *decode_OK to False.  CALL and
   JMP end the superblock via 'dres'.  Returns the updated delta. */
static
ULong dis_Grp5 ( VexAbiInfo* vbi,
                 Prefix pfx, Int sz, Long delta,
                 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int     len;
   UChar   modrm;
   HChar   dis_buf[50];
   IRTemp  addr = IRTemp_INVALID;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);        /* original operand value */
   IRTemp  t2 = IRTemp_INVALID;     /* updated value / new RSP */
   IRTemp  t3 = IRTemp_INVALID;     /* branch target (call/jmp/push) */
   Bool    showSz = True;           /* print a size suffix in DIP? */

   *decode_OK = True;

   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      assign(t1, getIRegE(sz,pfx,modrm));
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIRegE(sz,pfx,modrm, mkexpr(t2));
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandled;
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            /* Push the return address and jump to the target. */
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* jmp Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandled;
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, getIRegE(sz,pfx,modrm));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         default:
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       nameIRegE(sz, pfx, modrm));
   } else {
      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      /* Only INC/DEC need the old value preloaded; call/jmp/push load
         a full 64 bits themselves. */
      if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
                                  && gregLO3ofRM(modrm) != 6) {
         assign(t1, loadLE(ty,mkexpr(addr)));
      }
      switch (gregLO3ofRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            /* LOCK prefix => express the update as a CAS for atomicity. */
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (pfx & PFX_LOCK) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandled;
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            /* Push the return address and jump to the target. */
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
            make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
            jmp_treg(dres, Ijk_Call, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 4: /* JMP Ev */
            /* Ignore any sz value and operate as if sz==8. */
            if (!(sz == 4 || sz == 8)) goto unhandled;
            sz = 8;
            t3 = newTemp(Ity_I64);
            assign(t3, loadLE(Ity_I64,mkexpr(addr)));
            jmp_treg(dres, Ijk_Boring, t3);
            vassert(dres->whatNext == Dis_StopHere);
            showSz = False;
            break;
         case 6: /* PUSH Ev */
            /* There is no encoding for 32-bit operand size; hence ... */
            if (sz == 4) sz = 8;
            if (!(sz == 8 || sz == 2)) goto unhandled;
            if (sz == 8) {
               t3 = newTemp(Ity_I64);
               assign(t3, loadLE(Ity_I64,mkexpr(addr)));
               t2 = newTemp(Ity_I64);
               assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
               putIReg64(R_RSP, mkexpr(t2) );
               storeLE( mkexpr(t2), mkexpr(t3) );
               break;
	    } else {
               goto unhandled; /* awaiting test case */
	    }
         default:
         unhandled:
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
                       showSz ? nameISize(sz) : ' ',
                       dis_buf);
   }
   return delta;
}
   4223 
   4224 
   4225 /*------------------------------------------------------------*/
   4226 /*--- Disassembling string ops (including REP prefixes)    ---*/
   4227 /*------------------------------------------------------------*/
   4228 
   4229 /* Code shared by all the string ops */
   4230 static
   4231 void dis_string_op_increment ( Int sz, IRTemp t_inc )
   4232 {
   4233    UChar logSz;
   4234    if (sz == 8 || sz == 4 || sz == 2) {
   4235       logSz = 1;
   4236       if (sz == 4) logSz = 2;
   4237       if (sz == 8) logSz = 3;
   4238       assign( t_inc,
   4239               binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
   4240                                mkU8(logSz) ) );
   4241    } else {
   4242       assign( t_inc,
   4243               IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   4244    }
   4245 }
   4246 
/* Common driver for the non-REP string insns: compute the
   direction-flag-scaled increment, emit one iteration of the given
   string op, and print its name. */
static
void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                    Int sz, HChar* name, Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc, pfx );
   DIP("%s%c\n", name, nameISize(sz));
}
   4259 
   4260 static
   4261 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
   4262 {
   4263    IRType ty = szToITy(sz);
   4264    IRTemp td = newTemp(Ity_I64);   /* RDI */
   4265    IRTemp ts = newTemp(Ity_I64);   /* RSI */
   4266    IRExpr *incd, *incs;
   4267 
   4268    if (haveASO(pfx)) {
   4269       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4270       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4271    } else {
   4272       assign( td, getIReg64(R_RDI) );
   4273       assign( ts, getIReg64(R_RSI) );
   4274    }
   4275 
   4276    storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
   4277 
   4278    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4279    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4280    if (haveASO(pfx)) {
   4281       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4282       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4283    }
   4284    putIReg64( R_RDI, incd );
   4285    putIReg64( R_RSI, incs );
   4286 }
   4287 
   4288 static
   4289 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
   4290 {
   4291    IRType ty = szToITy(sz);
   4292    IRTemp ts = newTemp(Ity_I64);   /* RSI */
   4293    IRExpr *incs;
   4294 
   4295    if (haveASO(pfx))
   4296       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4297    else
   4298       assign( ts, getIReg64(R_RSI) );
   4299 
   4300    putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
   4301 
   4302    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4303    if (haveASO(pfx))
   4304       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4305    putIReg64( R_RSI, incs );
   4306 }
   4307 
   4308 static
   4309 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
   4310 {
   4311    IRType ty = szToITy(sz);
   4312    IRTemp ta = newTemp(ty);        /* rAX */
   4313    IRTemp td = newTemp(Ity_I64);   /* RDI */
   4314    IRExpr *incd;
   4315 
   4316    assign( ta, getIRegRAX(sz) );
   4317 
   4318    if (haveASO(pfx))
   4319       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4320    else
   4321       assign( td, getIReg64(R_RDI) );
   4322 
   4323    storeLE( mkexpr(td), mkexpr(ta) );
   4324 
   4325    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4326    if (haveASO(pfx))
   4327       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4328    putIReg64( R_RDI, incd );
   4329 }
   4330 
   4331 static
   4332 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
   4333 {
   4334    IRType ty  = szToITy(sz);
   4335    IRTemp tdv = newTemp(ty);      /* (RDI) */
   4336    IRTemp tsv = newTemp(ty);      /* (RSI) */
   4337    IRTemp td  = newTemp(Ity_I64); /*  RDI  */
   4338    IRTemp ts  = newTemp(Ity_I64); /*  RSI  */
   4339    IRExpr *incd, *incs;
   4340 
   4341    if (haveASO(pfx)) {
   4342       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4343       assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   4344    } else {
   4345       assign( td, getIReg64(R_RDI) );
   4346       assign( ts, getIReg64(R_RSI) );
   4347    }
   4348 
   4349    assign( tdv, loadLE(ty,mkexpr(td)) );
   4350 
   4351    assign( tsv, loadLE(ty,mkexpr(ts)) );
   4352 
   4353    setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
   4354 
   4355    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4356    incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   4357    if (haveASO(pfx)) {
   4358       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4359       incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   4360    }
   4361    putIReg64( R_RDI, incd );
   4362    putIReg64( R_RSI, incs );
   4363 }
   4364 
   4365 static
   4366 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
   4367 {
   4368    IRType ty  = szToITy(sz);
   4369    IRTemp ta  = newTemp(ty);       /*  rAX  */
   4370    IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
   4371    IRTemp tdv = newTemp(ty);       /* (RDI) */
   4372    IRExpr *incd;
   4373 
   4374    assign( ta, getIRegRAX(sz) );
   4375 
   4376    if (haveASO(pfx))
   4377       assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   4378    else
   4379       assign( td, getIReg64(R_RDI) );
   4380 
   4381    assign( tdv, loadLE(ty,mkexpr(td)) );
   4382 
   4383    setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
   4384 
   4385    incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   4386    if (haveASO(pfx))
   4387       incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   4388    putIReg64( R_RDI, incd );
   4389 }
   4390 
   4391 
/* Wrap the appropriate string op inside a REP/REPE/REPNE.  We assume
   the insn is the last one in the basic block, and so emit a jump to
   the next insn, rather than just falling through.  One translated
   superblock performs at most one iteration; looping is achieved by
   re-entering the block. */
static
void dis_REP_op ( /*MOD*/DisResult* dres,
                  AMD64Condcode cond,
                  void (*dis_OP)(Int, IRTemp, Prefix),
                  Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
                  Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   IRTemp tc;        /* the iteration counter, (R|E)CX */
   IRExpr* cmp;

   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));

   /* With an address-size override the counter is ECX, not RCX. */
   if (haveASO(pfx)) {
      tc = newTemp(Ity_I32);  /*  ECX  */
      assign( tc, getIReg32(R_RCX) );
      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   } else {
      tc = newTemp(Ity_I64);  /*  RCX  */
      assign( tc, getIReg64(R_RCX) );
      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   }

   /* If the counter is already zero, skip past the insn entirely. */
   stmt( IRStmt_Exit( cmp, Ijk_Boring,
                      IRConst_U64(rip_next), OFFB_RIP ) );

   /* Decrement the counter ... */
   if (haveASO(pfx))
      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
  else
      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );

   /* ... and do one iteration of the wrapped string op. */
   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc, pfx);

   if (cond == AMD64CondAlways) {
      /* Plain REP: unconditionally loop back to this insn. */
      jmp_lit(dres, Ijk_Boring, rip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      /* REPE/REPNE: loop back only while 'cond' holds, else fall
         through to the next insn. */
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U64(rip),
                         OFFB_RIP ) );
      jmp_lit(dres, Ijk_Boring, rip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}
   4444 
   4445 
   4446 /*------------------------------------------------------------*/
   4447 /*--- Arithmetic, etc.                                     ---*/
   4448 /*------------------------------------------------------------*/
   4449 
   4450 /* IMUL E, G.  Supplied eip points to the modR/M byte. */
   4451 static
   4452 ULong dis_mul_E_G ( VexAbiInfo* vbi,
   4453                     Prefix      pfx,
   4454                     Int         size,
   4455                     Long        delta0 )
   4456 {
   4457    Int    alen;
   4458    HChar  dis_buf[50];
   4459    UChar  rm = getUChar(delta0);
   4460    IRType ty = szToITy(size);
   4461    IRTemp te = newTemp(ty);
   4462    IRTemp tg = newTemp(ty);
   4463    IRTemp resLo = newTemp(ty);
   4464 
   4465    assign( tg, getIRegG(size, pfx, rm) );
   4466    if (epartIsReg(rm)) {
   4467       assign( te, getIRegE(size, pfx, rm) );
   4468    } else {
   4469       IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
   4470       assign( te, loadLE(ty,mkexpr(addr)) );
   4471    }
   4472 
   4473    setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
   4474 
   4475    assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
   4476 
   4477    putIRegG(size, pfx, rm, mkexpr(resLo) );
   4478 
   4479    if (epartIsReg(rm)) {
   4480       DIP("imul%c %s, %s\n", nameISize(size),
   4481                              nameIRegE(size,pfx,rm),
   4482                              nameIRegG(size,pfx,rm));
   4483       return 1+delta0;
   4484    } else {
   4485       DIP("imul%c %s, %s\n", nameISize(size),
   4486                              dis_buf,
   4487                              nameIRegG(size,pfx,rm));
   4488       return alen+delta0;
   4489    }
   4490 }
   4491 
   4492 
   4493 /* IMUL I * E -> G.  Supplied rip points to the modR/M byte. */
   4494 static
   4495 ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
   4496                        Prefix      pfx,
   4497                        Int         size,
   4498                        Long        delta,
   4499                        Int         litsize )
   4500 {
   4501    Long   d64;
   4502    Int    alen;
   4503    HChar  dis_buf[50];
   4504    UChar  rm = getUChar(delta);
   4505    IRType ty = szToITy(size);
   4506    IRTemp te = newTemp(ty);
   4507    IRTemp tl = newTemp(ty);
   4508    IRTemp resLo = newTemp(ty);
   4509 
   4510    vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
   4511 
   4512    if (epartIsReg(rm)) {
   4513       assign(te, getIRegE(size, pfx, rm));
   4514       delta++;
   4515    } else {
   4516       IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
   4517                                      imin(4,litsize) );
   4518       assign(te, loadLE(ty, mkexpr(addr)));
   4519       delta += alen;
   4520    }
   4521    d64 = getSDisp(imin(4,litsize),delta);
   4522    delta += imin(4,litsize);
   4523 
   4524    d64 &= mkSizeMask(size);
   4525    assign(tl, mkU(ty,d64));
   4526 
   4527    assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
   4528 
   4529    setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
   4530 
   4531    putIRegG(size, pfx, rm, mkexpr(resLo));
   4532 
   4533    DIP("imul%c $%lld, %s, %s\n",
   4534        nameISize(size), d64,
   4535        ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
   4536        nameIRegG(size,pfx,rm) );
   4537    return delta;
   4538 }
   4539 
   4540 
   4541 /* Generate an IR sequence to do a popcount operation on the supplied
   4542    IRTemp, and return a new IRTemp holding the result.  'ty' may be
   4543    Ity_I16, Ity_I32 or Ity_I64 only. */
   4544 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
   4545 {
   4546    Int i;
   4547    if (ty == Ity_I16) {
   4548       IRTemp old = IRTemp_INVALID;
   4549       IRTemp nyu = IRTemp_INVALID;
   4550       IRTemp mask[4], shift[4];
   4551       for (i = 0; i < 4; i++) {
   4552          mask[i]  = newTemp(ty);
   4553          shift[i] = 1 << i;
   4554       }
   4555       assign(mask[0], mkU16(0x5555));
   4556       assign(mask[1], mkU16(0x3333));
   4557       assign(mask[2], mkU16(0x0F0F));
   4558       assign(mask[3], mkU16(0x00FF));
   4559       old = src;
   4560       for (i = 0; i < 4; i++) {
   4561          nyu = newTemp(ty);
   4562          assign(nyu,
   4563                 binop(Iop_Add16,
   4564                       binop(Iop_And16,
   4565                             mkexpr(old),
   4566                             mkexpr(mask[i])),
   4567                       binop(Iop_And16,
   4568                             binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
   4569                             mkexpr(mask[i]))));
   4570          old = nyu;
   4571       }
   4572       return nyu;
   4573    }
   4574    if (ty == Ity_I32) {
   4575       IRTemp old = IRTemp_INVALID;
   4576       IRTemp nyu = IRTemp_INVALID;
   4577       IRTemp mask[5], shift[5];
   4578       for (i = 0; i < 5; i++) {
   4579          mask[i]  = newTemp(ty);
   4580          shift[i] = 1 << i;
   4581       }
   4582       assign(mask[0], mkU32(0x55555555));
   4583       assign(mask[1], mkU32(0x33333333));
   4584       assign(mask[2], mkU32(0x0F0F0F0F));
   4585       assign(mask[3], mkU32(0x00FF00FF));
   4586       assign(mask[4], mkU32(0x0000FFFF));
   4587       old = src;
   4588       for (i = 0; i < 5; i++) {
   4589          nyu = newTemp(ty);
   4590          assign(nyu,
   4591                 binop(Iop_Add32,
   4592                       binop(Iop_And32,
   4593                             mkexpr(old),
   4594                             mkexpr(mask[i])),
   4595                       binop(Iop_And32,
   4596                             binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
   4597                             mkexpr(mask[i]))));
   4598          old = nyu;
   4599       }
   4600       return nyu;
   4601    }
   4602    if (ty == Ity_I64) {
   4603       IRTemp old = IRTemp_INVALID;
   4604       IRTemp nyu = IRTemp_INVALID;
   4605       IRTemp mask[6], shift[6];
   4606       for (i = 0; i < 6; i++) {
   4607          mask[i]  = newTemp(ty);
   4608          shift[i] = 1 << i;
   4609       }
   4610       assign(mask[0], mkU64(0x5555555555555555ULL));
   4611       assign(mask[1], mkU64(0x3333333333333333ULL));
   4612       assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
   4613       assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
   4614       assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
   4615       assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
   4616       old = src;
   4617       for (i = 0; i < 6; i++) {
   4618          nyu = newTemp(ty);
   4619          assign(nyu,
   4620                 binop(Iop_Add64,
   4621                       binop(Iop_And64,
   4622                             mkexpr(old),
   4623                             mkexpr(mask[i])),
   4624                       binop(Iop_And64,
   4625                             binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
   4626                             mkexpr(mask[i]))));
   4627          old = nyu;
   4628       }
   4629       return nyu;
   4630    }
   4631    /*NOTREACHED*/
   4632    vassert(0);
   4633 }
   4634 
   4635 
   4636 /* Generate an IR sequence to do a count-leading-zeroes operation on
   4637    the supplied IRTemp, and return a new IRTemp holding the result.
   4638    'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   4639    the argument is zero, return the number of bits in the word (the
   4640    natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   /* Widen the source to 64 bits ... */
   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   /* ... then shift it left so its top bit lands in bit 63.  The
      leading-zero count of this shifted value is then the correct
      count for the original, narrower type. */
   IRTemp src64x = newTemp(Ity_I64);
   assign(src64x,
          binop(Iop_Shl64, mkexpr(src64),
                           mkU8(64 - 8 * sizeofIRType(ty))));

   // Clz64 has undefined semantics when its input is zero, so
   // special-case around that: a zero input yields the bit-width of
   // the type, per the LZCNT definition.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_Mux0X(
             unop(Iop_1Uto8,
                  binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))),
             unop(Iop_Clz64, mkexpr(src64x)),
             mkU64(8 * sizeofIRType(ty))
   ));

   /* Narrow the result back to the requested type. */
   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}
   4668 
   4669 
   4670 /*------------------------------------------------------------*/
   4671 /*---                                                      ---*/
   4672 /*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
   4673 /*---                                                      ---*/
   4674 /*------------------------------------------------------------*/
   4675 
   4676 /* --- Helper functions for dealing with the register stack. --- */
   4677 
   4678 /* --- Set the emulation-warning pseudo-register. --- */
   4679 
/* Write an emulation-warning code into the guest-state
   pseudo-register.  'e' must be :: Ity_I32. */
static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMWARN, e ) );
}
   4685 
   4686 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
   4687 
static IRExpr* mkQNaN64 ( void )
{
  /* The canonical 64-bit QNaN: sign 0, exponent 2047 (eleven 1 bits),
     fraction MSB 1 followed by 51 zero bits
     == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}
   4696 
   4697 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
   4698 
/* Read the x87 top-of-stack pointer (FTOP) from the guest state,
   as an Ity_I32 expression. */
static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}
   4703 
/* Write 'e' (which must be :: Ity_I32) to the x87 top-of-stack
   pointer in the guest state. */
static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
   4709 
   4710 /* --------- Get/put the C3210 bits. --------- */
   4711 
/* Read the FPU condition-code (C3..C0) field from the guest state. */
static IRExpr*  /* :: Ity_I64 */ get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}
   4716 
/* Write 'e' (which must be :: Ity_I64) to the FPU condition-code
   (C3..C0) field in the guest state. */
static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}
   4722 
   4723 /* --------- Get/put the FPU rounding mode. --------- */
/* Read the FPU rounding mode, narrowing the 64-bit guest-state field
   down to Ity_I32. */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}
   4728 
/* Write 'e' (which must be :: Ity_I32) to the FPU rounding-mode
   field, zero-extending to the 64-bit guest-state slot. */
static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}
   4734 
   4735 
   4736 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
   4737 /* Produces a value in 0 .. 3, which is encoded as per the type
   4738    IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   4739    per IRRoundingMode, we merely need to get it and mask it for
   4740    safety.
   4741 */
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   /* guest_FPROUND is already encoded per IRRoundingMode, so merely
      mask it down to the valid range 0 .. 3 for safety. */
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}
   4746 
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   /* Placeholder rounding mode: always round-to-nearest, ignoring
      the guest's actual FPU rounding setting.  Call sites mark this
      with XXXROUNDINGFIXME. */
   return mkU32(Irrm_NEAREST);
}
   4751 
   4752 
   4753 /* --------- Get/set FP register tag bytes. --------- */
   4754 
   4755 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
   4756 
   4757 static void put_ST_TAG ( Int i, IRExpr* value )
   4758 {
   4759    IRRegArray* descr;
   4760    vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   4761    descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   4762    stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   4763 }
   4764 
   4765 /* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   4766    zero to indicate "Empty" and nonzero to indicate "NonEmpty".  */
   4767 
static IRExpr* get_ST_TAG ( Int i )
{
   /* Yield 'ST_TAG(i)': zero means "Empty", nonzero "NonEmpty".
      The tag array is indexed relative to the current FTOP. */
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
   4773 
   4774 
   4775 /* --------- Get/set FP registers. --------- */
   4776 
   4777 /* Given i, and some expression e, emit 'ST(i) = e' and set the
   4778    register's tag to indicate the register is full.  The previous
   4779    state of the register is not checked. */
   4780 
   4781 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
   4782 {
   4783    IRRegArray* descr;
   4784    vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   4785    descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   4786    stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   4787    /* Mark the register as in-use. */
   4788    put_ST_TAG(i, mkU8(1));
   4789 }
   4790 
   4791 /* Given i, and some expression e, emit
   4792       ST(i) = is_full(i) ? NaN : e
   4793    and set the tag accordingly.
   4794 */
   4795 
static void put_ST ( Int i, IRExpr* value )
{
   /* Emit 'ST(i) = is_full(i) ? QNaN : value' and set the tag
      accordingly: writing to an already-occupied register stores a
      QNaN instead of the value. */
   put_ST_UNCHECKED( i,
                     IRExpr_Mux0X( get_ST_TAG(i),
                                   /* tag 0 (empty): store the value */
                                   value,
                                   /* tag non-0 (full): store a QNaN */
                                   mkQNaN64()
                   )
   );
}
   4807 
   4808 
   4809 /* Given i, generate an expression yielding 'ST(i)'. */
   4810 
static IRExpr* get_ST_UNCHECKED ( Int i )
{
   /* Yield 'ST(i)' without consulting the register's tag. */
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
   4816 
   4817 
   4818 /* Given i, generate an expression yielding
   4819   is_full(i) ? ST(i) : NaN
   4820 */
   4821 
static IRExpr* get_ST ( Int i )
{
   /* Yield 'is_full(i) ? ST(i) : QNaN': reading an empty register
      produces a QNaN rather than whatever the slot holds. */
   return
      IRExpr_Mux0X( get_ST_TAG(i),
                    /* tag 0 (empty): yield a QNaN */
                    mkQNaN64(),
                    /* tag non-0 (full): yield the stored value */
                    get_ST_UNCHECKED(i));
}
   4831 
   4832 
   4833 /* Adjust FTOP downwards by one register. */
   4834 
static void fp_push ( void )
{
   /* Adjust FTOP downwards by one register (pushing moves the stack
      top to the next-lower register number). */
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}
   4839 
   4840 /* Adjust FTOP upwards by one register, and mark the vacated register
   4841    as empty.  */
   4842 
static void fp_pop ( void )
{
   /* Mark the vacated register (the current top) as empty ... */
   put_ST_TAG(0, mkU8(0));
   /* ... then adjust FTOP upwards by one register. */
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}
   4848 
   4849 /* Clear the C2 bit of the FPU status register, for
   4850    sin/cos/tan/sincos. */
   4851 
static void clear_C2 ( void )
{
   /* Clear the C2 bit of the FPU status register; used for
      sin/cos/tan/sincos, which never leave C2 set here. */
   put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
}
   4856 
   4857 /* Invent a plausible-looking FPU status word value:
   4858       ((ftop & 7) << 11) | (c3210 & 0x4700)
   4859  */
   4860 static IRExpr* get_FPU_sw ( void )
   4861 {
   4862    return
   4863       unop(Iop_32to16,
   4864            binop(Iop_Or32,
   4865                  binop(Iop_Shl32,
   4866                        binop(Iop_And32, get_ftop(), mkU32(7)),
   4867                              mkU8(11)),
   4868                        binop(Iop_And32, unop(Iop_64to32, get_C3210()),
   4869                                         mkU32(0x4700))
   4870       ));
   4871 }
   4872 
   4873 
   4874 /* ------------------------------------------------------- */
   4875 /* Given all that stack-mangling junk, we can now go ahead
   4876    and describe FP instructions.
   4877 */
   4878 
   4879 /* ST(0) = ST(0) `op` mem64/32(addr)
   4880    Need to check ST(0)'s tag on read, but not on write.
   4881 */
   4882 static
   4883 void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
   4884                          IROp op, Bool dbl )
   4885 {
   4886    DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   4887    if (dbl) {
   4888       put_ST_UNCHECKED(0,
   4889          triop( op,
   4890                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4891                 get_ST(0),
   4892                 loadLE(Ity_F64,mkexpr(addr))
   4893          ));
   4894    } else {
   4895       put_ST_UNCHECKED(0,
   4896          triop( op,
   4897                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4898                 get_ST(0),
   4899                 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
   4900          ));
   4901    }
   4902 }
   4903 
   4904 
   4905 /* ST(0) = mem64/32(addr) `op` ST(0)
   4906    Need to check ST(0)'s tag on read, but not on write.
   4907 */
   4908 static
   4909 void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
   4910                             IROp op, Bool dbl )
   4911 {
   4912    DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   4913    if (dbl) {
   4914       put_ST_UNCHECKED(0,
   4915          triop( op,
   4916                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4917                 loadLE(Ity_F64,mkexpr(addr)),
   4918                 get_ST(0)
   4919          ));
   4920    } else {
   4921       put_ST_UNCHECKED(0,
   4922          triop( op,
   4923                 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4924                 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
   4925                 get_ST(0)
   4926          ));
   4927    }
   4928 }
   4929 
   4930 
   4931 /* ST(dst) = ST(dst) `op` ST(src).
   4932    Check dst and src tags when reading but not on write.
   4933 */
   4934 static
   4935 void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
   4936                       Bool pop_after )
   4937 {
   4938    DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   4939    put_ST_UNCHECKED(
   4940       st_dst,
   4941       triop( op,
   4942              get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4943              get_ST(st_dst),
   4944              get_ST(st_src) )
   4945    );
   4946    if (pop_after)
   4947       fp_pop();
   4948 }
   4949 
   4950 /* ST(dst) = ST(src) `op` ST(dst).
   4951    Check dst and src tags when reading but not on write.
   4952 */
   4953 static
   4954 void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
   4955                          Bool pop_after )
   4956 {
   4957    DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
   4958    put_ST_UNCHECKED(
   4959       st_dst,
   4960       triop( op,
   4961              get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
   4962              get_ST(st_src),
   4963              get_ST(st_dst) )
   4964    );
   4965    if (pop_after)
   4966       fp_pop();
   4967 }
   4968 
/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ), optionally popping after. */
static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
{
   DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
   /* This is a bit of a hack (and isn't really right).  It sets
      Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
      documentation implies A and S are unchanged.
   */
   /* It's also fishy in that it is used both for COMIP and
      UCOMIP, and they aren't the same (although similar). */
   /* Set the flag thunk to COPY, so CC_DEP1 is taken as the flags
      value verbatim. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   /* Compare ST(0) against ST(i) and keep only the bits selected by
      the 0x45 mask for the flags thunk. */
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, get_ST(0), get_ST(i))),
                   mkU64(0x45)
        )));
   if (pop_after)
      fp_pop();
}
   4991 
   4992 
   4993 /* returns
   4994    32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
   4995 */
   4996 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
   4997 {
   4998    IRTemp t32 = newTemp(Ity_I32);
   4999    assign( t32, e32 );
   5000    return
   5001       IRExpr_Mux0X(
   5002          unop(Iop_1Uto8,
   5003               binop(Iop_CmpLT64U,
   5004                     unop(Iop_32Uto64,
   5005                          binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
   5006                     mkU64(65536))),
   5007          mkU16( 0x8000 ),
   5008          unop(Iop_32to16, mkexpr(t32)));
   5009 }
   5010 
   5011 
   5012 static
   5013 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
   5014                 VexAbiInfo* vbi, Prefix pfx, Long delta )
   5015 {
   5016    Int    len;
   5017    UInt   r_src, r_dst;
   5018    HChar  dis_buf[50];
   5019    IRTemp t1, t2;
   5020 
   5021    /* On entry, delta points at the second byte of the insn (the modrm
   5022       byte).*/
   5023    UChar first_opcode = getUChar(delta-1);
   5024    UChar modrm        = getUChar(delta+0);
   5025 
   5026    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
   5027 
   5028    if (first_opcode == 0xD8) {
   5029       if (modrm < 0xC0) {
   5030 
   5031          /* bits 5,4,3 are an opcode extension, and the modRM also
   5032            specifies an address. */
   5033          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5034          delta += len;
   5035 
   5036          switch (gregLO3ofRM(modrm)) {
   5037 
   5038             case 0: /* FADD single-real */
   5039                fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
   5040                break;
   5041 
   5042             case 1: /* FMUL single-real */
   5043                fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
   5044                break;
   5045 
   5046             case 2: /* FCOM single-real */
   5047                DIP("fcoms %s\n", dis_buf);
   5048                /* This forces C1 to zero, which isn't right. */
   5049                /* The AMD documentation suggests that forcing C1 to
   5050                   zero is correct (Eliot Moss) */
   5051                put_C3210(
   5052                    unop( Iop_32Uto64,
   5053                        binop( Iop_And32,
   5054                               binop(Iop_Shl32,
   5055                                     binop(Iop_CmpF64,
   5056                                           get_ST(0),
   5057                                           unop(Iop_F32toF64,
   5058                                                loadLE(Ity_F32,mkexpr(addr)))),
   5059                                     mkU8(8)),
   5060                               mkU32(0x4500)
   5061                    )));
   5062                break;
   5063 
   5064             case 3: /* FCOMP single-real */
   5065                /* The AMD documentation suggests that forcing C1 to
   5066                   zero is correct (Eliot Moss) */
   5067                DIP("fcomps %s\n", dis_buf);
   5068                /* This forces C1 to zero, which isn't right. */
   5069                put_C3210(
   5070                    unop( Iop_32Uto64,
   5071                        binop( Iop_And32,
   5072                               binop(Iop_Shl32,
   5073                                     binop(Iop_CmpF64,
   5074                                           get_ST(0),
   5075                                           unop(Iop_F32toF64,
   5076                                                loadLE(Ity_F32,mkexpr(addr)))),
   5077                                     mkU8(8)),
   5078                               mkU32(0x4500)
   5079                    )));
   5080                fp_pop();
   5081                break;
   5082 
   5083             case 4: /* FSUB single-real */
   5084                fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
   5085                break;
   5086 
   5087             case 5: /* FSUBR single-real */
   5088                fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
   5089                break;
   5090 
   5091             case 6: /* FDIV single-real */
   5092                fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
   5093                break;
   5094 
   5095             case 7: /* FDIVR single-real */
   5096                fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
   5097                break;
   5098 
   5099             default:
   5100                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5101                vex_printf("first_opcode == 0xD8\n");
   5102                goto decode_fail;
   5103          }
   5104       } else {
   5105          delta++;
   5106          switch (modrm) {
   5107 
   5108             case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
   5109                fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
   5110                break;
   5111 
   5112             case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
   5113                fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
   5114                break;
   5115 
   5116             /* Dunno if this is right */
   5117             case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
   5118                r_dst = (UInt)modrm - 0xD0;
   5119                DIP("fcom %%st(0),%%st(%d)\n", r_dst);
   5120                /* This forces C1 to zero, which isn't right. */
   5121                put_C3210(
   5122                    unop(Iop_32Uto64,
   5123                    binop( Iop_And32,
   5124                           binop(Iop_Shl32,
   5125                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   5126                                 mkU8(8)),
   5127                           mkU32(0x4500)
   5128                    )));
   5129                break;
   5130 
   5131             /* Dunno if this is right */
   5132             case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
   5133                r_dst = (UInt)modrm - 0xD8;
   5134                DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
   5135                /* This forces C1 to zero, which isn't right. */
   5136                put_C3210(
   5137                    unop(Iop_32Uto64,
   5138                    binop( Iop_And32,
   5139                           binop(Iop_Shl32,
   5140                                 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   5141                                 mkU8(8)),
   5142                           mkU32(0x4500)
   5143                    )));
   5144                fp_pop();
   5145                break;
   5146 
   5147             case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
   5148                fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
   5149                break;
   5150 
   5151             case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
   5152                fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
   5153                break;
   5154 
   5155             case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
   5156                fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
   5157                break;
   5158 
   5159             case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
   5160                fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
   5161                break;
   5162 
   5163             default:
   5164                goto decode_fail;
   5165          }
   5166       }
   5167    }
   5168 
   5169    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
   5170    else
   5171    if (first_opcode == 0xD9) {
   5172       if (modrm < 0xC0) {
   5173 
   5174          /* bits 5,4,3 are an opcode extension, and the modRM also
   5175             specifies an address. */
   5176          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5177          delta += len;
   5178 
   5179          switch (gregLO3ofRM(modrm)) {
   5180 
   5181             case 0: /* FLD single-real */
   5182                DIP("flds %s\n", dis_buf);
   5183                fp_push();
   5184                put_ST(0, unop(Iop_F32toF64,
   5185                               loadLE(Ity_F32, mkexpr(addr))));
   5186                break;
   5187 
   5188             case 2: /* FST single-real */
   5189                DIP("fsts %s\n", dis_buf);
   5190                storeLE(mkexpr(addr),
   5191                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
   5192                break;
   5193 
   5194             case 3: /* FSTP single-real */
   5195                DIP("fstps %s\n", dis_buf);
   5196                storeLE(mkexpr(addr),
   5197                        binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
   5198                fp_pop();
   5199                break;
   5200 
   5201             case 4: { /* FLDENV m28 */
   5202                /* Uses dirty helper:
   5203                      VexEmWarn amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
   5204                IRTemp    ew = newTemp(Ity_I32);
   5205                IRTemp   w64 = newTemp(Ity_I64);
   5206                IRDirty*   d = unsafeIRDirty_0_N (
   5207                                  0/*regparms*/,
   5208                                  "amd64g_dirtyhelper_FLDENV",
   5209                                  &amd64g_dirtyhelper_FLDENV,
   5210                                  mkIRExprVec_1( mkexpr(addr) )
   5211                               );
   5212                d->needsBBP = True;
   5213                d->tmp      = w64;
   5214                /* declare we're reading memory */
   5215                d->mFx   = Ifx_Read;
   5216                d->mAddr = mkexpr(addr);
   5217                d->mSize = 28;
   5218 
   5219                /* declare we're writing guest state */
   5220                d->nFxState = 4;
   5221                vex_bzero(&d->fxState, sizeof(d->fxState));
   5222 
   5223                d->fxState[0].fx     = Ifx_Write;
   5224                d->fxState[0].offset = OFFB_FTOP;
   5225                d->fxState[0].size   = sizeof(UInt);
   5226 
   5227                d->fxState[1].fx     = Ifx_Write;
   5228                d->fxState[1].offset = OFFB_FPTAGS;
   5229                d->fxState[1].size   = 8 * sizeof(UChar);
   5230 
   5231                d->fxState[2].fx     = Ifx_Write;
   5232                d->fxState[2].offset = OFFB_FPROUND;
   5233                d->fxState[2].size   = sizeof(ULong);
   5234 
   5235                d->fxState[3].fx     = Ifx_Write;
   5236                d->fxState[3].offset = OFFB_FC3210;
   5237                d->fxState[3].size   = sizeof(ULong);
   5238 
   5239                stmt( IRStmt_Dirty(d) );
   5240 
   5241                /* ew contains any emulation warning we may need to
   5242                   issue.  If needed, side-exit to the next insn,
   5243                   reporting the warning, so that Valgrind's dispatcher
   5244                   sees the warning. */
   5245 	       assign(ew, unop(Iop_64to32,mkexpr(w64)) );
   5246                put_emwarn( mkexpr(ew) );
   5247                stmt(
   5248                   IRStmt_Exit(
   5249                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
   5250                      Ijk_EmWarn,
   5251                      IRConst_U64( guest_RIP_bbstart+delta ),
   5252                      OFFB_RIP
   5253                   )
   5254                );
   5255 
   5256                DIP("fldenv %s\n", dis_buf);
   5257                break;
   5258             }
   5259 
   5260             case 5: {/* FLDCW */
   5261                /* The only thing we observe in the control word is the
   5262                   rounding mode.  Therefore, pass the 16-bit value
   5263                   (x87 native-format control word) to a clean helper,
   5264                   getting back a 64-bit value, the lower half of which
   5265                   is the FPROUND value to store, and the upper half of
   5266                   which is the emulation-warning token which may be
   5267                   generated.
   5268                */
   5269                /* ULong amd64h_check_fldcw ( ULong ); */
   5270                IRTemp t64 = newTemp(Ity_I64);
   5271                IRTemp ew = newTemp(Ity_I32);
   5272                DIP("fldcw %s\n", dis_buf);
   5273                assign( t64, mkIRExprCCall(
   5274                                Ity_I64, 0/*regparms*/,
   5275                                "amd64g_check_fldcw",
   5276                                &amd64g_check_fldcw,
   5277                                mkIRExprVec_1(
   5278                                   unop( Iop_16Uto64,
   5279                                         loadLE(Ity_I16, mkexpr(addr)))
   5280                                )
   5281                             )
   5282                      );
   5283 
   5284                put_fpround( unop(Iop_64to32, mkexpr(t64)) );
   5285                assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
   5286                put_emwarn( mkexpr(ew) );
   5287                /* Finally, if an emulation warning was reported,
   5288                   side-exit to the next insn, reporting the warning,
   5289                   so that Valgrind's dispatcher sees the warning. */
   5290                stmt(
   5291                   IRStmt_Exit(
   5292                      binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
   5293                      Ijk_EmWarn,
   5294                      IRConst_U64( guest_RIP_bbstart+delta ),
   5295                      OFFB_RIP
   5296                   )
   5297                );
   5298                break;
   5299             }
   5300 
   5301             case 6: { /* FNSTENV m28 */
   5302                /* Uses dirty helper:
   5303                      void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
   5304                IRDirty* d = unsafeIRDirty_0_N (
   5305                                0/*regparms*/,
   5306                                "amd64g_dirtyhelper_FSTENV",
   5307                                &amd64g_dirtyhelper_FSTENV,
   5308                                mkIRExprVec_1( mkexpr(addr) )
   5309                             );
   5310                d->needsBBP = True;
   5311                /* declare we're writing memory */
   5312                d->mFx   = Ifx_Write;
   5313                d->mAddr = mkexpr(addr);
   5314                d->mSize = 28;
   5315 
   5316                /* declare we're reading guest state */
   5317                d->nFxState = 4;
   5318                vex_bzero(&d->fxState, sizeof(d->fxState));
   5319 
   5320                d->fxState[0].fx     = Ifx_Read;
   5321                d->fxState[0].offset = OFFB_FTOP;
   5322                d->fxState[0].size   = sizeof(UInt);
   5323 
   5324                d->fxState[1].fx     = Ifx_Read;
   5325                d->fxState[1].offset = OFFB_FPTAGS;
   5326                d->fxState[1].size   = 8 * sizeof(UChar);
   5327 
   5328                d->fxState[2].fx     = Ifx_Read;
   5329                d->fxState[2].offset = OFFB_FPROUND;
   5330                d->fxState[2].size   = sizeof(ULong);
   5331 
   5332                d->fxState[3].fx     = Ifx_Read;
   5333                d->fxState[3].offset = OFFB_FC3210;
   5334                d->fxState[3].size   = sizeof(ULong);
   5335 
   5336                stmt( IRStmt_Dirty(d) );
   5337 
   5338                DIP("fnstenv %s\n", dis_buf);
   5339                break;
   5340             }
   5341 
   5342             case 7: /* FNSTCW */
   5343                /* Fake up a native x87 FPU control word.  The only
   5344                   thing it depends on is FPROUND[1:0], so call a clean
   5345                   helper to cook it up. */
   5346                /* ULong amd64g_create_fpucw ( ULong fpround ) */
   5347                DIP("fnstcw %s\n", dis_buf);
   5348                storeLE(
   5349                   mkexpr(addr),
   5350                   unop( Iop_64to16,
   5351                         mkIRExprCCall(
   5352                            Ity_I64, 0/*regp*/,
   5353                            "amd64g_create_fpucw", &amd64g_create_fpucw,
   5354                            mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
   5355                         )
   5356                   )
   5357                );
   5358                break;
   5359 
   5360             default:
   5361                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5362                vex_printf("first_opcode == 0xD9\n");
   5363                goto decode_fail;
   5364          }
   5365 
   5366       } else {
   5367          delta++;
   5368          switch (modrm) {
   5369 
   5370             case 0xC0 ... 0xC7: /* FLD %st(?) */
   5371                r_src = (UInt)modrm - 0xC0;
   5372                DIP("fld %%st(%u)\n", r_src);
   5373                t1 = newTemp(Ity_F64);
   5374                assign(t1, get_ST(r_src));
   5375                fp_push();
   5376                put_ST(0, mkexpr(t1));
   5377                break;
   5378 
   5379             case 0xC8 ... 0xCF: /* FXCH %st(?) */
   5380                r_src = (UInt)modrm - 0xC8;
   5381                DIP("fxch %%st(%u)\n", r_src);
   5382                t1 = newTemp(Ity_F64);
   5383                t2 = newTemp(Ity_F64);
   5384                assign(t1, get_ST(0));
   5385                assign(t2, get_ST(r_src));
   5386                put_ST_UNCHECKED(0, mkexpr(t2));
   5387                put_ST_UNCHECKED(r_src, mkexpr(t1));
   5388                break;
   5389 
   5390             case 0xE0: /* FCHS */
   5391                DIP("fchs\n");
   5392                put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
   5393                break;
   5394 
   5395             case 0xE1: /* FABS */
   5396                DIP("fabs\n");
   5397                put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
   5398                break;
   5399 
   5400             case 0xE5: { /* FXAM */
   5401                /* This is an interesting one.  It examines %st(0),
   5402                   regardless of whether the tag says it's empty or not.
   5403                   Here, just pass both the tag (in our format) and the
   5404                   value (as a double, actually a ULong) to a helper
   5405                   function. */
   5406                IRExpr** args
   5407                   = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
   5408                                    unop(Iop_ReinterpF64asI64,
   5409                                         get_ST_UNCHECKED(0)) );
   5410                put_C3210(mkIRExprCCall(
   5411                             Ity_I64,
   5412                             0/*regparm*/,
   5413                             "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
   5414                             args
   5415                         ));
   5416                DIP("fxam\n");
   5417                break;
   5418             }
   5419 
            /* FLD-constant group.  Each pushes one well-known constant.
               The constants are loaded via their exact IEEE754 bit
               patterns (IRConst_F64i) rather than decimal literals, so
               the pushed value is bit-identical on every host; the
               commented-out F64 forms show the intended decimal value. */

            case 0xE8: /* FLD1 */
               DIP("fld1\n");
               fp_push();
               /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
               break;

            case 0xE9: /* FLDL2T */
               DIP("fldl2t\n");
               fp_push();
               /* log2(10) */
               /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
               break;

            case 0xEA: /* FLDL2E */
               DIP("fldl2e\n");
               fp_push();
               /* log2(e) */
               /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
               break;

            case 0xEB: /* FLDPI */
               DIP("fldpi\n");
               fp_push();
               /* pi */
               /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
               break;

            case 0xEC: /* FLDLG2 */
               DIP("fldlg2\n");
               fp_push();
               /* log10(2) */
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
               break;

            case 0xED: /* FLDLN2 */
               DIP("fldln2\n");
               fp_push();
               /* ln(2) */
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
               break;

            case 0xEE: /* FLDZ */
               DIP("fldz\n");
               fp_push();
               /* +0.0 */
               /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
               put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
               break;
   5468 
            case 0xF0: /* F2XM1 */
               DIP("f2xm1\n");
               /* ST(0) := 2^ST(0) - 1.  Rounding mode is the fake one;
                  see the XXXROUNDINGFIXME markers throughout. */
               put_ST_UNCHECKED(0,
                  binop(Iop_2xm1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               break;

            case 0xF1: /* FYL2X */
               DIP("fyl2x\n");
               /* ST(1) := ST(1) * log2(ST(0)), then pop the stack. */
               put_ST_UNCHECKED(1,
                  triop(Iop_Yl2xF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;

            case 0xF2: /* FPTAN */
               DIP("ftan\n");
               /* ST(0) := tan(ST(0)); then 1.0 is pushed, as the real
                  hardware does after a successful FPTAN.
                  NOTE(review): no argument-range reduction check is
                  modelled; C2 is cleared unconditionally (HACK below). */
               put_ST_UNCHECKED(0,
                  binop(Iop_TanF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               fp_push();
               put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
               clear_C2(); /* HACK */
               break;

            case 0xF3: /* FPATAN */
               DIP("fpatan\n");
               /* Two-operand arctangent: ST(1) := atan2-style result of
                  ST(1) and ST(0) (Iop_AtanF64), then pop. */
               put_ST_UNCHECKED(1,
                  triop(Iop_AtanF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;
   5507 
            case 0xF4: { /* FXTRACT */
               /* Split ST(0) into exponent and significand: the
                  exponent replaces ST(0) and the significand is pushed
                  on top.  Both parts come from one clean helper,
                  x86amd64g_calculate_FXTRACT, called twice on the raw
                  64-bit image of the operand; the second argument
                  selects the part returned (0 = significand,
                  1 = exponent). */
               IRTemp argF = newTemp(Ity_F64);
               IRTemp sigF = newTemp(Ity_F64);
               IRTemp expF = newTemp(Ity_F64);
               IRTemp argI = newTemp(Ity_I64);
               IRTemp sigI = newTemp(Ity_I64);
               IRTemp expI = newTemp(Ity_I64);
               DIP("fxtract\n");
               assign( argF, get_ST(0) );
               /* View the F64 operand as raw bits for the helper. */
               assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
               assign( sigI,
                       mkIRExprCCall(
                          Ity_I64, 0/*regparms*/,
                          "x86amd64g_calculate_FXTRACT",
                          &x86amd64g_calculate_FXTRACT,
                          mkIRExprVec_2( mkexpr(argI),
                                         mkIRExpr_HWord(0)/*sig*/ ))
               );
               assign( expI,
                       mkIRExprCCall(
                          Ity_I64, 0/*regparms*/,
                          "x86amd64g_calculate_FXTRACT",
                          &x86amd64g_calculate_FXTRACT,
                          mkIRExprVec_2( mkexpr(argI),
                                         mkIRExpr_HWord(1)/*exp*/ ))
               );
               /* Reinterpret the helper's I64 results back to F64. */
               assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
               assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
               /* exponent */
               put_ST_UNCHECKED(0, mkexpr(expF) );
               fp_push();
               /* significand */
               put_ST(0, mkexpr(sigF) );
               break;
            }
   5543 
            case 0xF5: { /* FPREM1 -- IEEE compliant */
               IRTemp a1 = newTemp(Ity_F64);
               IRTemp a2 = newTemp(Ity_F64);
               DIP("fprem1\n");
               /* Do FPREM1 twice, once to get the remainder, and once
                  to get the C3210 flag values.  The operands are first
                  captured in temporaries so both computations see the
                  same pre-update ST(0)/ST(1). */
               assign( a1, get_ST(0) );
               assign( a2, get_ST(1) );
               put_ST_UNCHECKED(0,
                  triop(Iop_PRem1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)));
               /* Flag-computation result is 32 bits wide; widen to 64
                  before storing into the guest C3210 state. */
               put_C3210(
                  unop(Iop_32Uto64,
                  triop(Iop_PRem1C3210F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)) ));
               break;
            }
   5565 
            case 0xF7: /* FINCSTP */
               DIP("fincstp\n");
               /* Increment the x87 top-of-stack pointer.
                  NOTE(review): no masking/wrap is applied here;
                  presumably put_ftop/get_ftop or later consumers
                  handle wrap-around -- confirm. */
               put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
               break;

            case 0xF8: { /* FPREM -- not IEEE compliant */
               IRTemp a1 = newTemp(Ity_F64);
               IRTemp a2 = newTemp(Ity_F64);
               DIP("fprem\n");
               /* Do FPREM twice, once to get the remainder, and once
                  to get the C3210 flag values.  Same structure as the
                  FPREM1 case above, but with the truncating
                  (non-IEEE) PRem ops. */
               assign( a1, get_ST(0) );
               assign( a2, get_ST(1) );
               put_ST_UNCHECKED(0,
                  triop(Iop_PRemF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)));
               /* Widen the 32-bit flag result for the C3210 store. */
               put_C3210(
                  unop(Iop_32Uto64,
                  triop(Iop_PRemC3210F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1),
                        mkexpr(a2)) ));
               break;
            }
   5592 
            case 0xF9: /* FYL2XP1 */
               DIP("fyl2xp1\n");
               /* ST(1) := ST(1) * log2(ST(0) + 1), then pop. */
               put_ST_UNCHECKED(1,
                  triop(Iop_Yl2xp1F64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(1),
                        get_ST(0)));
               fp_pop();
               break;

            case 0xFA: /* FSQRT */
               DIP("fsqrt\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_SqrtF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               break;

            case 0xFB: { /* FSINCOS */
               /* Capture the argument first: ST(0) is overwritten with
                  sin(arg), then cos(arg) is pushed, so after the push
                  cos is in ST(0) and sin in ST(1). */
               IRTemp a1 = newTemp(Ity_F64);
               assign( a1, get_ST(0) );
               DIP("fsincos\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_SinF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1)));
               fp_push();
               put_ST(0,
                  binop(Iop_CosF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        mkexpr(a1)));
               clear_C2(); /* HACK */
               break;
            }

            case 0xFC: /* FRNDINT */
               DIP("frndint\n");
               /* Unlike most ops in this group, this one uses the real
                  guest rounding mode (get_roundingmode), not the fake
                  one. */
               put_ST_UNCHECKED(0,
                  binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
               break;

            case 0xFD: /* FSCALE */
               DIP("fscale\n");
               /* ST(0) := ST(0) scaled by ST(1) (Iop_ScaleF64). */
               put_ST_UNCHECKED(0,
                  triop(Iop_ScaleF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        get_ST(1)));
               break;

            case 0xFE: /* FSIN */
               DIP("fsin\n");
               /* C2 is cleared unconditionally (HACK): no range-limit
                  failure is modelled for the trig ops. */
               put_ST_UNCHECKED(0,
                  binop(Iop_SinF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               clear_C2(); /* HACK */
               break;

            case 0xFF: /* FCOS */
               DIP("fcos\n");
               put_ST_UNCHECKED(0,
                  binop(Iop_CosF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0)));
               clear_C2(); /* HACK */
               break;
   5660 
   5661             default:
   5662                goto decode_fail;
   5663          }
   5664       }
   5665    }
   5666 
   5667    /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
   5668    else
   5669    if (first_opcode == 0xDA) {
   5670 
   5671       if (modrm < 0xC0) {
   5672 
   5673          /* bits 5,4,3 are an opcode extension, and the modRM also
   5674             specifies an address. */
   5675          IROp   fop;
   5676          IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
   5677          delta += len;
   5678          switch (gregLO3ofRM(modrm)) {
   5679 
            /* Integer-operand arithmetic (FIADD etc): each case only
               selects the F64 operation and the operand order, then
               jumps to shared emission code (do_fop_m32 for
               ST(0) op mem, do_foprev_m32 for mem op ST(0)), which
               loads the m32 integer and converts it to F64. */

            case 0: /* FIADD m32int */ /* ST(0) += m32int */
               DIP("fiaddl %s\n", dis_buf);
               fop = Iop_AddF64;
               goto do_fop_m32;

            case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
               DIP("fimull %s\n", dis_buf);
               fop = Iop_MulF64;
               goto do_fop_m32;

            case 4: /* FISUB m32int */ /* ST(0) -= m32int */
               DIP("fisubl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_fop_m32;

            case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
               DIP("fisubrl %s\n", dis_buf);
               fop = Iop_SubF64;
               goto do_foprev_m32;
   5699 
   5700             case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
   5701                DIP("fisubl %s\n", dis_buf);
   5702                fop = Iop_DivF64;
   5703                goto do_fop_m32;
   5704 
            case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
               DIP("fidivrl %s\n", dis_buf);
               fop = Iop_DivF64;
               goto do_foprev_m32;

            do_fop_m32:
               /* ST(0) := ST(0) `fop` (F64)m32int */
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        get_ST(0),
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr)))));
               break;

            do_foprev_m32:
               /* ST(0) := (F64)m32int `fop` ST(0)  (reversed operands) */
               put_ST_UNCHECKED(0,
                  triop(fop,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        unop(Iop_I32StoF64,
                             loadLE(Ity_I32, mkexpr(addr))),
                        get_ST(0)));
               break;
   5727 
   5728             default:
   5729                vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
   5730                vex_printf("first_opcode == 0xDA\n");
   5731                goto decode_fail;
   5732          }
   5733 
   5734       } else {
   5735 
   5736          delta++;
   5737          switch (modrm) {
   5738 
            /* FCMOVcc: conditional move into ST(0) from ST(i).
               Mux0X(c, e0, eX) yields e0 when c is zero, otherwise eX,
               so ST(0) := ST(r_src) when the condition holds, else it
               is rewritten unchanged. */

            case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondB)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondZ)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondBE)),
                                    get_ST(0), get_ST(r_src)) );
               break;

            case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
               /* "Unordered" maps to the parity flag, hence CondP. */
               put_ST_UNCHECKED(0,
                                IRExpr_Mux0X(
                                    unop(Iop_1Uto8,
                                         mk_amd64g_calculate_condition(AMD64CondP)),
                                    get_ST(0), get_ST(r_src)) );
               break;
   5778 
   5779             case 0xE9: /* FUCOMPP %st(0),%st(1) */
   5780                DIP("fucompp %%st(0),%%st(1)\n");
   5781                /* This forces C1 to zero, which isn't right. */
   5782                put_C3210(
   5783                    unop(Iop_32Uto64,
   5784