
/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_x86_defs.h"

/* TODO 21 Apr 2005:

   -- (Really an assembler issue) don't emit CMov32 as a cmov
      insn, since that's expensive on P4 and conditional branch
      is cheaper if (as we expect) the condition is highly predictable

   -- preserve xmm registers across function calls (by declaring them
      as trashed by call insns)

   -- preserve x87 ST stack discipline across function calls.  Sigh.

   -- Check doHelperCall: if a call is conditional, we cannot safely
      compute any regparm args directly to registers.  Hence, the
      fast-regparm marshalling should be restricted to unconditional
      calls only.
*/

/*---------------------------------------------------------*/
/*--- x87 control word stuff                            ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at
   exit.
*/

#define DEFAULT_FPUCW 0x027F

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
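
/* For reference, a sketch of how 0x027F decodes, per the Intel SDM
   layout of the x87 control word:

      bits 0..6   = 0x7F  all six exception classes masked
                          (bit 6 is reserved and reads back as 1)
      bits 8..9   = 10b   precision control = 53-bit mantissa
      bits 10..11 = 00b   rounding control  = round-to-nearest

   Similarly 0x1F80 in %mxcsr sets bits 7..12 (all six SSE exception
   masks) and leaves the rounding-control bits 13..14 at 00b
   (round-to-nearest). */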


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-x86/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

//static Bool isZeroU64 ( IRExpr* e )
//{
//   return e->tag == Iex_Const
//          && e->Iex.Const.con->tag == Ico_U64
//          && e->Iex.Const.con->Ico.U64 == 0ULL;
//}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register(s) are associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 64-bit integer-typed
             IRTemps.  It holds the identity of a second
             32-bit virtual HReg, which holds the high half
             of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all (well, mostly) host-independent.
*/

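/* As a concrete (hypothetical) illustration of the vregmap /
   vregmapHI split: an Ity_I64 temp t5 might have vregmap[5] = %vr12
   holding the low 32 bits and vregmapHI[5] = %vr13 holding the high
   32 bits, whereas an Ity_I32 temp t6 uses only vregmap[6], with
   vregmapHI[6] left as INVALID_HREG. */
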
typedef
   struct {
      /* Constants -- set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, X86Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppX86Instr(instr, False);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegF ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
static X86RI*      iselIntExpr_RI     ( ISelEnv* env, IRExpr* e );

static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
static X86RM*      iselIntExpr_RM     ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R     ( ISelEnv* env, IRExpr* e );

static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );
static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
                                       ISelEnv* env, IRExpr* e );

static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static X86CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg        iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg        iselVecExpr     ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

/* Make an int reg-reg move. */

static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
}


/* Make a vector reg-reg move. */

static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return X86Instr_SseReRg(Xsse_MOV, src, dst);
}

/* Advance/retreat %esp by n. */

static void add_to_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
}

static void sub_from_esp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%4) == 0);
   addInstr(env,
            X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
}


/* Given an amode, return one which references 4 bytes further
   along. */

static X86AMode* advance4 ( X86AMode* am )
{
   X86AMode* am4 = dopyX86AMode(am);
   switch (am4->tag) {
      case Xam_IRRS:
         am4->Xam.IRRS.imm += 4; break;
      case Xam_IR:
         am4->Xam.IR.imm += 4; break;
      default:
         vpanic("advance4(x86,host)");
   }
   return am4;
}
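
/* Example: applied to the Xam_IR amode 8(%ebp), advance4 yields
   12(%ebp); applied to the Xam_IRRS amode 4(%esi,%edi,2) it yields
   8(%esi,%edi,2).  The register choices here are illustrative
   only. */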


/* Push an arg onto the host stack, in preparation for a call to a
   helper function of some kind.  Returns the number of 32-bit words
   pushed.  If we encounter an IRExpr_VECRET() then we expect that
   r_vecRetAddr will be a valid register that holds the relevant
   address.
*/
static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
{
   if (UNLIKELY(arg->tag == Iex_VECRET)) {
      vassert(0); //ATC
      vassert(!hregIsInvalid(r_vecRetAddr));
      addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
      return 1;
   }
   if (UNLIKELY(arg->tag == Iex_BBPTR)) {
      addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
      return 1;
   }
   /* Else it's a "normal" expression. */
   IRType arg_ty = typeOfIRExpr(env->type_env, arg);
   if (arg_ty == Ity_I32) {
      addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
      return 1;
   } else
   if (arg_ty == Ity_I64) {
      HReg rHi, rLo;
      iselInt64Expr(&rHi, &rLo, env, arg);
      addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
      addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
      return 2;
   }
   ppIRExpr(arg);
   vpanic("pushArg(x86): can't handle arg of this type");
}
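
/* For instance, pushing an Ity_I64 argument held in a vreg pair
   rHi:rLo produces (register names illustrative)

      pushl %rHi
      pushl %rLo

   so the high word ends up at the higher address, matching the
   little-endian in-memory layout a callee taking a ULong expects. */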


/* Complete the call to a helper function, by calling the
   helper and clearing the args off the stack. */

static
void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
                              IRCallee* cee, Int n_arg_ws,
                              RetLoc rloc )
{
   /* Complication.  Need to decide which reg to use as the fn address
      pointer, in a way that doesn't trash regparm-passed
      parameters. */
   vassert(sizeof(void*) == 4);

   addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
                                cee->regparms, rloc));
   if (n_arg_ws > 0)
      add_to_esp(env, 4*n_arg_ws);
}


/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of regparm args.  This function figures out whether
   evaluation of an expression might require use of a fixed register.
   If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
      // These are always "safe" -- either a copy of %esp in some
      // arbitrary vreg, or a copy of %ebp, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}
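
/* Examples of why the conservative default matters: a shift with a
   non-constant amount must put the amount in %ecx (%cl), and the
   64-bit multiply/divide cases use %edx:%eax.  Evaluating such an
   expression between regparm moves would silently clobber an
   argument already placed in EAX, EDX or ECX; hence anything other
   than RdTmp, Const or Get is treated as potentially dangerous. */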


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   X86CondCode cc;
   HReg        argregs[3];
   HReg        tmpregs[3];
   Bool        danger;
   Int         not_done_yet, n_args, n_arg_ws, stack_limit,
               i, argreg, argregX;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of Iex_VECRET and Iex_BBPTR are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call, do the call, and clear the stack.
      Complexities to consider:

      * The return type can be I{64,32,16,8} or V128.  In the V128
        case, it is expected that |args| will contain the special
        node IRExpr_VECRET(), in which case this routine generates
        code to allocate space on the stack for the vector return
        value.  Since we are not passing any scalars on the stack, it
        is enough to preallocate the return space before marshalling
        any arguments, in this case.

        |args| may also contain IRExpr_BBPTR(), in which case the
        value in %ebp is passed as the corresponding argument.

      * If the callee claims regparmness of 1, 2 or 3, we must pass the
        first 1, 2 or 3 args in registers (EAX, EDX, and ECX
        respectively).  To keep things relatively simple, only args of
        type I32 may be passed as regparms -- just bomb out if anything
        else turns up.  Clearly this depends on the front ends not
        trying to pass any other types as regparms.
   */

   /* 16 Nov 2004: the regparm handling is complicated by the
      following problem.

      Consider a call to a function with two regparm parameters:
      f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
      Suppose code is first generated to compute e1 into %eax.  Then,
      code is generated to compute e2 into %edx.  Unfortunately, if
      the latter code sequence uses %eax, it will trash the value of
      e1 computed by the former sequence.  This could happen if (for
      example) e2 itself involved a function call.  In the code below,
      args are evaluated right-to-left, not left-to-right, but the
      principle and the problem are the same.

      One solution is to compute all regparm-bound args into vregs
      first, and once they are all done, move them to the relevant
      real regs.  This always gives correct code, but it also gives
      a bunch of vreg-to-rreg moves which are usually redundant but
      are hard for the register allocator to get rid of.

      A compromise is to first examine all regparm'd argument
      expressions.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers,
      use the old compute-directly-to-fixed-target scheme.  If not,
      be safe and use the via-vregs scheme.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this
      insn selector works.  Currently just the following 3 are
      regarded as safe -- hopefully they cover the majority of
      arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
   vassert(cee->regparms >= 0 && cee->regparms <= 3);

   /* Count the number of args and also the VECRETs */
   n_args = n_arg_ws = 0;
   while (args[n_args]) {
      IRExpr* arg = args[n_args];
      n_args++;
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         nBBPTRs++;
      }
   }

   /* If this fails, the IR is ill-formed */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      sub_from_esp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   not_done_yet = n_args;

   stack_limit = cee->regparms;

   /* ------ BEGIN marshall all arguments ------ */

   /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
   for (i = n_args-1; i >= stack_limit; i--) {
      n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
      not_done_yet--;
   }

   /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
      registers. */

   if (cee->regparms > 0) {

      /* ------ BEGIN deal with regparms ------ */

      /* deal with regparms, not forgetting %ebp if needed. */
      argregs[0] = hregX86_EAX();
      argregs[1] = hregX86_EDX();
      argregs[2] = hregX86_ECX();
      tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;

      argreg = cee->regparms;

      /* In keeping with big comment above, detect potential danger
         and use the via-vregs scheme if needed. */
      danger = False;
      for (i = stack_limit-1; i >= 0; i--) {
         if (mightRequireFixedRegs(args[i])) {
            danger = True;
            break;
         }
      }

      if (danger) {

         /* Move via temporaries */
         argregX = argreg;
         for (i = stack_limit-1; i >= 0; i--) {

            if (0) {
               vex_printf("x86 host: register param is complex: ");
               ppIRExpr(args[i]);
               vex_printf("\n");
            }

            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(0); //ATC
            }
            else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               tmpregs[argreg] = iselIntExpr_R(env, arg);
            }
            not_done_yet--;
         }
         for (i = stack_limit-1; i >= 0; i--) {
            argregX--;
            vassert(argregX >= 0);
            addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
         }

      } else {
         /* It's safe to compute all regparm args directly into their
            target registers. */
         for (i = stack_limit-1; i >= 0; i--) {
            IRExpr* arg = args[i];
            argreg--;
            vassert(argreg >= 0);
            if (UNLIKELY(arg->tag == Iex_VECRET)) {
               vassert(!hregIsInvalid(r_vecRetAddr));
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             X86RMI_Reg(r_vecRetAddr),
                                             argregs[argreg]));
            }
            else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
               vassert(0); //ATC
            } else {
               vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
               addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                             iselIntExpr_RMI(env, arg),
                                             argregs[argreg]));
            }
            not_done_yet--;
         }

      }

      /* ------ END deal with regparms ------ */

   }

   vassert(not_done_yet == 0);

   /* ------ END marshall all arguments ------ */

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Xcc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
         case Ity_INVALID:
            /* Function doesn't return a value. */
            *retloc = mk_RetLoc_simple(RLPri_None);
            break;
         case Ity_I64:
            *retloc = mk_RetLoc_simple(RLPri_2Int);
            break;
         case Ity_I32: case Ity_I16: case Ity_I8:
            *retloc = mk_RetLoc_simple(RLPri_Int);
            break;
         case Ity_V128:
            *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
            *stackAdjustAfterCall = 16;
            break;
         case Ity_V256:
            vassert(0); // ATC
            *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
            *stackAdjustAfterCall = 32;
            break;
         default:
            /* IR can denote other possible return types, but we don't
               handle those here. */
            vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
}
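
/* A worked example: an unconditional call to a 0-regparm helper
   "ULong h(UInt a, UInt b)" with two I32 args comes out roughly as

      pushl <arg b>
      pushl <arg a>
      call h
      addl $8, %esp

   with *retloc set to RLPri_2Int: the I64 result is in %edx:%eax,
   per the normal x86 calling convention.  (Helper name and signature
   are hypothetical.) */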


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an X86AMode holding the relevant guest state
   offset. */

static
X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;
   Int  shift  = 0;

   /* throw out any cases not generated by an x86 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-x86-guest on x86 host. */

   if (nElems != 8)
      vpanic("genGuestArrayOffset(x86 host)(1)");

   switch (elemSz) {
      case 1:  shift = 0; break;
      case 4:  shift = 2; break;
      case 8:  shift = 3; break;
      default: vpanic("genGuestArrayOffset(x86 host)(2)");
   }

   /* Compute off into a reg, %off.  Then return:

         movl %off, %tmp
         addl $bias, %tmp  (if bias != 0)
         andl $7, %tmp
         ... base(%ebp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      addInstr(env,
               X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
   }
   addInstr(env,
            X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
   return
      X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
}
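
/* Worked (hypothetical) example: a descriptor with base 64, elemTy
   Ity_F64 (elemSz == 8, so shift == 3) and nElems == 8, with bias 1,
   yields

      movl %roff, %tmp
      addl $1, %tmp
      andl $7, %tmp
      ... 64(%ebp,%tmp,8) ...

   The andl wraps the biased index back into the 8-element array,
   which is what the x87-style rotating register arrays this serves
   require. */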


/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* pushl $DEFAULT_FPUCW
      fldcw 0(%esp)
      addl $4, %esp
   */
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

   /* movl  %rrm, %rrm2
      andl  $3, %rrm2   -- shouldn't be needed; paranoia
      shll  $10, %rrm2
      orl   $DEFAULT_FPUCW, %rrm2
      pushl %rrm2
      fldcw 0(%esp)
      addl  $4, %esp
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
   addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
   addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
   addInstr(env, X86Instr_FpLdCW(zero_esp));
   add_to_esp(env, 4);
}
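
/* This relies on IRRoundingMode matching the x87 RC field encoding:
   00 = to nearest, 01 = toward -inf, 10 = toward +inf, 11 = toward
   zero, so the mode value can be shifted straight into bits 11:10 of
   the control word.  E.g. Irrm_ZERO (3) gives
   (3 << 10) | 0x027F = 0x0E7F. */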


/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  If dst contains a NaN then all hell might
      break loose after the comparison.  So, first zero it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
   /* Doesn't that just totally suck? */
   return dst;
}
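
/* In concrete terms the sequence is, roughly,

      xorps   %dst, %dst    -- dst := 0, so no NaN hazard
      cmpeqps %dst, %dst    -- 0 == 0 in every lane, so dst := all 1s
      xorps   %src, %dst    -- dst := ~src

   (mnemonics indicative; the emitter for Xsse_XOR / Xsse_CMPEQF
   picks the actual spelling). */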


/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
   after most non-simple FPU operations (simple = +, -, *, / and
   sqrt).

   This could be done a lot more efficiently if needed, by loading
   zero and adding it to the value to be rounded (fldz ; faddp?).
*/
static void roundToF64 ( ISelEnv* env, HReg reg )
{
   X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   sub_from_esp(env, 8);
   addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
   addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
   add_to_esp(env, 8);
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 32, 16 and 8-bit type.  All
   results are returned in a 32-bit register.  For 16- and 8-bit
   expressions, the upper 16/24 bits are arbitrary, so you should mask
   or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt32);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I32) {
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                       X86RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      IRTriop *triop = e->Iex.Triop.details;
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (triop->op == Iop_PRemC3210F64
          || triop->op == Iop_PRem1C3210F64) {
         HReg junk = newVRegF(env);
         HReg dst  = newVRegI(env);
         HReg srcL = iselDblExpr(env, triop->arg2);
         HReg srcR = iselDblExpr(env, triop->arg3);
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, X86Instr_FpBinary(
                           triop->op==Iop_PRemC3210F64
                              ? Xfp_PREM : Xfp_PREM1,
                           srcL,srcR,junk
                 ));
         /* The previous pseudo-insn will have left the FPU's C3210
            flags set correctly.  So bag them. */
         addInstr(env, X86Instr_FpStSW_AX());
         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
         return dst;
      }

      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      X86AluOp   aluOp;
      X86ShiftOp shOp;

      /* Pattern: Sub32(0,x) */
      if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32:
            aluOp = Xalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
            aluOp = Xalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32:
            aluOp = Xalu_AND; break;
         case Iop_Or8: case Iop_Or16: case Iop_Or32:
            aluOp = Xalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
            aluOp = Xalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32:
            aluOp = Xalu_MUL; break;
         default:
            aluOp = Xalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Xalu_INVALID) {
         HReg dst    = newVRegI(env);
         HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
         X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
         return dst;
      }
      /* Could do better here; forcing the first arg into a reg
         isn't always clever.
         -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
                        LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
                        t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
            movl 0xFFFFFFA0(%vr41),%vr107
            movl 0xFFFFFFA4(%vr41),%vr108
            movl %vr107,%vr106
            xorl %vr108,%vr106
            movl 0xFFFFFFA8(%vr41),%vr109
            movl %vr106,%vr105
            andl %vr109,%vr105
            movl 0xFFFFFFA0(%vr41),%vr110
            movl %vr105,%vr104
            xorl %vr110,%vr104
            movl %vr104,%vr70
      */

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Xsh_SHL; break;
         case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Xsh_SHR; break;
         case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Xsh_SAR; break;
         default:
            shOp = Xsh_INVALID; break;
      }
      if (shOp != Xsh_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr8:
               addInstr(env, X86Instr_Alu32R(
                                Xalu_AND, X86RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, X86Instr_Alu32R(
                                Xalu_AND, X86RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Sar8:
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
               break;
            case Iop_Sar16:
               addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
               addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
               break;
            default: break;
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
            addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1,dst));
         addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
         addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
         addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
         HReg a16   = newVRegI(env);
         HReg b16   = newVRegI(env);
         HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int  shift = (e->Iex.Binop.op == Iop_MullS8
                       || e->Iex.Binop.op == Iop_MullU8)
                         ? 24 : 16;
         X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
                              || e->Iex.Binop.op == Iop_MullS16)
                                ? Xsh_SAR : Xsh_SHR;

         addInstr(env, mk_iMOVsd_RR(a16s, a16));
         addInstr(env, mk_iMOVsd_RR(b16s, b16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
         addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
         addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
         addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
         addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
         return b16;
      }
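
      /* Worked example for Iop_MullS8: arg1 == 0xFE (-2), arg2 ==
         0x03.  The shl/sar by 24 sign-extends both to 0xFFFFFFFE and
         0x00000003; the imull then yields 0xFFFFFFFA, whose low 16
         bits (0xFFFA, i.e. -6) are the correct I16 product.  The
         upper 16 bits are arbitrary, as the contract at the top of
         this section permits. */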

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, X86Instr_FpCmp(fL,fR,dst));
         /* shift this right 8 bits so as to conform to CmpF64
            definition. */
         addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI16S) {
         Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);

         /* Used several times ... */
         X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());

         /* rf now holds the value to be converted, and arg1 holds
            the rounding mode value, encoded as per the
            IRRoundingMode enum.  The first thing to do is set the
            FPU's rounding mode accordingly. */

         /* Create a space for the format conversion. */
         /* subl $4, %esp */
         sub_from_esp(env, 4);

         /* Set host rounding mode */
         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

         /* gistw/l %rf, 0(%esp) */
         addInstr(env, X86Instr_FpLdStI(False/*store*/,
                                        toUChar(sz), rf, zero_esp));

         if (sz == 2) {
            /* movzwl 0(%esp), %dst */
            addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
         } else {
            /* movl 0(%esp), %dst */
            vassert(sz == 4);
            addInstr(env, X86Instr_Alu32R(
                             Xalu_MOV, X86RMI_Mem(zero_esp), dst));
         }

         /* Restore default FPU rounding. */
         set_FPU_rounding_default( env );

         /* addl $4, %esp */
         add_to_esp(env, 4);
         return dst;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(32to1(expr32)) */
      if (e->Iex.Unop.op == Iop_1Uto8) {
         DECLARE_PATTERN(p_32to1_then_1Uto8);
         DEFINE_PATTERN(p_32to1_then_1Uto8,
                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
            IRExpr* expr32 = mi.bindee[0];
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, expr32);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_8Uto32) {
         DECLARE_PATTERN(p_LDle8_then_8Uto32);
         DEFINE_PATTERN(p_LDle8_then_8Uto32,
                        unop(Iop_8Uto32,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 8Sto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_8Sto32) {
         DECLARE_PATTERN(p_LDle8_then_8Sto32);
         DEFINE_PATTERN(p_LDle8_then_8Sto32,
                        unop(Iop_8Sto32,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
            return dst;
         }
      }

      /* 16Uto32(LDle(expr32)) */
      if (e->Iex.Unop.op == Iop_16Uto32) {
         DECLARE_PATTERN(p_LDle16_then_16Uto32);
         DEFINE_PATTERN(p_LDle16_then_16Uto32,
                        unop(Iop_16Uto32,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
            HReg dst = newVRegI(env);
            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      /* 8Uto32(GET:I8) */
      if (e->Iex.Unop.op == Iop_8Uto32) {
         if (e->Iex.Unop.arg->tag == Iex_Get) {
            HReg      dst;
            X86AMode* amode;
            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
            dst = newVRegI(env);
            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                hregX86_EBP());
            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto32(GET:I16) */
      if (e->Iex.Unop.op == Iop_16Uto32) {
         if (e->Iex.Unop.arg->tag == Iex_Get) {
            HReg      dst;
            X86AMode* amode;
            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
            dst = newVRegI(env);
            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
                                hregX86_EBP());
            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      switch (e->Iex.Unop.op) {
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_16Uto32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Alu32R(Xalu_AND,
                                          X86RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto32:
         case Iop_16Sto32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
            return dst;
         }
         case Iop_64HIto32: {
            HReg rHi, rLo;
            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo .. poor wee thing :-) */
         }
         case Iop_64to32: {
            HReg rHi, rLo;
            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* similar stupid comment to the above ... */
         }
         case Iop_16HIto8:
         case Iop_32HIto16: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst         = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32: {
            /* could do better than this, but for now ... */
            HReg dst         = newVRegI(env);
            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Set32(cond,dst));
            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
            return dst;
         }
         case Iop_Ctz32: {
            /* Count trailing zeroes, implemented by x86 'bsfl' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Bsfr32(True,src,dst));
            return dst;
         }
         case Iop_Clz32: {
            /* Count leading zeroes.  Do 'bsrl' to establish the index
               of the highest set bit, and subtract that value from
               31. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, X86Instr_Bsfr32(False,src,tmp));
            addInstr(env, X86Instr_Alu32R(Xalu_MOV,
                                          X86RMI_Imm(31), dst));
            addInstr(env, X86Instr_Alu32R(Xalu_SUB,
                                          X86RMI_Reg(tmp), dst));
            return dst;
         }
   1334 
   1335          case Iop_CmpwNEZ32: {
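                     /* CmpwNEZ32(src) is 0 if src == 0 and all-ones
                        otherwise.  dst = src | -src has its sign bit set
                        iff src != 0, and SAR by 31 then smears that bit
                        across the whole word. */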
   1336             HReg dst = newVRegI(env);
   1337             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1338             addInstr(env, mk_iMOVsd_RR(src,dst));
   1339             addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
   1340             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   1341                                           X86RMI_Reg(src), dst));
   1342             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1343             return dst;
   1344          }
   1345          case Iop_Left8:
   1346          case Iop_Left16:
   1347          case Iop_Left32: {
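                     /* Computes src | -src, which sets every bit from the
                        lowest set bit of src upwards; zero maps to zero. */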
   1348             HReg dst = newVRegI(env);
   1349             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1350             addInstr(env, mk_iMOVsd_RR(src, dst));
   1351             addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
   1352             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
   1353             return dst;
   1354          }
   1355 
   1356          case Iop_V128to32: {
   1357             HReg      dst  = newVRegI(env);
   1358             HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
   1359             X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   1360             sub_from_esp(env, 16);
   1361             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   1362             addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
   1363             add_to_esp(env, 16);
   1364             return dst;
   1365          }
   1366 
   1367          /* ReinterpF32asI32(e) */
   1368          /* Given an IEEE754 single, produce an I32 with the same bit
   1369             pattern.  Keep stack 8-aligned even though only using 4
   1370             bytes. */
   1371          case Iop_ReinterpF32asI32: {
   1372             HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
   1373             HReg dst  = newVRegI(env);
   1374             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   1375             /* paranoia */
   1376             set_FPU_rounding_default(env);
   1377             /* subl $8, %esp */
   1378             sub_from_esp(env, 8);
   1379             /* gstF %rf, 0(%esp) */
   1380             addInstr(env,
   1381                      X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
   1382             /* movl 0(%esp), %dst */
   1383             addInstr(env,
   1384                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
   1385             /* addl $8, %esp */
   1386             add_to_esp(env, 8);
   1387             return dst;
   1388          }
   1389 
   1390          case Iop_16to8:
   1391          case Iop_32to8:
   1392          case Iop_32to16:
   1393             /* These are no-ops. */
   1394             return iselIntExpr_R(env, e->Iex.Unop.arg);
   1395 
   1396          case Iop_GetMSBs8x8: {
   1397             /* Note: the following assumes the helper is of
   1398                signature
   1399                   UInt fn ( ULong ), and is not a regparm fn.
   1400             */
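                     /* Per cdecl the ULong argument sits on the stack with
                        its low word at the lower address, so xHi is pushed
                        first and xLo last; the two words are cleared again
                        by the add_to_esp after the call. */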
   1401             HReg  xLo, xHi;
   1402             HReg  dst = newVRegI(env);
   1403             HWord fn = (HWord)h_generic_calc_GetMSBs8x8;
   1404             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
   1405             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   1406             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   1407             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
   1408                                          0, mk_RetLoc_simple(RLPri_Int) ));
   1409             add_to_esp(env, 2*4);
   1410             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1411             return dst;
   1412          }
   1413 
   1414          default:
   1415             break;
   1416       }
   1417       break;
   1418    }
   1419 
   1420    /* --------- GET --------- */
   1421    case Iex_Get: {
   1422       if (ty == Ity_I32) {
   1423          HReg dst = newVRegI(env);
   1424          addInstr(env, X86Instr_Alu32R(
   1425                           Xalu_MOV,
   1426                           X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
   1427                                                  hregX86_EBP())),
   1428                           dst));
   1429          return dst;
   1430       }
   1431       if (ty == Ity_I8 || ty == Ity_I16) {
   1432          HReg dst = newVRegI(env);
   1433          addInstr(env, X86Instr_LoadEX(
   1434                           toUChar(ty==Ity_I8 ? 1 : 2),
   1435                           False,
   1436                           X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
   1437                           dst));
   1438          return dst;
   1439       }
   1440       break;
   1441    }
   1442 
   1443    case Iex_GetI: {
   1444       X86AMode* am
   1445          = genGuestArrayOffset(
   1446               env, e->Iex.GetI.descr,
   1447                    e->Iex.GetI.ix, e->Iex.GetI.bias );
   1448       HReg dst = newVRegI(env);
   1449       if (ty == Ity_I8) {
   1450          addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
   1451          return dst;
   1452       }
   1453       if (ty == Ity_I32) {
   1454          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
   1455          return dst;
   1456       }
   1457       break;
   1458    }
   1459 
   1460    /* --------- CCALL --------- */
   1461    case Iex_CCall: {
   1462       HReg    dst = newVRegI(env);
   1463       vassert(ty == e->Iex.CCall.retty);
   1464 
   1465       /* be very restrictive for now.  Only 32/64-bit ints allowed for
   1466          args, and 32 bits for return type.  Don't forget to change
   1467          the RetLoc if more return types are allowed in future. */
   1468       if (e->Iex.CCall.retty != Ity_I32)
   1469          goto irreducible;
   1470 
   1471       /* Marshal args, do the call, clear stack. */
   1472       UInt   addToSp = 0;
   1473       RetLoc rloc    = mk_RetLoc_INVALID();
   1474       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   1475                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
   1476       vassert(is_sane_RetLoc(rloc));
   1477       vassert(rloc.pri == RLPri_Int);
   1478       vassert(addToSp == 0);
   1479 
   1480       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1481       return dst;
   1482    }
   1483 
   1484    /* --------- LITERAL --------- */
   1485    /* 32/16/8-bit literals */
   1486    case Iex_Const: {
   1487       X86RMI* rmi = iselIntExpr_RMI ( env, e );
   1488       HReg    r   = newVRegI(env);
   1489       addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
   1490       return r;
   1491    }
   1492 
   1493    /* --------- MULTIPLEX --------- */
   1494    case Iex_ITE: { // VFD
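               /* The iftrue value is computed into dst, and the iffalse
                  value is then conditionally moved over it using the
                  complemented condition: flipping the bottom bit of an x86
                  condition code (cc ^ 1) yields its logical complement. */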
    1495       if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
    1496           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
    1497          HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
    1498          X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
    1499          HReg   dst = newVRegI(env);
    1500          addInstr(env, mk_iMOVsd_RR(r1,dst));
    1501          X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
    1502          addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
    1503          return dst;
   1504       }
   1505       break;
   1506    }
   1507 
   1508    default:
    1509       break;
   1510    } /* switch (e->tag) */
   1511 
   1512    /* We get here if no pattern matched. */
   1513   irreducible:
   1514    ppIRExpr(e);
   1515    vpanic("iselIntExpr_R: cannot reduce tree");
   1516 }
   1517 
   1518 
   1519 /*---------------------------------------------------------*/
   1520 /*--- ISEL: Integer expression auxiliaries              ---*/
   1521 /*---------------------------------------------------------*/
   1522 
   1523 /* --------------------- AMODEs --------------------- */
   1524 
   1525 /* Return an AMode which computes the value of the specified
   1526    expression, possibly also adding insns to the code list as a
   1527    result.  The expression may only be a 32-bit one.
   1528 */
   1529 
   1530 static Bool sane_AMode ( X86AMode* am )
   1531 {
   1532    switch (am->tag) {
   1533       case Xam_IR:
   1534          return
   1535             toBool( hregClass(am->Xam.IR.reg) == HRcInt32
   1536                     && (hregIsVirtual(am->Xam.IR.reg)
   1537                         || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
   1538       case Xam_IRRS:
   1539          return
   1540             toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
   1541                     && hregIsVirtual(am->Xam.IRRS.base)
   1542                     && hregClass(am->Xam.IRRS.index) == HRcInt32
   1543                     && hregIsVirtual(am->Xam.IRRS.index) );
   1544       default:
    1545          vpanic("sane_AMode: unknown x86 amode tag");
   1546    }
   1547 }
   1548 
   1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
   1550 {
   1551    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   1552    vassert(sane_AMode(am));
   1553    return am;
   1554 }
   1555 
   1556 /* DO NOT CALL THIS DIRECTLY ! */
   1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
   1558 {
   1559    IRType ty = typeOfIRExpr(env->type_env,e);
   1560    vassert(ty == Ity_I32);
   1561 
   1562    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
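            /* e.g. an address computed as base + (index << 2) + 64
               becomes the single amode 64(%base,%index,4). */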
   1563    if (e->tag == Iex_Binop
   1564        && e->Iex.Binop.op == Iop_Add32
   1565        && e->Iex.Binop.arg2->tag == Iex_Const
   1566        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
   1567        && e->Iex.Binop.arg1->tag == Iex_Binop
   1568        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
   1569        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
   1570        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
   1571        && e->Iex.Binop.arg1
   1572            ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
   1573        && e->Iex.Binop.arg1
   1574            ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
   1575       UInt shift = e->Iex.Binop.arg1
   1576                     ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1577       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1578       if (shift == 1 || shift == 2 || shift == 3) {
   1579          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
   1580          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
   1581                                        ->Iex.Binop.arg2->Iex.Binop.arg1 );
   1582          return X86AMode_IRRS(imm32, r1, r2, shift);
   1583       }
   1584    }
   1585 
   1586    /* Add32(expr1, Shl32(expr2, imm)) */
   1587    if (e->tag == Iex_Binop
   1588        && e->Iex.Binop.op == Iop_Add32
   1589        && e->Iex.Binop.arg2->tag == Iex_Binop
   1590        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
   1591        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
   1592        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
   1593       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1594       if (shift == 1 || shift == 2 || shift == 3) {
   1595          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1596          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
   1597          return X86AMode_IRRS(0, r1, r2, shift);
   1598       }
   1599    }
   1600 
   1601    /* Add32(expr,i) */
   1602    if (e->tag == Iex_Binop
   1603        && e->Iex.Binop.op == Iop_Add32
   1604        && e->Iex.Binop.arg2->tag == Iex_Const
   1605        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
   1606       HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
   1607       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   1608    }
   1609 
   1610    /* Doesn't match anything in particular.  Generate it into
   1611       a register and use that. */
   1612    {
   1613       HReg r1 = iselIntExpr_R(env, e);
   1614       return X86AMode_IR(0, r1);
   1615    }
   1616 }
   1617 
   1618 
   1619 /* --------------------- RMIs --------------------- */
   1620 
   1621 /* Similarly, calculate an expression into an X86RMI operand.  As with
   1622    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
   1623 
   1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
   1625 {
   1626    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   1627    /* sanity checks ... */
   1628    switch (rmi->tag) {
   1629       case Xrmi_Imm:
   1630          return rmi;
   1631       case Xrmi_Reg:
   1632          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
   1633          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
   1634          return rmi;
   1635       case Xrmi_Mem:
   1636          vassert(sane_AMode(rmi->Xrmi.Mem.am));
   1637          return rmi;
   1638       default:
   1639          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   1640    }
   1641 }
   1642 
   1643 /* DO NOT CALL THIS DIRECTLY ! */
   1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
   1645 {
   1646    IRType ty = typeOfIRExpr(env->type_env,e);
   1647    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1648 
   1649    /* special case: immediate */
   1650    if (e->tag == Iex_Const) {
   1651       UInt u;
   1652       switch (e->Iex.Const.con->tag) {
   1653          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1654          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1655          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1656          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
   1657       }
   1658       return X86RMI_Imm(u);
   1659    }
   1660 
   1661    /* special case: 32-bit GET */
   1662    if (e->tag == Iex_Get && ty == Ity_I32) {
   1663       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
   1664                                     hregX86_EBP()));
   1665    }
   1666 
   1667    /* special case: 32-bit load from memory */
   1668    if (e->tag == Iex_Load && ty == Ity_I32
   1669        && e->Iex.Load.end == Iend_LE) {
   1670       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   1671       return X86RMI_Mem(am);
   1672    }
   1673 
   1674    /* default case: calculate into a register and return that */
   1675    {
   1676       HReg r = iselIntExpr_R ( env, e );
   1677       return X86RMI_Reg(r);
   1678    }
   1679 }
   1680 
   1681 
   1682 /* --------------------- RIs --------------------- */
   1683 
   1684 /* Calculate an expression into an X86RI operand.  As with
   1685    iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
   1686 
   1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
   1688 {
   1689    X86RI* ri = iselIntExpr_RI_wrk(env, e);
   1690    /* sanity checks ... */
   1691    switch (ri->tag) {
   1692       case Xri_Imm:
   1693          return ri;
   1694       case Xri_Reg:
   1695          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
   1696          vassert(hregIsVirtual(ri->Xri.Reg.reg));
   1697          return ri;
   1698       default:
   1699          vpanic("iselIntExpr_RI: unknown x86 RI tag");
   1700    }
   1701 }
   1702 
   1703 /* DO NOT CALL THIS DIRECTLY ! */
   1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
   1705 {
   1706    IRType ty = typeOfIRExpr(env->type_env,e);
   1707    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1708 
   1709    /* special case: immediate */
   1710    if (e->tag == Iex_Const) {
   1711       UInt u;
   1712       switch (e->Iex.Const.con->tag) {
   1713          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1714          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1715          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    1716          default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
   1717       }
   1718       return X86RI_Imm(u);
   1719    }
   1720 
   1721    /* default case: calculate into a register and return that */
   1722    {
   1723       HReg r = iselIntExpr_R ( env, e );
   1724       return X86RI_Reg(r);
   1725    }
   1726 }
   1727 
   1728 
   1729 /* --------------------- RMs --------------------- */
   1730 
   1731 /* Similarly, calculate an expression into an X86RM operand.  As with
   1732    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
   1733 
   1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
   1735 {
   1736    X86RM* rm = iselIntExpr_RM_wrk(env, e);
   1737    /* sanity checks ... */
   1738    switch (rm->tag) {
   1739       case Xrm_Reg:
   1740          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
   1741          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
   1742          return rm;
   1743       case Xrm_Mem:
   1744          vassert(sane_AMode(rm->Xrm.Mem.am));
   1745          return rm;
   1746       default:
   1747          vpanic("iselIntExpr_RM: unknown x86 RM tag");
   1748    }
   1749 }
   1750 
   1751 /* DO NOT CALL THIS DIRECTLY ! */
   1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
   1753 {
   1754    IRType ty = typeOfIRExpr(env->type_env,e);
   1755    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1756 
   1757    /* special case: 32-bit GET */
   1758    if (e->tag == Iex_Get && ty == Ity_I32) {
   1759       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
   1760                                    hregX86_EBP()));
   1761    }
   1762 
    1763    /* special case: load from memory -- none; loads go via the default case */
   1764 
   1765    /* default case: calculate into a register and return that */
   1766    {
   1767       HReg r = iselIntExpr_R ( env, e );
   1768       return X86RM_Reg(r);
   1769    }
   1770 }
   1771 
   1772 
   1773 /* --------------------- CONDCODE --------------------- */
   1774 
    1775 /* Generate code to evaluate a bit-typed expression, returning the
    1776    condition code which corresponds to the expression notionally
    1777    returning 1. */
   1778 
   1779 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
   1780 {
   1781    /* Uh, there's nothing we can sanity check here, unfortunately. */
   1782    return iselCondCode_wrk(env,e);
   1783 }
   1784 
   1785 /* DO NOT CALL THIS DIRECTLY ! */
   1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
   1787 {
   1788    MatchInfo mi;
   1789 
   1790    vassert(e);
   1791    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
   1792 
   1793    /* var */
   1794    if (e->tag == Iex_RdTmp) {
   1795       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1796       /* Test32 doesn't modify r32; so this is OK. */
   1797       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
   1798       return Xcc_NZ;
   1799    }
   1800 
   1801    /* Constant 1:Bit */
   1802    if (e->tag == Iex_Const) {
   1803       HReg r;
   1804       vassert(e->Iex.Const.con->tag == Ico_U1);
   1805       vassert(e->Iex.Const.con->Ico.U1 == True
   1806               || e->Iex.Const.con->Ico.U1 == False);
   1807       r = newVRegI(env);
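               /* The MOV gives r a defined value; XORing r with itself
                  then leaves it at zero while setting ZF, so Xcc_Z is an
                  always-true condition here and Xcc_NZ an always-false
                  one. */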
   1808       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
   1809       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
   1810       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
   1811    }
   1812 
   1813    /* Not1(e) */
   1814    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
   1815       /* Generate code for the arg, and negate the test condition */
   1816       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   1817    }
   1818 
   1819    /* --- patterns rooted at: 32to1 --- */
   1820 
   1821    if (e->tag == Iex_Unop
   1822        && e->Iex.Unop.op == Iop_32to1) {
   1823       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1824       addInstr(env, X86Instr_Test32(1,rm));
   1825       return Xcc_NZ;
   1826    }
   1827 
   1828    /* --- patterns rooted at: CmpNEZ8 --- */
   1829 
   1830    /* CmpNEZ8(x) */
   1831    if (e->tag == Iex_Unop
   1832        && e->Iex.Unop.op == Iop_CmpNEZ8) {
   1833       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1834       addInstr(env, X86Instr_Test32(0xFF,rm));
   1835       return Xcc_NZ;
   1836    }
   1837 
   1838    /* --- patterns rooted at: CmpNEZ16 --- */
   1839 
   1840    /* CmpNEZ16(x) */
   1841    if (e->tag == Iex_Unop
   1842        && e->Iex.Unop.op == Iop_CmpNEZ16) {
   1843       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1844       addInstr(env, X86Instr_Test32(0xFFFF,rm));
   1845       return Xcc_NZ;
   1846    }
   1847 
   1848    /* --- patterns rooted at: CmpNEZ32 --- */
   1849 
   1850    /* CmpNEZ32(And32(x,y)) */
   1851    {
   1852       DECLARE_PATTERN(p_CmpNEZ32_And32);
   1853       DEFINE_PATTERN(p_CmpNEZ32_And32,
   1854                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
   1855       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
   1856          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
   1857          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
   1858          HReg    tmp  = newVRegI(env);
   1859          addInstr(env, mk_iMOVsd_RR(r0, tmp));
   1860          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
   1861          return Xcc_NZ;
   1862       }
   1863    }
   1864 
   1865    /* CmpNEZ32(Or32(x,y)) */
   1866    {
   1867       DECLARE_PATTERN(p_CmpNEZ32_Or32);
   1868       DEFINE_PATTERN(p_CmpNEZ32_Or32,
   1869                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
   1870       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
   1871          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
   1872          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
   1873          HReg    tmp  = newVRegI(env);
   1874          addInstr(env, mk_iMOVsd_RR(r0, tmp));
   1875          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
   1876          return Xcc_NZ;
   1877       }
   1878    }
   1879 
   1880    /* CmpNEZ32(GET(..):I32) */
   1881    if (e->tag == Iex_Unop
   1882        && e->Iex.Unop.op == Iop_CmpNEZ32
   1883        && e->Iex.Unop.arg->tag == Iex_Get) {
   1884       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1885                                  hregX86_EBP());
   1886       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
   1887       return Xcc_NZ;
   1888    }
   1889 
   1890    /* CmpNEZ32(x) */
   1891    if (e->tag == Iex_Unop
   1892        && e->Iex.Unop.op == Iop_CmpNEZ32) {
   1893       HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1894       X86RMI* rmi2 = X86RMI_Imm(0);
   1895       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
   1896       return Xcc_NZ;
   1897    }
   1898 
   1899    /* --- patterns rooted at: CmpNEZ64 --- */
   1900 
   1901    /* CmpNEZ64(Or64(x,y)) */
   1902    {
   1903       DECLARE_PATTERN(p_CmpNEZ64_Or64);
   1904       DEFINE_PATTERN(p_CmpNEZ64_Or64,
   1905                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
   1906       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
   1907          HReg    hi1, lo1, hi2, lo2;
   1908          HReg    tmp  = newVRegI(env);
   1909          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
   1910          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
   1911          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
   1912          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
   1913          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
   1914          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
   1915          return Xcc_NZ;
   1916       }
   1917    }
   1918 
   1919    /* CmpNEZ64(x) */
   1920    if (e->tag == Iex_Unop
   1921        && e->Iex.Unop.op == Iop_CmpNEZ64) {
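               /* A 64-bit value is nonzero iff (hi | lo) != 0, so OR the
                  two halves together and test the result. */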
   1922       HReg hi, lo;
   1923       HReg tmp = newVRegI(env);
   1924       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
   1925       addInstr(env, mk_iMOVsd_RR(hi, tmp));
   1926       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
   1927       return Xcc_NZ;
   1928    }
   1929 
   1930    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
   1931 
   1932    /* CmpEQ8 / CmpNE8 */
   1933    if (e->tag == Iex_Binop
   1934        && (e->Iex.Binop.op == Iop_CmpEQ8
   1935            || e->Iex.Binop.op == Iop_CmpNE8
   1936            || e->Iex.Binop.op == Iop_CasCmpEQ8
   1937            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
   1938       if (isZeroU8(e->Iex.Binop.arg2)) {
   1939          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1940          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
   1941          switch (e->Iex.Binop.op) {
   1942             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
   1943             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
   1944             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
   1945          }
   1946       } else {
   1947          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1948          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   1949          HReg    r    = newVRegI(env);
   1950          addInstr(env, mk_iMOVsd_RR(r1,r));
   1951          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
   1952          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
   1953          switch (e->Iex.Binop.op) {
   1954             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
   1955             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
   1956             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
   1957          }
   1958       }
   1959    }
   1960 
   1961    /* CmpEQ16 / CmpNE16 */
   1962    if (e->tag == Iex_Binop
   1963        && (e->Iex.Binop.op == Iop_CmpEQ16
   1964            || e->Iex.Binop.op == Iop_CmpNE16
   1965            || e->Iex.Binop.op == Iop_CasCmpEQ16
   1966            || e->Iex.Binop.op == Iop_CasCmpNE16
   1967            || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
   1968       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1969       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   1970       HReg    r    = newVRegI(env);
   1971       addInstr(env, mk_iMOVsd_RR(r1,r));
   1972       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
   1973       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
   1974       switch (e->Iex.Binop.op) {
   1975          case Iop_CmpEQ16: case Iop_CasCmpEQ16:
   1976             return Xcc_Z;
   1977          case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
   1978             return Xcc_NZ;
   1979          default:
   1980             vpanic("iselCondCode(x86): CmpXX16");
   1981       }
   1982    }
   1983 
   1984    /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
   1985       Saves a "movl %eax, %tmp" compared to the default route. */
   1986    if (e->tag == Iex_Binop
   1987        && e->Iex.Binop.op == Iop_CmpNE32
   1988        && e->Iex.Binop.arg1->tag == Iex_CCall
   1989        && e->Iex.Binop.arg2->tag == Iex_Const) {
   1990       IRExpr* cal = e->Iex.Binop.arg1;
   1991       IRExpr* con = e->Iex.Binop.arg2;
   1992       /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
   1993       vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
   1994       vassert(con->Iex.Const.con->tag == Ico_U32);
   1995       /* Marshal args, do the call. */
   1996       UInt   addToSp = 0;
   1997       RetLoc rloc    = mk_RetLoc_INVALID();
   1998       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   1999                     cal->Iex.CCall.cee,
   2000                     cal->Iex.CCall.retty, cal->Iex.CCall.args );
   2001       vassert(is_sane_RetLoc(rloc));
   2002       vassert(rloc.pri == RLPri_Int);
   2003       vassert(addToSp == 0);
   2004       /* */
   2005       addInstr(env, X86Instr_Alu32R(Xalu_CMP,
   2006                                     X86RMI_Imm(con->Iex.Const.con->Ico.U32),
   2007                                     hregX86_EAX()));
   2008       return Xcc_NZ;
   2009    }
   2010 
   2011    /* Cmp*32*(x,y) */
   2012    if (e->tag == Iex_Binop
   2013        && (e->Iex.Binop.op == Iop_CmpEQ32
   2014            || e->Iex.Binop.op == Iop_CmpNE32
   2015            || e->Iex.Binop.op == Iop_CmpLT32S
   2016            || e->Iex.Binop.op == Iop_CmpLT32U
   2017            || e->Iex.Binop.op == Iop_CmpLE32S
   2018            || e->Iex.Binop.op == Iop_CmpLE32U
   2019            || e->Iex.Binop.op == Iop_CasCmpEQ32
   2020            || e->Iex.Binop.op == Iop_CasCmpNE32
   2021            || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
   2022       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2023       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   2024       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
   2025       switch (e->Iex.Binop.op) {
   2026          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
   2027          case Iop_CmpNE32:
   2028          case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
   2029          case Iop_CmpLT32S: return Xcc_L;
   2030          case Iop_CmpLT32U: return Xcc_B;
   2031          case Iop_CmpLE32S: return Xcc_LE;
   2032          case Iop_CmpLE32U: return Xcc_BE;
   2033          default: vpanic("iselCondCode(x86): CmpXX32");
   2034       }
   2035    }
   2036 
   2037    /* CmpNE64 */
   2038    if (e->tag == Iex_Binop
   2039        && (e->Iex.Binop.op == Iop_CmpNE64
   2040            || e->Iex.Binop.op == Iop_CmpEQ64)) {
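               /* (hi1 ^ hi2) | (lo1 ^ lo2) is zero iff the two 64-bit
                  values are equal, so compute that and test for Z or NZ. */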
   2041       HReg hi1, hi2, lo1, lo2;
   2042       HReg tHi = newVRegI(env);
   2043       HReg tLo = newVRegI(env);
   2044       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
   2045       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
   2046       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
   2047       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
   2048       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
   2049       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
   2050       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
   2051       switch (e->Iex.Binop.op) {
   2052          case Iop_CmpNE64: return Xcc_NZ;
   2053          case Iop_CmpEQ64: return Xcc_Z;
   2054          default: vpanic("iselCondCode(x86): CmpXX64");
   2055       }
   2056    }
   2057 
   2058    ppIRExpr(e);
   2059    vpanic("iselCondCode");
   2060 }
   2061 
   2062 
   2063 /*---------------------------------------------------------*/
   2064 /*--- ISEL: Integer expressions (64 bit)                ---*/
   2065 /*---------------------------------------------------------*/
   2066 
    2067 /* Compute a 64-bit value into a register pair, which is returned as
    2068    the first two parameters.  As with iselIntExpr_R, these are
    2069    virtual regs, and they must not be changed by subsequent code
    2070    emitted by the caller.  */
   2071 
   2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   2073 {
   2074    iselInt64Expr_wrk(rHi, rLo, env, e);
   2075 #  if 0
   2076    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2077 #  endif
   2078    vassert(hregClass(*rHi) == HRcInt32);
   2079    vassert(hregIsVirtual(*rHi));
   2080    vassert(hregClass(*rLo) == HRcInt32);
   2081    vassert(hregIsVirtual(*rLo));
   2082 }
   2083 
   2084 /* DO NOT CALL THIS DIRECTLY ! */
   2085 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   2086 {
   2087    MatchInfo mi;
   2088    HWord fn = 0; /* helper fn for most SIMD64 stuff */
   2089    vassert(e);
   2090    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   2091 
   2092    /* 64-bit literal */
   2093    if (e->tag == Iex_Const) {
   2094       ULong w64 = e->Iex.Const.con->Ico.U64;
   2095       UInt  wHi = toUInt(w64 >> 32);
   2096       UInt  wLo = toUInt(w64);
   2097       HReg  tLo = newVRegI(env);
   2098       HReg  tHi = newVRegI(env);
   2099       vassert(e->Iex.Const.con->tag == Ico_U64);
   2100       if (wLo == wHi) {
   2101          /* Save a precious Int register in this special case. */
   2102          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
   2103          *rHi = tLo;
   2104          *rLo = tLo;
   2105       } else {
   2106          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
   2107          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
   2108          *rHi = tHi;
   2109          *rLo = tLo;
   2110       }
   2111       return;
   2112    }
   2113 
   2114    /* read 64-bit IRTemp */
   2115    if (e->tag == Iex_RdTmp) {
   2116       lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
   2117       return;
   2118    }
   2119 
   2120    /* 64-bit load */
   2121    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2122       HReg     tLo, tHi;
   2123       X86AMode *am0, *am4;
   2124       vassert(e->Iex.Load.ty == Ity_I64);
   2125       tLo = newVRegI(env);
   2126       tHi = newVRegI(env);
   2127       am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
   2128       am4 = advance4(am0);
   2129       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
   2130       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2131       *rHi = tHi;
   2132       *rLo = tLo;
   2133       return;
   2134    }
   2135 
   2136    /* 64-bit GET */
   2137    if (e->tag == Iex_Get) {
   2138       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
   2139       X86AMode* am4 = advance4(am);
   2140       HReg tLo = newVRegI(env);
   2141       HReg tHi = newVRegI(env);
   2142       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
   2143       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2144       *rHi = tHi;
   2145       *rLo = tLo;
   2146       return;
   2147    }
   2148 
   2149    /* 64-bit GETI */
   2150    if (e->tag == Iex_GetI) {
   2151       X86AMode* am
   2152          = genGuestArrayOffset( env, e->Iex.GetI.descr,
   2153                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
   2154       X86AMode* am4 = advance4(am);
   2155       HReg tLo = newVRegI(env);
   2156       HReg tHi = newVRegI(env);
   2157       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
   2158       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2159       *rHi = tHi;
   2160       *rLo = tLo;
   2161       return;
   2162    }
   2163 
   2164    /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
   2165    if (e->tag == Iex_ITE) {
   2166       HReg e0Lo, e0Hi, e1Lo, e1Hi;
   2167       HReg tLo = newVRegI(env);
   2168       HReg tHi = newVRegI(env);
   2169       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
   2170       iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
   2171       addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
   2172       addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
   2173       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
   2174       /* This assumes the first cmov32 doesn't trash the condition
   2175          codes, so they are still available for the second cmov32 */
   2176       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
   2177       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
   2178       *rHi = tHi;
   2179       *rLo = tLo;
   2180       return;
   2181    }
   2182 
   2183    /* --------- BINARY ops --------- */
   2184    if (e->tag == Iex_Binop) {
   2185       switch (e->Iex.Binop.op) {
   2186          /* 32 x 32 -> 64 multiply */
   2187          case Iop_MullU32:
   2188          case Iop_MullS32: {
    2189             /* get one operand into %eax, and the other into a R/M.
    2190                Need to make an educated guess about which operand is
    2191                better placed in which position. */
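                     /* The one-operand MUL/IMUL forms compute
                        %edx:%eax = %eax * r/m32, which is exactly the
                        64-bit product wanted here. */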
   2192             HReg   tLo    = newVRegI(env);
   2193             HReg   tHi    = newVRegI(env);
   2194             Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
   2195             X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
   2196             HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2197             addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
   2198             addInstr(env, X86Instr_MulL(syned, rmLeft));
   2199             /* Result is now in EDX:EAX.  Tell the caller. */
   2200             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2201             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2202             *rHi = tHi;
   2203             *rLo = tLo;
   2204             return;
   2205          }
   2206 
   2207          /* 64 x 32 -> (32(rem),32(div)) division */
   2208          case Iop_DivModU64to32:
   2209          case Iop_DivModS64to32: {
   2210             /* Get the 64-bit operand into edx:eax, and the other into
   2211                any old R/M. */
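                     /* DIV/IDIV divides %edx:%eax by the r/m32 operand,
                        leaving the quotient in %eax and the remainder in
                        %edx; these become the low (div) and high (rem)
                        halves of the result, as the Iop requires. */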
   2212             HReg sHi, sLo;
   2213             HReg   tLo     = newVRegI(env);
   2214             HReg   tHi     = newVRegI(env);
   2215             Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
   2216             X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
   2217             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2218             addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
   2219             addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
   2220             addInstr(env, X86Instr_Div(syned, rmRight));
   2221             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2222             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2223             *rHi = tHi;
   2224             *rLo = tLo;
   2225             return;
   2226          }
   2227 
   2228          /* Or64/And64/Xor64 */
   2229          case Iop_Or64:
   2230          case Iop_And64:
   2231          case Iop_Xor64: {
   2232             HReg xLo, xHi, yLo, yHi;
   2233             HReg tLo = newVRegI(env);
   2234             HReg tHi = newVRegI(env);
   2235             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
   2236                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
   2237                           : Xalu_XOR;
   2238             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2239             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2240             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2241             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
   2242             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2243             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
   2244             *rHi = tHi;
   2245             *rLo = tLo;
   2246             return;
   2247          }
   2248 
   2249          /* Add64/Sub64 */
   2250          case Iop_Add64:
   2251             if (e->Iex.Binop.arg2->tag == Iex_Const) {
   2252                /* special case Add64(e, const) */
   2253                ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   2254                UInt  wHi = toUInt(w64 >> 32);
   2255                UInt  wLo = toUInt(w64);
   2256                HReg  tLo = newVRegI(env);
   2257                HReg  tHi = newVRegI(env);
   2258                HReg  xLo, xHi;
   2259                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
   2260                iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2261                addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2262                addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2263                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
   2264                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
   2265                *rHi = tHi;
   2266                *rLo = tLo;
   2267                return;
   2268             }
   2269             /* else fall through to the generic case */
   2270          case Iop_Sub64: {
   2271             HReg xLo, xHi, yLo, yHi;
   2272             HReg tLo = newVRegI(env);
   2273             HReg tHi = newVRegI(env);
   2274             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2275             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2276             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2277             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2278             if (e->Iex.Binop.op==Iop_Add64) {
   2279                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
   2280                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
   2281             } else {
   2282                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
   2283                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
   2284             }
   2285             *rHi = tHi;
   2286             *rLo = tLo;
   2287             return;
   2288          }
   2289 
   2290          /* 32HLto64(e1,e2) */
   2291          case Iop_32HLto64:
   2292             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2293             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2294             return;
   2295 
   2296          /* 64-bit shifts */
   2297          case Iop_Shl64: {
   2298             /* We use the same ingenious scheme as gcc.  Put the value
   2299                to be shifted into %hi:%lo, and the shift amount into
   2300                %cl.  Then (dsts on right, a la ATT syntax):
   2301 
   2302                shldl %cl, %lo, %hi   -- make %hi be right for the
   2303                                      -- shift amt %cl % 32
   2304                shll  %cl, %lo        -- make %lo be right for the
   2305                                      -- shift amt %cl % 32
   2306 
   2307                Now, if (shift amount % 64) is in the range 32 .. 63,
   2308                we have to do a fixup, which puts the result low half
   2309                into the result high half, and zeroes the low half:
   2310 
   2311                testl $32, %ecx
   2312 
   2313                cmovnz %lo, %hi
   2314                movl $0, %tmp         -- sigh; need yet another reg
   2315                cmovnz %tmp, %lo
   2316             */
   2317             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
   2318             tLo = newVRegI(env);
   2319             tHi = newVRegI(env);
   2320             tTemp = newVRegI(env);
   2321             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2322             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2323             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
   2324             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2325             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2326             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
   2327                and those regs are legitimately modifiable. */
   2328             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
   2329             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
   2330             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
   2331             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
   2332             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
   2333             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
   2334             *rHi = tHi;
   2335             *rLo = tLo;
   2336             return;
   2337          }
   2338 
   2339          case Iop_Shr64: {
   2340             /* We use the same ingenious scheme as gcc.  Put the value
   2341                to be shifted into %hi:%lo, and the shift amount into
   2342                %cl.  Then:
   2343 
   2344                shrdl %cl, %hi, %lo   -- make %lo be right for the
   2345                                      -- shift amt %cl % 32
   2346                shrl  %cl, %hi        -- make %hi be right for the
   2347                                      -- shift amt %cl % 32
   2348 
   2349                Now, if (shift amount % 64) is in the range 32 .. 63,
   2350                we have to do a fixup, which puts the result high half
   2351                into the result low half, and zeroes the high half:
   2352 
   2353                testl $32, %ecx
   2354 
   2355                cmovnz %hi, %lo
   2356                movl $0, %tmp         -- sigh; need yet another reg
   2357                cmovnz %tmp, %hi
   2358             */
   2359             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
   2360             tLo = newVRegI(env);
   2361             tHi = newVRegI(env);
   2362             tTemp = newVRegI(env);
   2363             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2364             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2365             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
   2366             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2367             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2368             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
   2369                and those regs are legitimately modifiable. */
   2370             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
   2371             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
   2372             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
   2373             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
   2374             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
   2375             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
   2376             *rHi = tHi;
   2377             *rLo = tLo;
   2378             return;
   2379          }
   2380 
   2381          /* F64 -> I64 */
   2382          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
   2383             case.  Unfortunately I see no easy way to avoid the
   2384             duplication. */
   2385          case Iop_F64toI64S: {
   2386             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
   2387             HReg tLo = newVRegI(env);
   2388             HReg tHi = newVRegI(env);
   2389 
   2390             /* Used several times ... */
   2391             /* Careful ... this sharing is only safe because
    2392                zero_esp/four_esp do not hold any registers which the
    2393                register allocator could attempt to swizzle later. */
   2394             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   2395             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
   2396 
    2397             /* rf now holds the value to be converted; the rounding
    2398                mode, encoded as per the IRRoundingMode enum, is
    2399                supplied as arg1.  The first thing to do is set the
    2400                FPU's rounding mode accordingly. */
   2401 
   2402             /* Create a space for the format conversion. */
   2403             /* subl $8, %esp */
   2404             sub_from_esp(env, 8);
   2405 
   2406             /* Set host rounding mode */
   2407             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2408 
   2409             /* gistll %rf, 0(%esp) */
   2410             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
   2411 
   2412             /* movl 0(%esp), %dstLo */
   2413             /* movl 4(%esp), %dstHi */
   2414             addInstr(env, X86Instr_Alu32R(
   2415                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
   2416             addInstr(env, X86Instr_Alu32R(
   2417                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
   2418 
   2419             /* Restore default FPU rounding. */
   2420             set_FPU_rounding_default( env );
   2421 
   2422             /* addl $8, %esp */
   2423             add_to_esp(env, 8);
   2424 
   2425             *rHi = tHi;
   2426             *rLo = tLo;
   2427             return;
   2428          }
   2429 
   2430          case Iop_Add8x8:
   2431             fn = (HWord)h_generic_calc_Add8x8; goto binnish;
   2432          case Iop_Add16x4:
   2433             fn = (HWord)h_generic_calc_Add16x4; goto binnish;
   2434          case Iop_Add32x2:
   2435             fn = (HWord)h_generic_calc_Add32x2; goto binnish;
   2436 
   2437          case Iop_Avg8Ux8:
   2438             fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
   2439          case Iop_Avg16Ux4:
   2440             fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
   2441 
   2442          case Iop_CmpEQ8x8:
   2443             fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
   2444          case Iop_CmpEQ16x4:
   2445             fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
   2446          case Iop_CmpEQ32x2:
   2447             fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
   2448 
   2449          case Iop_CmpGT8Sx8:
   2450             fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
   2451          case Iop_CmpGT16Sx4:
   2452             fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
   2453          case Iop_CmpGT32Sx2:
   2454             fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
   2455 
   2456          case Iop_InterleaveHI8x8:
   2457             fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
   2458          case Iop_InterleaveLO8x8:
   2459             fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
   2460          case Iop_InterleaveHI16x4:
   2461             fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
   2462          case Iop_InterleaveLO16x4:
   2463             fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
   2464          case Iop_InterleaveHI32x2:
   2465             fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
   2466          case Iop_InterleaveLO32x2:
   2467             fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
   2468          case Iop_CatOddLanes16x4:
   2469             fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
   2470          case Iop_CatEvenLanes16x4:
   2471             fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
   2472          case Iop_Perm8x8:
   2473             fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
   2474 
   2475          case Iop_Max8Ux8:
   2476             fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
   2477          case Iop_Max16Sx4:
   2478             fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
   2479          case Iop_Min8Ux8:
   2480             fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
   2481          case Iop_Min16Sx4:
   2482             fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
   2483 
   2484          case Iop_Mul16x4:
   2485             fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
   2486          case Iop_Mul32x2:
   2487             fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
   2488          case Iop_MulHi16Sx4:
   2489             fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
   2490          case Iop_MulHi16Ux4:
   2491             fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
   2492 
   2493          case Iop_QAdd8Sx8:
   2494             fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
   2495          case Iop_QAdd16Sx4:
   2496             fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
   2497          case Iop_QAdd8Ux8:
   2498             fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
   2499          case Iop_QAdd16Ux4:
   2500             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
   2501 
   2502          case Iop_QNarrowBin32Sto16Sx4:
   2503             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
   2504          case Iop_QNarrowBin16Sto8Sx8:
   2505             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
   2506          case Iop_QNarrowBin16Sto8Ux8:
   2507             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
   2508          case Iop_NarrowBin16to8x8:
   2509             fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
   2510          case Iop_NarrowBin32to16x4:
   2511             fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
   2512 
   2513          case Iop_QSub8Sx8:
   2514             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
   2515          case Iop_QSub16Sx4:
   2516             fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
   2517          case Iop_QSub8Ux8:
   2518             fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
   2519          case Iop_QSub16Ux4:
   2520             fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
   2521 
   2522          case Iop_Sub8x8:
   2523             fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
   2524          case Iop_Sub16x4:
   2525             fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
   2526          case Iop_Sub32x2:
   2527             fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
   2528 
   2529          binnish: {
   2530             /* Note: the following assumes all helpers are of
   2531                signature
   2532                   ULong fn ( ULong, ULong ), and they are
   2533                not marked as regparm functions.
   2534             */
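                     /* Args go on the stack right-to-left: y's halves are
                        pushed first and x's last, so the callee sees
                        fn(x, y); the four words are cleared afterwards by
                        add_to_esp(env, 4*4). */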
   2535             HReg xLo, xHi, yLo, yHi;
   2536             HReg tLo = newVRegI(env);
   2537             HReg tHi = newVRegI(env);
   2538             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2539             addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
   2540             addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
   2541             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2542             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2543             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2544             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
   2545                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2546             add_to_esp(env, 4*4);
   2547             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2548             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2549             *rHi = tHi;
   2550             *rLo = tLo;
   2551             return;
   2552          }
   2553 
   2554          case Iop_ShlN32x2:
   2555             fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
   2556          case Iop_ShlN16x4:
   2557             fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
   2558          case Iop_ShlN8x8:
   2559             fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
   2560          case Iop_ShrN32x2:
   2561             fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
   2562          case Iop_ShrN16x4:
   2563             fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
   2564          case Iop_SarN32x2:
   2565             fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
   2566          case Iop_SarN16x4:
   2567             fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
   2568          case Iop_SarN8x8:
   2569             fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
   2570          shifty: {
   2571             /* Note: the following assumes all helpers are of
   2572                signature
   2573                   ULong fn ( ULong, UInt ), and they are
   2574                not marked as regparm functions.
   2575             */
   2576             HReg xLo, xHi;
   2577             HReg tLo = newVRegI(env);
   2578             HReg tHi = newVRegI(env);
   2579             X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   2580             addInstr(env, X86Instr_Push(y));
   2581             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2582             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2583             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2584             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
   2585                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2586             add_to_esp(env, 3*4);
   2587             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2588             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2589             *rHi = tHi;
   2590             *rLo = tLo;
   2591             return;
   2592          }
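                 /* Callee-visible frame for the block above (a sketch,
                    assuming Iop_ShlN16x4 with an immediate count):
                       pushl $count ; pushl xHi ; pushl xLo
                       call  h_generic_calc_ShlN16x4
                    so inside the callee, 4..11(%esp) hold the ULong value
                    and 12(%esp) the UInt shift count; addl $12, %esp
                    then clears the three pushed words. */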
   2593 
   2594          default:
   2595             break;
   2596       }
   2597    } /* if (e->tag == Iex_Binop) */
   2598 
   2599 
   2600    /* --------- UNARY ops --------- */
   2601    if (e->tag == Iex_Unop) {
   2602       switch (e->Iex.Unop.op) {
   2603 
   2604          /* 32Sto64(e) */
   2605          case Iop_32Sto64: {
   2606             HReg tLo = newVRegI(env);
   2607             HReg tHi = newVRegI(env);
   2608             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2609             addInstr(env, mk_iMOVsd_RR(src,tHi));
   2610             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2611             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
   2612             *rHi = tHi;
   2613             *rLo = tLo;
   2614             return;
   2615          }
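                 /* Worked example (illustrative values): src = 0xFFFFFFFE
                    (-2) makes tHi = tLo = 0xFFFFFFFE, and the SAR by 31
                    turns tHi into 0xFFFFFFFF, giving the 64-bit value
                    0xFFFFFFFF:FFFFFFFE == -2.  A non-negative src, e.g.
                    5, leaves tHi = 0. */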
   2616 
   2617          /* 32Uto64(e) */
   2618          case Iop_32Uto64: {
   2619             HReg tLo = newVRegI(env);
   2620             HReg tHi = newVRegI(env);
   2621             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2622             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2623             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2624             *rHi = tHi;
   2625             *rLo = tLo;
   2626             return;
   2627          }
   2628 
   2629          /* 16Uto64(e) */
   2630          case Iop_16Uto64: {
   2631             HReg tLo = newVRegI(env);
   2632             HReg tHi = newVRegI(env);
   2633             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2634             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2635             addInstr(env, X86Instr_Alu32R(Xalu_AND,
   2636                                           X86RMI_Imm(0xFFFF), tLo));
   2637             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2638             *rHi = tHi;
   2639             *rLo = tLo;
   2640             return;
   2641          }
   2642 
   2643          /* V128{HI}to64 */
   2644          case Iop_V128HIto64:
   2645          case Iop_V128to64: {
   2646             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
   2647             HReg tLo = newVRegI(env);
   2648             HReg tHi = newVRegI(env);
   2649             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
   2650             X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
   2651             X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
   2652             X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
   2653             sub_from_esp(env, 16);
   2654             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   2655             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
   2656                                            X86RMI_Mem(espLO), tLo ));
   2657             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
   2658                                            X86RMI_Mem(espHI), tHi ));
   2659             add_to_esp(env, 16);
   2660             *rHi = tHi;
   2661             *rLo = tLo;
   2662             return;
   2663          }
   2664 
   2665          /* could do better than this, but for now ... */
   2666          case Iop_1Sto64: {
   2667             HReg tLo = newVRegI(env);
   2668             HReg tHi = newVRegI(env);
   2669             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   2670             addInstr(env, X86Instr_Set32(cond,tLo));
   2671             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
   2672             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
   2673             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
   2674             *rHi = tHi;
   2675             *rLo = tLo;
   2676             return;
   2677          }
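                 /* Bit trace (illustrative): Set32 leaves tLo = 0 or 1;
                    SHL 31 gives 0 or 0x80000000; SAR 31 smears the sign
                    bit, giving 0 or 0xFFFFFFFF; and the copy into tHi
                    extends that to all-zeroes or all-ones over 64 bits. */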
   2678 
   2679          /* Not64(e) */
   2680          case Iop_Not64: {
   2681             HReg tLo = newVRegI(env);
   2682             HReg tHi = newVRegI(env);
   2683             HReg sHi, sLo;
   2684             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
   2685             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2686             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2687             addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
   2688             addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
   2689             *rHi = tHi;
   2690             *rLo = tLo;
   2691             return;
   2692          }
   2693 
   2694          /* Left64(e) */
   2695          case Iop_Left64: {
   2696             HReg yLo, yHi;
   2697             HReg tLo = newVRegI(env);
   2698             HReg tHi = newVRegI(env);
   2699             /* yHi:yLo = arg */
   2700             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
   2701             /* tLo = 0 - yLo, and set carry */
   2702             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
   2703             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
   2704             /* tHi = 0 - yHi - carry */
   2705             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2706             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
   2707             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
   2708                back in, so as to give the final result
   2709                tHi:tLo = arg | -arg. */
   2710             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
   2711             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
   2712             *rHi = tHi;
   2713             *rLo = tLo;
   2714             return;
   2715          }
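                 /* Example (illustrative): arg = 0x00000000:00000008
                    gives -arg = 0xFFFFFFFF:FFFFFFF8, so arg | -arg =
                    0xFFFFFFFF:FFFFFFF8 -- every bit at or above arg's
                    lowest set bit is 1, which is exactly Left64. */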
   2716 
   2717          /* --- patterns rooted at: CmpwNEZ64 --- */
   2718 
   2719          /* CmpwNEZ64(e) */
   2720          case Iop_CmpwNEZ64: {
   2721 
   2722          DECLARE_PATTERN(p_CmpwNEZ64_Or64);
   2723          DEFINE_PATTERN(p_CmpwNEZ64_Or64,
   2724                         unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
   2725          if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
   2726             /* CmpwNEZ64(Or64(x,y)) */
   2727             HReg xHi,xLo,yHi,yLo;
   2728             HReg xBoth = newVRegI(env);
   2729             HReg merged = newVRegI(env);
   2730             HReg tmp2 = newVRegI(env);
   2731 
   2732             iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
   2733             addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
   2734             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2735                                           X86RMI_Reg(xLo),xBoth));
   2736 
   2737             iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
   2738             addInstr(env, mk_iMOVsd_RR(yHi,merged));
   2739             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2740                                           X86RMI_Reg(yLo),merged));
   2741             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2742                                           X86RMI_Reg(xBoth),merged));
   2743 
   2744             /* tmp2 = (merged | -merged) >>s 31 */
   2745             addInstr(env, mk_iMOVsd_RR(merged,tmp2));
   2746             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
   2747             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2748                                           X86RMI_Reg(merged), tmp2));
   2749             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
   2750             *rHi = tmp2;
   2751             *rLo = tmp2;
   2752             return;
   2753          } else {
   2754             /* CmpwNEZ64(e) */
   2755             HReg srcLo, srcHi;
   2756             HReg tmp1  = newVRegI(env);
   2757             HReg tmp2  = newVRegI(env);
   2758             /* srcHi:srcLo = arg */
   2759             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   2760             /* tmp1 = srcHi | srcLo */
   2761             addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
   2762             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2763                                           X86RMI_Reg(srcLo), tmp1));
   2764             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
   2765             addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
   2766             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
   2767             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2768                                           X86RMI_Reg(tmp1), tmp2));
   2769             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
   2770             *rHi = tmp2;
   2771             *rLo = tmp2;
   2772             return;
   2773          }
   2774          }
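                 /* Both arms use the (w | -w) >>s 31 idiom: if w == 0 the
                    OR is 0 and the arithmetic shift gives 0; if w != 0,
                    w | -w has every bit at or above w's lowest set bit
                    equal to 1 (cf. Left64 above), so bit 31 is set and
                    the shift gives 0xFFFFFFFF.  Hence both result halves
                    can share the single register tmp2. */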
   2775 
   2776          /* ReinterpF64asI64(e) */
   2777          /* Given an IEEE754 double, produce an I64 with the same bit
   2778             pattern. */
   2779          case Iop_ReinterpF64asI64: {
   2780             HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
   2781             HReg tLo  = newVRegI(env);
   2782             HReg tHi  = newVRegI(env);
   2783             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   2784             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
   2785             /* paranoia */
   2786             set_FPU_rounding_default(env);
   2787             /* subl $8, %esp */
   2788             sub_from_esp(env, 8);
   2789             /* gstD %rf, 0(%esp) */
   2790             addInstr(env,
   2791                      X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
   2792             /* movl 0(%esp), %tLo */
   2793             addInstr(env,
   2794                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
   2795             /* movl 4(%esp), %tHi */
   2796             addInstr(env,
   2797                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
   2798             /* addl $8, %esp */
   2799             add_to_esp(env, 8);
   2800             *rHi = tHi;
   2801             *rLo = tLo;
   2802             return;
   2803          }
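                 /* Expressed as plain C, the stack round-trip above
                    computes (a sketch; d stands for the F64 value in rf):
                       union { ULong u64; Double f64; } u;
                       u.f64 = d;
                       tLo = (UInt)u.u64;           /* low 32 bits  */
                       tHi = (UInt)(u.u64 >> 32);   /* high 32 bits */
                    The 8-byte store is what converts the x87-held value
                    to the 64-bit memory format whose bits we want. */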
   2804 
   2805          case Iop_CmpNEZ32x2:
   2806             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
   2807          case Iop_CmpNEZ16x4:
   2808             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
   2809          case Iop_CmpNEZ8x8:
   2810             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
   2811          unish: {
   2812             /* Note: the following assumes all helpers are of
   2813                signature
   2814                   ULong fn ( ULong ), and they are
   2815                not marked as regparm functions.
   2816             */
   2817             HReg xLo, xHi;
   2818             HReg tLo = newVRegI(env);
   2819             HReg tHi = newVRegI(env);
   2820             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
   2821             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2822             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2823             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn,
   2824                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2825             add_to_esp(env, 2*4);
   2826             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2827             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2828             *rHi = tHi;
   2829             *rLo = tLo;
   2830             return;
   2831          }
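                 /* Same cdecl pattern as 'binnish' above, but with a
                    single ULong argument (sketch, for Iop_CmpNEZ8x8):
                       pushl xHi ; pushl xLo
                       call  h_generic_calc_CmpNEZ8x8
                       addl  $8, %esp           -- 2*4 bytes of args
                    and again the result arrives in %edx:%eax. */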
   2832 
   2833          default:
   2834             break;
   2835       }
   2836    } /* if (e->tag == Iex_Unop) */
   2837 
   2838 
   2839    /* --------- CCALL --------- */
   2840    if (e->tag == Iex_CCall) {
   2841       HReg tLo = newVRegI(env);
   2842       HReg tHi = newVRegI(env);
   2843 
   2844       /* Marshal args, do the call, clear stack. */
   2845       UInt   addToSp = 0;
   2846       RetLoc rloc    = mk_RetLoc_INVALID();
   2847       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   2848                     e->Iex.CCall.cee,
   2849                     e->Iex.CCall.retty, e->Iex.CCall.args );
   2850       vassert(is_sane_RetLoc(rloc));
   2851       vassert(rloc.pri == RLPri_2Int);
   2852       vassert(addToSp == 0);
   2854 
   2855       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2856       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2857       *rHi = tHi;
   2858       *rLo = tLo;
   2859       return;
   2860    }
   2861 
   2862    ppIRExpr(e);
   2863    vpanic("iselInt64Expr");
   2864 }
   2865 
   2866 
   2867 /*---------------------------------------------------------*/
   2868 /*--- ISEL: Floating point expressions (32 bit)         ---*/
   2869 /*---------------------------------------------------------*/
   2870 
   2871 /* Nothing interesting here; really just wrappers for
   2872    64-bit stuff. */
   2873 
   2874 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
   2875 {
   2876    HReg r = iselFltExpr_wrk( env, e );
   2877 #  if 0
   2878    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2879 #  endif
   2880    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   2881    vassert(hregIsVirtual(r));
   2882    return r;
   2883 }
   2884 
   2885 /* DO NOT CALL THIS DIRECTLY */
   2886 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
   2887 {
   2888    IRType ty = typeOfIRExpr(env->type_env,e);
   2889    vassert(ty == Ity_F32);
   2890 
   2891    if (e->tag == Iex_RdTmp) {
   2892       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   2893    }
   2894 
   2895    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2896       X86AMode* am;
   2897       HReg res = newVRegF(env);
   2898       vassert(e->Iex.Load.ty == Ity_F32);
   2899       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   2900       addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
   2901       return res;
   2902    }
   2903 
   2904    if (e->tag == Iex_Binop
   2905        && e->Iex.Binop.op == Iop_F64toF32) {
   2906       /* Although the result is still held in a standard FPU register,
   2907          we need to round it to reflect the loss of accuracy/range
   2908          entailed in casting it to a 32-bit float. */
   2909       HReg dst = newVRegF(env);
   2910       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   2911       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2912       addInstr(env, X86Instr_Fp64to32(src,dst));
   2913       set_FPU_rounding_default( env );
   2914       return dst;
   2915    }
   2916 
   2917    if (e->tag == Iex_Get) {
   2918       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
   2919                                   hregX86_EBP() );
   2920       HReg res = newVRegF(env);
   2921       addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
   2922       return res;
   2923    }
   2924 
   2925    if (e->tag == Iex_Unop
   2926        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
   2927        /* Given an I32, produce an IEEE754 float with the same bit
   2928           pattern. */
   2929       HReg    dst = newVRegF(env);
   2930       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
   2931       /* paranoia */
   2932       addInstr(env, X86Instr_Push(rmi));
   2933       addInstr(env, X86Instr_FpLdSt(
   2934                        True/*load*/, 4, dst,
   2935                        X86AMode_IR(0, hregX86_ESP())));
   2936       add_to_esp(env, 4);
   2937       return dst;
   2938    }
   2939 
   2940    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
   2941       HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
   2942       HReg dst = newVRegF(env);
   2943 
   2944       /* rf now holds the value to be rounded.  The first thing to do
   2945          is set the FPU's rounding mode accordingly. */
   2946 
   2947       /* Set host rounding mode */
   2948       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2949 
   2950       /* grndint %rf, %dst */
   2951       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
   2952 
   2953       /* Restore default FPU rounding. */
   2954       set_FPU_rounding_default( env );
   2955 
   2956       return dst;
   2957    }
   2958 
   2959    ppIRExpr(e);
   2960    vpanic("iselFltExpr_wrk");
   2961 }
   2962 
   2963 
   2964 /*---------------------------------------------------------*/
   2965 /*--- ISEL: Floating point expressions (64 bit)         ---*/
   2966 /*---------------------------------------------------------*/
   2967 
   2968 /* Compute a 64-bit floating point value into a register, the identity
   2969    of which is returned.  As with iselIntExpr_R, the reg may be either
   2970    real or virtual; in any case it must not be changed by subsequent
   2971    code emitted by the caller.  */
   2972 
   2973 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
   2974 
   2975     Type                  S (1 bit)   E (11 bits)   F (52 bits)
   2976     ----                  ---------   -----------   -----------
   2977     signalling NaN        u           2047 (max)    .0uuuuu---u
   2978                                                     (with at least
   2979                                                      one 1 bit)
   2980     quiet NaN             u           2047 (max)    .1uuuuu---u
   2981 
   2982     negative infinity     1           2047 (max)    .000000---0
   2983 
   2984     positive infinity     0           2047 (max)    .000000---0
   2985 
   2986     negative zero         1           0             .000000---0
   2987 
   2988     positive zero         0           0             .000000---0
   2989 */
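        /* Worked encodings under the layout above (standard IEEE754
           facts, for orientation only): 1.0 is S=0, E=1023, F=0, i.e.
           bit pattern 0x3FF0000000000000; -2.0 is S=1, E=1024, F=0,
           i.e. 0xC000000000000000; and 0x7FF8000000000000 is a quiet
           NaN (E=2047, top fraction bit set). */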
   2990 
   2991 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
   2992 {
   2993    HReg r = iselDblExpr_wrk( env, e );
   2994 #  if 0
   2995    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2996 #  endif
   2997    vassert(hregClass(r) == HRcFlt64);
   2998    vassert(hregIsVirtual(r));
   2999    return r;
   3000 }
   3001 
   3002 /* DO NOT CALL THIS DIRECTLY */
   3003 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
   3004 {
   3005    IRType ty = typeOfIRExpr(env->type_env,e);
   3006    vassert(e);
   3007    vassert(ty == Ity_F64);
   3008 
   3009    if (e->tag == Iex_RdTmp) {
   3010       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3011    }
   3012 
   3013    if (e->tag == Iex_Const) {
   3014       union { UInt u32x2[2]; ULong u64; Double f64; } u;
   3015       HReg freg = newVRegF(env);
   3016       vassert(sizeof(u) == 8);
   3017       vassert(sizeof(u.u64) == 8);
   3018       vassert(sizeof(u.f64) == 8);
   3019       vassert(sizeof(u.u32x2) == 8);
   3020 
   3021       if (e->Iex.Const.con->tag == Ico_F64) {
   3022          u.f64 = e->Iex.Const.con->Ico.F64;
   3023       }
   3024       else if (e->Iex.Const.con->tag == Ico_F64i) {
   3025          u.u64 = e->Iex.Const.con->Ico.F64i;
   3026       }
   3027       else
   3028          vpanic("iselDblExpr(x86): const");
   3029 
   3030       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
   3031       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
   3032       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
   3033                                     X86AMode_IR(0, hregX86_ESP())));
   3034       add_to_esp(env, 8);
   3035       return freg;
   3036    }
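           /* Illustrative instance of the constant case: 1.0 (bits
              0x3FF0000000000000) is built, in effect, as
                 pushl $0x3FF00000     -- u.u32x2[1], the high word
                 pushl $0x00000000     -- u.u32x2[0], the low word
                 <8-byte FP load from 0(%esp) into freg>
                 addl  $8, %esp
              relying on the host being little-endian, so that
              u.u32x2[0] overlays the low half of u.u64. */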
   3037 
   3038    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   3039       X86AMode* am;
   3040       HReg res = newVRegF(env);
   3041       vassert(e->Iex.Load.ty == Ity_F64);
   3042       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   3043       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
   3044       return res;
   3045    }
   3046 
   3047    if (e->tag == Iex_Get) {
   3048       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
   3049                                   hregX86_EBP() );
   3050       HReg res = newVRegF(env);
   3051       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
   3052       return res;
   3053    }
   3054 
   3055    if (e->tag == Iex_GetI) {
   3056       X86AMode* am
   3057          = genGuestArrayOffset(
   3058               env, e->Iex.GetI.descr,
   3059                    e->Iex.GetI.ix, e->Iex.GetI.bias );
   3060       HReg res = newVRegF(env);
   3061       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
   3062       return res;
   3063    }
   3064 
   3065    if (e->tag == Iex_Triop) {
   3066       X86FpOp fpop = Xfp_INVALID;
   3067       IRTriop *triop = e->Iex.Triop.details;
   3068       switch (triop->op) {
   3069          case Iop_AddF64:    fpop = Xfp_ADD; break;
   3070          case Iop_SubF64:    fpop = Xfp_SUB; break;
   3071          case Iop_MulF64:    fpop = Xfp_MUL; break;
   3072          case Iop_DivF64:    fpop = Xfp_DIV; break;
   3073          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
   3074          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
   3075          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
   3076          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
   3077          case Iop_PRemF64:   fpop = Xfp_PREM; break;
   3078          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
   3079          default: break;
   3080       }
   3081       if (fpop != Xfp_INVALID) {
   3082          HReg res  = newVRegF(env);
   3083          HReg srcL = iselDblExpr(env, triop->arg2);
   3084          HReg srcR = iselDblExpr(env, triop->arg3);
   3085          /* XXXROUNDINGFIXME */
   3086          /* set roundingmode here */
   3087          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
   3088          if (fpop != Xfp_ADD && fpop != Xfp_SUB
   3089              && fpop != Xfp_MUL && fpop != Xfp_DIV)
   3090             roundToF64(env, res);
   3091          return res;
   3092       }
   3093    }
   3094 
   3095    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
   3096       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
   3097       HReg dst = newVRegF(env);
   3098 
   3099       /* rf now holds the value to be rounded.  The first thing to do
   3100          is set the FPU's rounding mode accordingly. */
   3101 
   3102       /* Set host rounding mode */
   3103       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   3104 
   3105       /* grndint %rf, %dst */
   3106       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
   3107 
   3108       /* Restore default FPU rounding. */
   3109       set_FPU_rounding_default( env );
   3110 
   3111       return dst;
   3112    }
   3113 
   3114    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
   3115       HReg dst = newVRegF(env);
   3116       HReg rHi,rLo;
   3117       iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
   3118       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3119       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3120 
   3121       /* Set host rounding mode */
   3122       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   3123 
   3124       addInstr(env, X86Instr_FpLdStI(
   3125                        True/*load*/, 8, dst,
   3126                        X86AMode_IR(0, hregX86_ESP())));
   3127 
   3128       /* Restore default FPU rounding. */
   3129       set_FPU_rounding_default( env );
   3130 
   3131       add_to_esp(env, 8);
   3132       return dst;
   3133    }
   3134 
   3135    if (e->tag == Iex_Binop) {
   3136       X86FpOp fpop = Xfp_INVALID;
   3137       switch (e->Iex.Binop.op) {
   3138          case Iop_SinF64:  fpop = Xfp_SIN; break;
   3139          case Iop_CosF64:  fpop = Xfp_COS; break;
   3140          case Iop_TanF64:  fpop = Xfp_TAN; break;
   3141          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
   3142          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
   3143          default: break;
   3144       }
   3145       if (fpop != Xfp_INVALID) {
   3146          HReg res = newVRegF(env);
   3147          HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   3148          /* XXXROUNDINGFIXME */
   3149          /* set roundingmode here */
   3150          /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
   3151             codes.  I don't think that matters, since this insn
   3152             selector never generates such an instruction intervening
   3153             between a flag-setting instruction and a flag-using
   3154             instruction. */
   3155          addInstr(env, X86Instr_FpUnary(fpop,src,res));
   3156          if (fpop != Xfp_SQRT
   3157              && fpop != Xfp_NEG && fpop != Xfp_ABS)
   3158             roundToF64(env, res);
   3159          return res;
   3160       }
   3161    }
   3162 
   3163    if (e->tag == Iex_Unop) {
   3164       X86FpOp fpop = Xfp_INVALID;
   3165       switch (e->Iex.Unop.op) {
   3166          case Iop_NegF64:  fpop = Xfp_NEG; break;
   3167          case Iop_AbsF64:  fpop = Xfp_ABS; break;
   3168          default: break;
   3169       }
   3170       if (fpop != Xfp_INVALID) {
   3171          HReg res = newVRegF(env);
   3172          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   3173          addInstr(env, X86Instr_FpUnary(fpop,src,res));
   3174          /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
   3175             but might need to do that for other unary ops. */
   3176          return res;
   3177       }
   3178    }
   3179 
   3180    if (e->tag == Iex_Unop) {
   3181       switch (e->Iex.Unop.op) {
   3182          case Iop_I32StoF64: {
   3183             HReg dst = newVRegF(env);
   3184             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
   3185             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
   3186             set_FPU_rounding_default(env);
   3187             addInstr(env, X86Instr_FpLdStI(
   3188                              True/*load*/, 4, dst,
   3189                              X86AMode_IR(0, hregX86_ESP())));
   3190             add_to_esp(env, 4);
   3191             return dst;
   3192          }
   3193          case Iop_ReinterpI64asF64: {
   3194             /* Given an I64, produce an IEEE754 double with the same
   3195                bit pattern. */
   3196             HReg dst = newVRegF(env);
   3197             HReg rHi, rLo;
   3198             iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
   3199             /* paranoia */
   3200             set_FPU_rounding_default(env);
   3201             addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3202             addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3203             addInstr(env, X86Instr_FpLdSt(
   3204                              True/*load*/, 8, dst,
   3205                              X86AMode_IR(0, hregX86_ESP())));
   3206             add_to_esp(env, 8);
   3207             return dst;
   3208          }
   3209          case Iop_F32toF64: {
   3210             /* this is a no-op: F32 is already held at F64 precision */
   3211             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
   3212             return res;
   3213          }
   3214          default:
   3215             break;
   3216       }
   3217    }
   3218 
   3219    /* --------- MULTIPLEX --------- */
   3220    if (e->tag == Iex_ITE) { // VFD
   3221      if (ty == Ity_F64
   3222          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
   3223         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
   3224         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
   3225         HReg dst = newVRegF(env);
   3226         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
   3227         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
   3228         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
   3229         return dst;
   3230       }
   3231    }
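           /* Note on the 'cc ^ 1' above: the X86CondCode encodings
              mirror x86's, which come in complementary pairs differing
              only in bit 0 (e.g. Xcc_Z and Xcc_NZ), so XORing with 1
              inverts the test.  dst starts out as iftrue and is
              conditionally overwritten with iffalse. */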
   3232 
   3233    ppIRExpr(e);
   3234    vpanic("iselDblExpr_wrk");
   3235 }
   3236 
   3237 
   3238 /*---------------------------------------------------------*/
   3239 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
   3240 /*---------------------------------------------------------*/
   3241 
   3242 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
   3243 {
   3244    HReg r = iselVecExpr_wrk( env, e );
   3245 #  if 0
   3246    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3247 #  endif
   3248    vassert(hregClass(r) == HRcVec128);
   3249    vassert(hregIsVirtual(r));
   3250    return r;
   3251 }
   3252 
   3253 
   3254 /* DO NOT CALL THIS DIRECTLY */
   3255 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
   3256 {
   3257 
   3258 #  define REQUIRE_SSE1                                    \
   3259       do { if (env->hwcaps == 0/*baseline, no sse*/       \
   3260                ||  env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
   3261               goto vec_fail;                              \
   3262       } while (0)
   3263 
   3264 #  define REQUIRE_SSE2                                    \
   3265       do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
   3266               goto vec_fail;                              \
   3267       } while (0)
   3268 
   3269 #  define SSE2_OR_ABOVE                                   \
   3270        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
   3271 
   3272    HWord     fn = 0; /* address of helper fn, if required */
   3273    MatchInfo mi;
   3274    Bool      arg1isEReg = False;
   3275    X86SseOp  op = Xsse_INVALID;
   3276    IRType    ty = typeOfIRExpr(env->type_env,e);
   3277    vassert(e);
   3278    vassert(ty == Ity_V128);
   3279 
   3280    REQUIRE_SSE1;
   3281 
   3282    if (e->tag == Iex_RdTmp) {
   3283       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3284    }
   3285 
   3286    if (e->tag == Iex_Get) {
   3287       HReg dst = newVRegV(env);
   3288       addInstr(env, X86Instr_SseLdSt(
   3289                        True/*load*/,
   3290                        dst,
   3291                        X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
   3292                     )
   3293               );
   3294       return dst;
   3295    }
   3296 
   3297    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   3298       HReg      dst = newVRegV(env);
   3299       X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
   3300       addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
   3301       return dst;
   3302    }
   3303 
   3304    if (e->tag == Iex_Const) {
   3305       HReg dst = newVRegV(env);
   3306       vassert(e->Iex.Const.con->tag == Ico_V128);
   3307       addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
   3308       return dst;
   3309    }
   3310 
   3311    if (e->tag == Iex_Unop) {
   3312 
   3313    if (SSE2_OR_ABOVE) {
   3314       /* 64UtoV128(LDle:I64(addr)) */
   3315       DECLARE_PATTERN(p_zwiden_load64);
   3316       DEFINE_PATTERN(p_zwiden_load64,
   3317                      unop(Iop_64UtoV128,
   3318                           IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
   3319       if (matchIRExpr(&mi, p_zwiden_load64, e)) {
   3320          X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
   3321          HReg dst = newVRegV(env);
   3322          addInstr(env, X86Instr_SseLdzLO(8, dst, am));
   3323          return dst;
   3324       }
   3325    }
   3326 
   3327    switch (e->Iex.Unop.op) {
   3328 
   3329       case Iop_NotV128: {
   3330          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3331          return do_sse_Not128(env, arg);
   3332       }
   3333 
   3334       case Iop_CmpNEZ64x2: {
   3335          /* We can use SSE2 instructions for this. */
   3336          /* Ideally, we want to do a 64Ix2 comparison against zero of
   3337             the operand.  Problem is no such insn exists.  Solution
   3338             therefore is to do a 32Ix4 comparison instead, and bitwise-
   3339             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
   3340             let the not'd result of this initial comparison be a:b:c:d.
   3341             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
   3342             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
   3343             giving the required result.
   3344 
   3345             The required selection sequence is 2,3,0,1, which
   3346             according to Intel's documentation means the pshufd
   3347             literal value is 0xB1, that is,
   3348             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
   3349          */
   3350          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
   3351          HReg tmp  = newVRegV(env);
   3352          HReg dst  = newVRegV(env);
   3353          REQUIRE_SSE2;
   3354          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
   3355          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
   3356          tmp = do_sse_Not128(env, tmp);
   3357          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
   3358          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
   3359          return dst;
   3360       }
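              /* Checking the literal: 0xB1 = (2<<6)|(3<<4)|(0<<2)|(1<<0)
                 sends input lanes 2,3,0,1 to output lanes 3,2,1,0.  So
                 with the not'd comparison a:b:c:d, dst first becomes
                 b:a:d:c, and the OR then yields (a|b):(a|b):(c|d):(c|d),
                 as required. */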
   3361 
   3362       case Iop_CmpNEZ32x4: {
   3363          /* Sigh, we have to generate lousy code since this has to
   3364             work on SSE1 hosts */
   3365          /* basically, the idea is: for each lane:
   3366                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
   3367                sbbl %r, %r               (now %r = 1Sto32(CF))
   3368                movl %r, lane
   3369          */
   3370          Int       i;
   3371          X86AMode* am;
   3372          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3373          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
   3374          HReg      dst  = newVRegV(env);
   3375          HReg      r32  = newVRegI(env);
   3376          sub_from_esp(env, 16);
   3377          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
   3378          for (i = 0; i < 4; i++) {
   3379             am = X86AMode_IR(i*4, hregX86_ESP());
   3380             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
   3381             addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
   3382             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
   3383             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
   3384          }
   3385          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3386          add_to_esp(env, 16);
   3387          return dst;
   3388       }
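              /* Per-lane trace (illustrative): a lane of 0 leaves CF
                 clear after NEG, so SBB computes r - r - 0 = 0; a
                 nonzero lane sets CF, so SBB computes r - r - 1 =
                 0xFFFFFFFF.  Each lane thus becomes the required
                 all-zeroes/all-ones mask. */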
   3389 
   3390       case Iop_CmpNEZ8x16:
   3391       case Iop_CmpNEZ16x8: {
   3392          /* We can use SSE2 instructions for this. */
   3393          HReg arg;
   3394          HReg vec0 = newVRegV(env);
   3395          HReg vec1 = newVRegV(env);
   3396          HReg dst  = newVRegV(env);
   3397          X86SseOp cmpOp
   3398             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
   3399                                              : Xsse_CMPEQ8;
   3400          REQUIRE_SSE2;
   3401          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
   3402          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
   3403          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
   3404          /* defer arg computation to here so as to give CMPEQF as long
   3405             as possible to complete */
   3406          arg = iselVecExpr(env, e->Iex.Unop.arg);
   3407          /* vec0 is all 0s; vec1 is all 1s */
   3408          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3409          /* 16x8 or 8x16 comparison == */
   3410          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
   3411          /* invert result */
   3412          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
   3413          return dst;
   3414       }
   3415 
   3416       case Iop_Recip32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
   3417       case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
   3418       case Iop_Sqrt32Fx4:  op = Xsse_SQRTF;  goto do_32Fx4_unary;
   3419       do_32Fx4_unary:
   3420       {
   3421          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3422          HReg dst = newVRegV(env);
   3423          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
   3424          return dst;
   3425       }
   3426 
   3427       case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
   3428       case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
   3429       case Iop_Sqrt64Fx2:  op = Xsse_SQRTF;  goto do_64Fx2_unary;
   3430       do_64Fx2_unary:
   3431       {
   3432          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3433          HReg dst = newVRegV(env);
   3434          REQUIRE_SSE2;
   3435          addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
   3436          return dst;
   3437       }
   3438 
   3439       case Iop_Recip32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
   3440       case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
   3441       case Iop_Sqrt32F0x4:  op = Xsse_SQRTF;  goto do_32F0x4_unary;
   3442       do_32F0x4_unary:
   3443       {
   3444          /* A bit subtle.  We have to copy the arg to the result
   3445             register first, because actually doing the SSE scalar insn
   3446             leaves the upper 3/4 of the destination register
   3447             unchanged.  Whereas the required semantics of these
   3448             primops is that the upper 3/4 is simply copied in from the
   3449             argument. */
   3450          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3451          HReg dst = newVRegV(env);
   3452          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3453          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
   3454          return dst;
   3455       }
   3456 
   3457       case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
   3458       case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
   3459       case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
   3460       do_64F0x2_unary:
   3461       {
   3462          /* A bit subtle.  We have to copy the arg to the result
   3463             register first, because actually doing the SSE scalar insn
   3464             leaves the upper half of the destination register
   3465             unchanged.  Whereas the required semantics of these
   3466             primops is that the upper half is simply copied in from the
   3467             argument. */
   3468          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3469          HReg dst = newVRegV(env);
   3470          REQUIRE_SSE2;
   3471          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3472          addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
   3473          return dst;
   3474       }
   3475 
   3476       case Iop_32UtoV128: {
   3477          HReg      dst  = newVRegV(env);
   3478          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3479          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
   3480          addInstr(env, X86Instr_Push(rmi));
   3481          addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
   3482          add_to_esp(env, 4);
   3483          return dst;
   3484       }
   3485 
   3486       case Iop_64UtoV128: {
   3487          HReg      rHi, rLo;
   3488          HReg      dst  = newVRegV(env);
   3489          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3490          iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
   3491          addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3492          addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3493          addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
   3494          add_to_esp(env, 8);
   3495          return dst;
   3496       }
   3497 
   3498       default:
   3499          break;
   3500    } /* switch (e->Iex.Unop.op) */
   3501    } /* if (e->tag == Iex_Unop) */
   3502 
   3503    if (e->tag == Iex_Binop) {
   3504    switch (e->Iex.Binop.op) {
   3505 
   3506       case Iop_SetV128lo32: {
   3507          HReg dst = newVRegV(env);
   3508          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
   3509          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
   3510          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3511          sub_from_esp(env, 16);
   3512          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
   3513          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
   3514          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3515          add_to_esp(env, 16);
   3516          return dst;
   3517       }
   3518 
   3519       case Iop_SetV128lo64: {
   3520          HReg dst = newVRegV(env);
   3521          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
   3522          HReg srcIhi, srcIlo;
   3523          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3524          X86AMode* esp4 = advance4(esp0);
   3525          iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
   3526          sub_from_esp(env, 16);
   3527          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
   3528          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
   3529          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
   3530          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3531          add_to_esp(env, 16);
   3532          return dst;
   3533       }
   3534 
   3535       case Iop_64HLtoV128: {
   3536          HReg r3, r2, r1, r0;
   3537          X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
   3538          X86AMode* esp4  = advance4(esp0);
   3539          X86AMode* esp8  = advance4(esp4);
   3540          X86AMode* esp12 = advance4(esp8);
   3541          HReg dst = newVRegV(env);
   3542          /* do this via the stack (easy, convenient, etc) */
   3543          sub_from_esp(env, 16);
   3544          /* Do the less significant 64 bits */
   3545          iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
   3546          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
   3547          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
   3548          /* Do the more significant 64 bits */
   3549          iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
   3550          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
   3551          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
   3552          /* Fetch result back from stack. */
   3553          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3554          add_to_esp(env, 16);
   3555          return dst;
   3556       }
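              /* Resulting little-endian image (illustrative): arg2's low
                 and high words land at 0(%esp) and 4(%esp), arg1's at
                 8(%esp) and 12(%esp), so the SSE load yields arg1:arg2
                 with arg1 as the upper 64 bits, as 64HLtoV128 needs. */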
   3557 
   3558       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
   3559       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
   3560       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
   3561       case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
   3562       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
   3563       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
   3564       do_32Fx4:
   3565       {
   3566          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3567          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3568          HReg dst = newVRegV(env);
   3569          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3570          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
   3571          return dst;
   3572       }
   3573 
   3574       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
   3575       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
   3576       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
   3577       case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
   3578       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
   3579       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
   3580       do_64Fx2:
   3581       {
   3582          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3583          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3584          HReg dst = newVRegV(env);
   3585          REQUIRE_SSE2;
   3586          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3587          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
   3588          return dst;
   3589       }
   3590 
   3591       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
   3592       case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
   3593       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
   3594       case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
   3595       case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
   3596       case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
   3597       case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
   3598       case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
   3599       case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
   3600       case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
   3601       do_32F0x4: {
   3602          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3603          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3604          HReg dst = newVRegV(env);
   3605          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3606          addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
   3607          return dst;
   3608       }
   3609 
   3610       case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
   3611       case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
   3612       case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
   3613       case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
   3614       case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
   3615       case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
   3616       case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
   3617       case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
   3618       case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
   3619       case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
   3620       do_64F0x2: {
   3621          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3622          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3623          HReg dst = newVRegV(env);
   3624          REQUIRE_SSE2;
   3625          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3626          addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
   3627          return dst;
   3628       }
   3629 
   3630       case Iop_QNarrowBin32Sto16Sx8:
   3631          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
   3632       case Iop_QNarrowBin16Sto8Sx16:
   3633          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
   3634       case Iop_QNarrowBin16Sto8Ux16:
   3635          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
   3636 
   3637       case Iop_InterleaveHI8x16:
   3638          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
   3639       case Iop_InterleaveHI16x8:
   3640          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
   3641       case Iop_InterleaveHI32x4:
   3642          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
   3643       case Iop_InterleaveHI64x2:
   3644          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
   3645 
   3646       case Iop_InterleaveLO8x16:
   3647          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
   3648       case Iop_InterleaveLO16x8:
   3649          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
   3650       case Iop_InterleaveLO32x4:
   3651          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
   3652       case Iop_InterleaveLO64x2:
   3653          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
   3654 
   3655       case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
   3656       case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
   3657       case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
   3658       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
   3659       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
   3660       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
   3661       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
   3662       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
   3663       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
   3664       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
   3665       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
   3666       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
   3667       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
   3668       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
   3669       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
   3670       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
   3671       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
   3672       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
   3673       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
   3674       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
   3675       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
   3676       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
   3677       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
   3678       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
   3679       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
   3680       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
   3681       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
   3682       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
   3683       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
   3684       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
   3685       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
   3686       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
   3687       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
   3688       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
   3689       do_SseReRg: {
   3690          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
   3691          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
   3692          HReg dst = newVRegV(env);
   3693          if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
   3694             REQUIRE_SSE2;
   3695          if (arg1isEReg) {
   3696             addInstr(env, mk_vMOVsd_RR(arg2, dst));
   3697             addInstr(env, X86Instr_SseReRg(op, arg1, dst));
   3698          } else {
   3699             addInstr(env, mk_vMOVsd_RR(arg1, dst));
   3700             addInstr(env, X86Instr_SseReRg(op, arg2, dst));
   3701          }
   3702          return dst;
   3703       }
   3704 
   3705       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
   3706       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
   3707       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
   3708       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
   3709       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
   3710       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
   3711       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
   3712       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
   3713       do_SseShift: {
   3714          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
   3715          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   3716          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3717          HReg      ereg = newVRegV(env);
   3718          HReg      dst  = newVRegV(env);
   3719          REQUIRE_SSE2;
   3720          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3721          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3722          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3723          addInstr(env, X86Instr_Push(rmi));
   3724          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
   3725          addInstr(env, mk_vMOVsd_RR(greg, dst));
   3726          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
   3727          add_to_esp(env, 16);
   3728          return dst;
   3729       }
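              /* The four pushes build a 16-byte operand at 0(%esp): the
                 count word from rmi at 0(%esp) with three zero words
                 above it.  That is the form the SSE2 shift-by-register
                 instructions (psllw etc.) expect: the count in the low
                 64 bits of an xmm operand.  Illustrative image for a
                 count of 5: 0(%esp)=5, 4..15(%esp)=0. */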
   3730 
   3731       case Iop_NarrowBin32to16x8:
   3732          fn = (HWord)h_generic_calc_NarrowBin32to16x8;
   3733          goto do_SseAssistedBinary;
   3734       case Iop_NarrowBin16to8x16:
   3735          fn = (HWord)h_generic_calc_NarrowBin16to8x16;
   3736          goto do_SseAssistedBinary;
   3737       do_SseAssistedBinary: {
   3738          /* As with the amd64 case (where this is copied from) we
   3739             generate pretty bad code. */
   3740          vassert(fn != 0);
   3741          HReg dst = newVRegV(env);
   3742          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3743          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3744          HReg argp = newVRegI(env);
   3745          /* subl $112, %esp         -- make a space */
   3746          sub_from_esp(env, 112);
   3747          /* leal 48(%esp), %r_argp  -- point into it */
   3748          addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
   3749                                       argp));
   3750          /* andl $-16, %r_argp      -- 16-align the pointer */
   3751          addInstr(env, X86Instr_Alu32R(Xalu_AND,
   3752                                        X86RMI_Imm( ~(UInt)15 ),
   3753                                        argp));
   3754          /* Prepare 3 arg regs:
   3755             leal  0(%r_argp), %eax
   3756             leal 16(%r_argp), %edx
   3757             leal 32(%r_argp), %ecx
   3758          */
   3759          addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
   3760                                       hregX86_EAX()));
   3761          addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
   3762                                       hregX86_EDX()));
   3763          addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
   3764                                       hregX86_ECX()));
   3765          /* Store the two args, at (%edx) and (%ecx):
   3766             movupd  %argL, 0(%edx)
   3767             movupd  %argR, 0(%ecx)
   3768          */
   3769          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
   3770                                         X86AMode_IR(0, hregX86_EDX())));
   3771          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
   3772                                         X86AMode_IR(0, hregX86_ECX())));
   3773          /* call the helper */
   3774          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   3775                                       3, mk_RetLoc_simple(RLPri_None) ));
   3776          /* fetch the result from memory, using %r_argp, which the
   3777             register allocator will keep alive across the call. */
   3778          addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
   3779                                         X86AMode_IR(0, argp)));
   3780          /* and finally, clear the space */
   3781          add_to_esp(env, 112);
   3782          return dst;
   3783       }
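              /* Why 48 and 112 are enough (illustrative arithmetic):
                 andl $-16 lowers argp by at most 15, so argp lands in
                 [esp+33, esp+48]; the three 16-byte slots at argp,
                 argp+16 and argp+32 then end no higher than esp+96,
                 safely inside the 112-byte scratch area. */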
   3784 
   3785       default:
   3786          break;
   3787    } /* switch (e->Iex.Binop.op) */
   3788    } /* if (e->tag == Iex_Binop) */
   3789 
   3790 
   3791    if (e->tag == Iex_Triop) {
   3792    IRTriop *triop = e->Iex.Triop.details;
   3793    switch (triop->op) {
   3794 
   3795       case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
   3796       case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
   3797       case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
   3798       case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
   3799       do_32Fx4_w_rm:
   3800       {
   3801          HReg argL = iselVecExpr(env, triop->arg2);
   3802          HReg argR = iselVecExpr(env, triop->arg3);
   3803          HReg dst = newVRegV(env);
   3804          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3805          /* XXXROUNDINGFIXME */
   3806          /* set roundingmode here */
   3807          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
   3808          return dst;
   3809       }
   3810 
   3811       case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
   3812       case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
   3813       case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
   3814       case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
   3815       do_64Fx2_w_rm:
   3816       {
   3817          HReg argL = iselVecExpr(env, triop->arg2);
   3818          HReg argR = iselVecExpr(env, triop->arg3);
   3819          HReg dst = newVRegV(env);
   3820          REQUIRE_SSE2;
   3821          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3822          /* XXXROUNDINGFIXME */
   3823          /* set roundingmode here */
   3824          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
   3825          return dst;
   3826       }
   3827 
   3828       default:
   3829          break;
   3830    } /* switch (triop->op) */
   3831    } /* if (e->tag == Iex_Triop) */
   3832 
   3833 
   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end   = stmt->Ist.Store.end;

      if (tya != Ity_I32 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                       r,am ));
         return;
      }
      if (tyd == Ity_F64) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
         return;
      }
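      /* There is no single-instruction 64-bit integer store here, so
         split it into two 32-bit moves: low word at offset 0, high
         word at offset 4 (little-endian). */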
      if (tyd == Ity_I64) {
         HReg vHi, vLo, rA;
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
         return;
      }
      if (tyd == Ity_V128) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I32) {
         /* We're going to write to memory, so compute the RHS into an
            X86RI. */
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  X86Instr_Alu32M(
                     Xalu_MOV,
                     ri,
                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          r,
                          X86AMode_IR(stmt->Ist.Put.offset,
                                      hregX86_EBP())));
         return;
      }
      if (ty == Ity_I64) {
         HReg vHi, vLo;
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
         return;
      }
      if (ty == Ity_V128) {
         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      X86AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo;
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&rHi, &rLo, env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for
         Memcheck-generated IR) we get t = address-expression, with t
         used twice afterwards, and so this naturally turns the
         address expression back into an X86 amode. */
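      /* A hypothetical example: if the data is
         Add32(Add32(t9,Shl32(t10,2)),0x18) and t9/t10 are already in
         registers, the whole expression should fold into a single
         "leal 0x18(t9,t10,4), dst". */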
      if (ty == Ity_I32
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Xam.IR.reg;
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
         } else {
            addInstr(env, X86Instr_Lea32(am,dst));
         }
         return;
      }

      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Set32(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         case Ity_I64: {
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            vassert(rloc.pri == RLPri_2Int);
            vassert(addToSp == 0);
            HReg dstHi, dstLo;
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc tells us
               where.  Fish it off the stack and then move the stack
               pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg      dst = lookupIRTemp(env, d->tmp);
            X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
            addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
            add_to_esp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
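   /* X86Instr_MFence is given hwcaps because the strongest available
      fence depends on the ISA level: presumably a real "mfence" on
      SSE2-capable hosts, with a locked memory operation as the
      fallback on older ones. */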
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
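         /* Roughly the sequence emitted below, for the 32-bit case
            (X86Instr_ACAS presumably becomes a LOCK-prefixed
            cmpxchg):
               movl   rExpdLo, rOldLo
               movl   rExpdLo, %eax
               movl   rDataLo, %ebx
               lock ; cmpxchgl %ebx, am
               cmovnz %eax, rOldLo  -- on failure, %eax holds the
                                       value actually found in memory
         */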
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
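         /* X86Instr_DACAS presumably becomes "lock ; cmpxchg8b am",
            which compares %edx:%eax against the 64-bit value at am
            and, on success, stores %ecx:%ebx there; on failure it
            loads the in-memory value into %edx:%eax instead. */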
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

      X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                      hregX86_EBP());

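      /* Guarded exits come in two flavours: X86Instr_XDirect is a
         patchable direct jump, usable only when chaining is allowed,
         while X86Instr_XAssisted returns control to the dispatcher
         along with a jump-kind code saying why. */
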
      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                           amEIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only allowable alternative. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_MapFail:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_int128:
         case Ijk_Sys_int129:
         case Ijk_Sys_int130:
         case Ijk_Sys_syscall:
         case Ijk_Sys_sysenter:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only allowable alternative. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
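   /* XIndir is the chainable form of an indirect transfer;
      XAssisted is, again, the slower fallback that re-enters the
      dispatcher with a reason code. */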
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( IRSB* bb,
                          VexArch      arch_host,
                          VexArchInfo* archinfo_host,
                          VexAbiInfo*  vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr64 max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));
   vassert(sizeof(max_ga) == 8);
   vassert((max_ga >> 32) == 0);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt32, True);
                        hregHI = mkHReg(j++, HRcInt32, True); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
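
   /* So, for example, an Ity_I64 temp consumes two HRcInt32 vregs,
      with the low half in vregmap[] and the high half in
      vregmapHI[], while the narrower integer types each fit in a
      single 32-bit vreg. */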
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
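
   /* A sketch of what EvCheck presumably expands to:
         decl <amCounter>       -- one fewer event allowed
         jns  nofail            -- counter still >= 0?  carry on
         jmp  *<amFailAddr>     -- otherwise exit to the scheduler
      nofail:
   */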

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/