      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_x86_isel.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2017 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 
     30    Neither the names of the U.S. Department of Energy nor the
     31    University of California nor the names of its contributors may be
     32    used to endorse or promote products derived from this software
     33    without prior written permission.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 
     40 #include "ir_match.h"
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "host_generic_regs.h"
     44 #include "host_generic_simd64.h"
     45 #include "host_generic_simd128.h"
     46 #include "host_x86_defs.h"
     47 
     48 /* TODO 21 Apr 2005:
     49 
     50    -- (Really an assembler issue) don't emit CMov32 as a cmov
     51       insn, since that's expensive on P4 and conditional branch
     52       is cheaper if (as we expect) the condition is highly predictable
     53 
     54    -- preserve xmm registers across function calls (by declaring them
     55       as trashed by call insns)
     56 
     57    -- preserve x87 ST stack discipline across function calls.  Sigh.
     58 
     59    -- Check doHelperCall: if a call is conditional, we cannot safely
     60       compute any regparm args directly to registers.  Hence, the
     61       fast-regparm marshalling should be restricted to unconditional
     62       calls only.
     63 */
     64 
     65 /*---------------------------------------------------------*/
     66 /*--- x87 control word stuff                            ---*/
     67 /*---------------------------------------------------------*/
     68 
     69 /* Vex-generated code expects to run with the FPU set as follows: all
     70    exceptions masked, round-to-nearest, precision = 53 bits.  This
     71    corresponds to a FPU control word value of 0x027F.
     72 
     73    Similarly the SSE control word (%mxcsr) should be 0x1F80.
     74 
     75    %fpucw and %mxcsr should have these values on entry to
      76    Vex-generated code, and those values should be unchanged
      77    at exit.
     78 */
     79 
     80 #define DEFAULT_FPUCW 0x027F
     81 
     82 /* debugging only, do not use */
     83 /* define DEFAULT_FPUCW 0x037F */
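
         /* For reference, 0x027F decodes as: exception-mask bits 0..5 all
            set (IM/DM/ZM/OM/UM/PM), reserved bit 6 set, precision control
            (bits 9..8) = 10b = 53-bit mantissa, and rounding control (bits
            11..10) = 00b = round to nearest.  Likewise 0x1F80 in %mxcsr
            sets the six exception-mask bits (12..7) and leaves RC (bits
            14..13) = 00b = round to nearest, with all sticky flags clear. */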
     84 
     85 
     86 /*---------------------------------------------------------*/
     87 /*--- misc helpers                                      ---*/
     88 /*---------------------------------------------------------*/
     89 
     90 /* These are duplicated in guest-x86/toIR.c */
     91 static IRExpr* unop ( IROp op, IRExpr* a )
     92 {
     93    return IRExpr_Unop(op, a);
     94 }
     95 
     96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
     97 {
     98    return IRExpr_Binop(op, a1, a2);
     99 }
    100 
    101 static IRExpr* bind ( Int binder )
    102 {
    103    return IRExpr_Binder(binder);
    104 }
    105 
    106 static Bool isZeroU8 ( IRExpr* e )
    107 {
    108    return e->tag == Iex_Const
    109           && e->Iex.Const.con->tag == Ico_U8
    110           && e->Iex.Const.con->Ico.U8 == 0;
    111 }
    112 
    113 static Bool isZeroU32 ( IRExpr* e )
    114 {
    115    return e->tag == Iex_Const
    116           && e->Iex.Const.con->tag == Ico_U32
    117           && e->Iex.Const.con->Ico.U32 == 0;
    118 }
    119 
    120 //static Bool isZeroU64 ( IRExpr* e )
    121 //{
    122 //   return e->tag == Iex_Const
    123 //          && e->Iex.Const.con->tag == Ico_U64
    124 //          && e->Iex.Const.con->Ico.U64 == 0ULL;
    125 //}
    126 
    127 
    128 /*---------------------------------------------------------*/
    129 /*--- ISelEnv                                           ---*/
    130 /*---------------------------------------------------------*/
    131 
    132 /* This carries around:
    133 
    134    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
    135      might encounter.  This is computed before insn selection starts,
    136      and does not change.
    137 
    138    - A mapping from IRTemp to HReg.  This tells the insn selector
    139      which virtual register(s) are associated with each IRTemp
    140      temporary.  This is computed before insn selection starts, and
    141      does not change.  We expect this mapping to map precisely the
    142      same set of IRTemps as the type mapping does.
    143 
    144         - vregmap   holds the primary register for the IRTemp.
    145         - vregmapHI is only used for 64-bit integer-typed
    146              IRTemps.  It holds the identity of a second
    147              32-bit virtual HReg, which holds the high half
    148              of the value.
    149 
    150    - The code array, that is, the insns selected so far.
    151 
    152    - A counter, for generating new virtual registers.
    153 
    154    - The host subarchitecture we are selecting insns for.
    155      This is set at the start and does not change.
    156 
    157    - A Bool for indicating whether we may generate chain-me
    158      instructions for control flow transfers, or whether we must use
    159      XAssisted.
    160 
    161    - The maximum guest address of any guest insn in this block.
    162      Actually, the address of the highest-addressed byte from any insn
     163    in this block.  It is set at the start and does not change.  This is
    164      used for detecting jumps which are definitely forward-edges from
    165      this block, and therefore can be made (chained) to the fast entry
    166      point of the destination, thereby avoiding the destination's
    167      event check.
    168 
    169    Note, this is all (well, mostly) host-independent.
    170 */
    171 
    172 typedef
    173    struct {
     174       /* Constants -- set at the start and do not change. */
    175       IRTypeEnv*   type_env;
    176 
    177       HReg*        vregmap;
    178       HReg*        vregmapHI;
    179       Int          n_vregmap;
    180 
    181       UInt         hwcaps;
    182 
    183       Bool         chainingAllowed;
    184       Addr32       max_ga;
    185 
    186       /* These are modified as we go along. */
    187       HInstrArray* code;
    188       Int          vreg_ctr;
    189    }
    190    ISelEnv;
    191 
    192 
    193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
    194 {
    195    vassert(tmp >= 0);
    196    vassert(tmp < env->n_vregmap);
    197    return env->vregmap[tmp];
    198 }
    199 
    200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
    201 {
    202    vassert(tmp >= 0);
    203    vassert(tmp < env->n_vregmap);
    204    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
    205    *vrLO = env->vregmap[tmp];
    206    *vrHI = env->vregmapHI[tmp];
    207 }
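
         /* Example: for a temp t5 :: Ity_I64, vregmap[5] names the vreg
            holding the low 32 bits and vregmapHI[5] the vreg holding the
            high 32 bits; for 32-bit-or-narrower temps, vregmapHI[5] is
            INVALID_HREG. */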
    208 
    209 static void addInstr ( ISelEnv* env, X86Instr* instr )
    210 {
    211    addHInstr(env->code, instr);
    212    if (vex_traceflags & VEX_TRACE_VCODE) {
    213       ppX86Instr(instr, False);
    214       vex_printf("\n");
    215    }
    216 }
    217 
    218 static HReg newVRegI ( ISelEnv* env )
    219 {
    220    HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
    221    env->vreg_ctr++;
    222    return reg;
    223 }
    224 
    225 static HReg newVRegF ( ISelEnv* env )
    226 {
    227    HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
    228    env->vreg_ctr++;
    229    return reg;
    230 }
    231 
    232 static HReg newVRegV ( ISelEnv* env )
    233 {
    234    HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
    235    env->vreg_ctr++;
    236    return reg;
    237 }
    238 
    239 
    240 /*---------------------------------------------------------*/
    241 /*--- ISEL: Forward declarations                        ---*/
    242 /*---------------------------------------------------------*/
    243 
    244 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
     245    iselXXX_wrk functions do the real work, but are not to be called
     246    directly.  For each XXX, iselXXX calls its iselXXX_wrk counterpart,
     247    then checks that all returned registers are virtual.  Never call
     248    the _wrk versions directly.
    249 */
    250 static X86RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
    251 static X86RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );
    252 
    253 static X86RI*      iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e );
    254 static X86RI*      iselIntExpr_RI     ( ISelEnv* env, const IRExpr* e );
    255 
    256 static X86RM*      iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e );
    257 static X86RM*      iselIntExpr_RM     ( ISelEnv* env, const IRExpr* e );
    258 
    259 static HReg        iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e );
    260 static HReg        iselIntExpr_R     ( ISelEnv* env, const IRExpr* e );
    261 
    262 static X86AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
    263 static X86AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );
    264 
    265 static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
    266                                        ISelEnv* env, const IRExpr* e );
    267 static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
    268                                        ISelEnv* env, const IRExpr* e );
    269 
    270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e );
    271 static X86CondCode iselCondCode     ( ISelEnv* env, const IRExpr* e );
    272 
    273 static HReg        iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e );
    274 static HReg        iselDblExpr     ( ISelEnv* env, const IRExpr* e );
    275 
    276 static HReg        iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e );
    277 static HReg        iselFltExpr     ( ISelEnv* env, const IRExpr* e );
    278 
    279 static HReg        iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e );
    280 static HReg        iselVecExpr     ( ISelEnv* env, const IRExpr* e );
    281 
    282 
    283 /*---------------------------------------------------------*/
    284 /*--- ISEL: Misc helpers                                ---*/
    285 /*---------------------------------------------------------*/
    286 
     287 /* Make an int reg-reg move. */
    288 
    289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
    290 {
    291    vassert(hregClass(src) == HRcInt32);
    292    vassert(hregClass(dst) == HRcInt32);
    293    return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
    294 }
    295 
    296 
    297 /* Make a vector reg-reg move. */
    298 
    299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
    300 {
    301    vassert(hregClass(src) == HRcVec128);
    302    vassert(hregClass(dst) == HRcVec128);
    303    return X86Instr_SseReRg(Xsse_MOV, src, dst);
    304 }
    305 
    306 /* Advance/retreat %esp by n. */
    307 
    308 static void add_to_esp ( ISelEnv* env, Int n )
    309 {
    310    vassert(n > 0 && n < 256 && (n%4) == 0);
    311    addInstr(env,
    312             X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
    313 }
    314 
    315 static void sub_from_esp ( ISelEnv* env, Int n )
    316 {
    317    vassert(n > 0 && n < 256 && (n%4) == 0);
    318    addInstr(env,
    319             X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
    320 }
    321 
    322 
    323 /* Given an amode, return one which references 4 bytes further
    324    along. */
    325 
    326 static X86AMode* advance4 ( X86AMode* am )
    327 {
    328    X86AMode* am4 = dopyX86AMode(am);
    329    switch (am4->tag) {
    330       case Xam_IRRS:
    331          am4->Xam.IRRS.imm += 4; break;
    332       case Xam_IR:
    333          am4->Xam.IR.imm += 4; break;
    334       default:
    335          vpanic("advance4(x86,host)");
    336    }
    337    return am4;
    338 }
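
         /* For example, advance4 turns 8(%esp) into 12(%esp), and
            4(%ebp,%esi,2) into 8(%ebp,%esi,2). */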
    339 
    340 
    341 /* Push an arg onto the host stack, in preparation for a call to a
    342    helper function of some kind.  Returns the number of 32-bit words
    343    pushed.  If we encounter an IRExpr_VECRET() then we expect that
    344    r_vecRetAddr will be a valid register, that holds the relevant
    345    address.
    346 */
    347 static Int pushArg ( ISelEnv* env, IRExpr* arg, HReg r_vecRetAddr )
    348 {
    349    if (UNLIKELY(arg->tag == Iex_VECRET)) {
    350       vassert(0); //ATC
    351       vassert(!hregIsInvalid(r_vecRetAddr));
    352       addInstr(env, X86Instr_Push(X86RMI_Reg(r_vecRetAddr)));
    353       return 1;
    354    }
    355    if (UNLIKELY(arg->tag == Iex_GSPTR)) {
    356       addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
    357       return 1;
    358    }
    359    /* Else it's a "normal" expression. */
    360    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
    361    if (arg_ty == Ity_I32) {
    362       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
    363       return 1;
    364    } else
    365    if (arg_ty == Ity_I64) {
    366       HReg rHi, rLo;
    367       iselInt64Expr(&rHi, &rLo, env, arg);
    368       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
    369       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
    370       return 2;
    371    }
    372    ppIRExpr(arg);
    373    vpanic("pushArg(x86): can't handle arg of this type");
    374 }
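
         /* Note the I64 case pushes rHi first and rLo second, so once both
            pushes are done the low half is at 0(%esp) and the high half at
            4(%esp) -- the little-endian layout a callee expects for a
            64-bit stack arg. */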
    375 
    376 
    377 /* Complete the call to a helper function, by calling the
    378    helper and clearing the args off the stack. */
    379 
    380 static
    381 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
    382                               IRCallee* cee, Int n_arg_ws,
    383                               RetLoc rloc )
    384 {
    385    /* Complication.  Need to decide which reg to use as the fn address
    386       pointer, in a way that doesn't trash regparm-passed
    387       parameters. */
    388    vassert(sizeof(void*) == 4);
    389 
    390    addInstr(env, X86Instr_Call( cc, (Addr)cee->addr,
    391                                 cee->regparms, rloc));
    392    if (n_arg_ws > 0)
    393       add_to_esp(env, 4*n_arg_ws);
    394 }
    395 
    396 
    397 /* Used only in doHelperCall.  See big comment in doHelperCall re
    398    handling of regparm args.  This function figures out whether
    399    evaluation of an expression might require use of a fixed register.
    400    If in doubt return True (safe but suboptimal).
    401 */
    402 static
    403 Bool mightRequireFixedRegs ( IRExpr* e )
    404 {
    405    if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) {
    406       // These are always "safe" -- either a copy of %esp in some
    407       // arbitrary vreg, or a copy of %ebp, respectively.
    408       return False;
    409    }
    410    /* Else it's a "normal" expression. */
    411    switch (e->tag) {
    412       case Iex_RdTmp: case Iex_Const: case Iex_Get:
    413          return False;
    414       default:
    415          return True;
    416    }
    417 }
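
         /* This is deliberately conservative: e.g. an Iop_Add32 of two
            temps would in fact be evaluated without any fixed registers,
            but being an Iex_Binop it is reported as dangerous anyway.
            Safe, merely suboptimal. */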
    418 
    419 
     420 /* Do a complete function call.  |guard| is an Ity_I1 expression
    421    indicating whether or not the call happens.  If guard==NULL, the
    422    call is unconditional.  |retloc| is set to indicate where the
    423    return value is after the call.  The caller (of this fn) must
    424    generate code to add |stackAdjustAfterCall| to the stack pointer
    425    after the call is done. */
    426 
    427 static
    428 void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
    429                     /*OUT*/RetLoc* retloc,
    430                     ISelEnv* env,
    431                     IRExpr* guard,
    432                     IRCallee* cee, IRType retTy, IRExpr** args )
    433 {
    434    X86CondCode cc;
    435    HReg        argregs[3];
    436    HReg        tmpregs[3];
    437    Bool        danger;
    438    Int         not_done_yet, n_args, n_arg_ws, stack_limit,
    439                i, argreg, argregX;
    440 
    441    /* Set default returns.  We'll update them later if needed. */
    442    *stackAdjustAfterCall = 0;
    443    *retloc               = mk_RetLoc_INVALID();
    444 
    445    /* These are used for cross-checking that IR-level constraints on
    446       the use of Iex_VECRET and Iex_GSPTR are observed. */
    447    UInt nVECRETs = 0;
    448    UInt nGSPTRs  = 0;
    449 
    450    /* Marshal args for a call, do the call, and clear the stack.
    451       Complexities to consider:
    452 
    453       * The return type can be I{64,32,16,8} or V128.  In the V128
    454         case, it is expected that |args| will contain the special
    455         node IRExpr_VECRET(), in which case this routine generates
    456         code to allocate space on the stack for the vector return
    457         value.  Since we are not passing any scalars on the stack, it
    458         is enough to preallocate the return space before marshalling
    459         any arguments, in this case.
    460 
    461         |args| may also contain IRExpr_GSPTR(), in which case the
    462         value in %ebp is passed as the corresponding argument.
    463 
    464       * If the callee claims regparmness of 1, 2 or 3, we must pass the
    465         first 1, 2 or 3 args in registers (EAX, EDX, and ECX
    466         respectively).  To keep things relatively simple, only args of
    467         type I32 may be passed as regparms -- just bomb out if anything
    468         else turns up.  Clearly this depends on the front ends not
    469         trying to pass any other types as regparms.
    470    */
    471 
    472    /* 16 Nov 2004: the regparm handling is complicated by the
    473       following problem.
    474 
     475       Consider a call to a function with two regparm parameters:
    476       f(e1,e2).  We need to compute e1 into %eax and e2 into %edx.
    477       Suppose code is first generated to compute e1 into %eax.  Then,
    478       code is generated to compute e2 into %edx.  Unfortunately, if
    479       the latter code sequence uses %eax, it will trash the value of
    480       e1 computed by the former sequence.  This could happen if (for
    481       example) e2 itself involved a function call.  In the code below,
    482       args are evaluated right-to-left, not left-to-right, but the
    483       principle and the problem are the same.
    484 
    485       One solution is to compute all regparm-bound args into vregs
    486       first, and once they are all done, move them to the relevant
    487       real regs.  This always gives correct code, but it also gives
    488       a bunch of vreg-to-rreg moves which are usually redundant but
    489       are hard for the register allocator to get rid of.
    490 
    491       A compromise is to first examine all regparm'd argument
    492       expressions.  If they are all so simple that it is clear
    493       they will be evaluated without use of any fixed registers,
    494       use the old compute-directly-to-fixed-target scheme.  If not,
    495       be safe and use the via-vregs scheme.
    496 
    497       Note this requires being able to examine an expression and
    498       determine whether or not evaluation of it might use a fixed
    499       register.  That requires knowledge of how the rest of this
    500       insn selector works.  Currently just the following 3 are
    501       regarded as safe -- hopefully they cover the majority of
     502       arguments in practice: Iex_RdTmp, Iex_Const and Iex_Get.
    503    */
    504    vassert(cee->regparms >= 0 && cee->regparms <= 3);
    505 
    506    /* Count the number of args and also the VECRETs */
    507    n_args = n_arg_ws = 0;
    508    while (args[n_args]) {
    509       IRExpr* arg = args[n_args];
    510       n_args++;
    511       if (UNLIKELY(arg->tag == Iex_VECRET)) {
    512          nVECRETs++;
    513       } else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
    514          nGSPTRs++;
    515       }
    516    }
    517 
    518    /* If this fails, the IR is ill-formed */
    519    vassert(nGSPTRs == 0 || nGSPTRs == 1);
    520 
    521    /* If we have a VECRET, allocate space on the stack for the return
    522       value, and record the stack pointer after that. */
    523    HReg r_vecRetAddr = INVALID_HREG;
    524    if (nVECRETs == 1) {
    525       vassert(retTy == Ity_V128 || retTy == Ity_V256);
    526       vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
    527       r_vecRetAddr = newVRegI(env);
    528       sub_from_esp(env, 16);
    529       addInstr(env, mk_iMOVsd_RR( hregX86_ESP(), r_vecRetAddr ));
    530    } else {
    531       // If either of these fail, the IR is ill-formed
    532       vassert(retTy != Ity_V128 && retTy != Ity_V256);
    533       vassert(nVECRETs == 0);
    534    }
    535 
    536    not_done_yet = n_args;
    537 
    538    stack_limit = cee->regparms;
    539 
    540    /* ------ BEGIN marshall all arguments ------ */
    541 
    542    /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
    543    for (i = n_args-1; i >= stack_limit; i--) {
    544       n_arg_ws += pushArg(env, args[i], r_vecRetAddr);
    545       not_done_yet--;
    546    }
    547 
    548    /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
    549       registers. */
    550 
    551    if (cee->regparms > 0) {
    552 
    553       /* ------ BEGIN deal with regparms ------ */
    554 
    555       /* deal with regparms, not forgetting %ebp if needed. */
    556       argregs[0] = hregX86_EAX();
    557       argregs[1] = hregX86_EDX();
    558       argregs[2] = hregX86_ECX();
    559       tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
    560 
    561       argreg = cee->regparms;
    562 
    563       /* In keeping with big comment above, detect potential danger
    564          and use the via-vregs scheme if needed. */
    565       danger = False;
    566       for (i = stack_limit-1; i >= 0; i--) {
    567          if (mightRequireFixedRegs(args[i])) {
    568             danger = True;
    569             break;
    570          }
    571       }
    572 
    573       if (danger) {
    574 
    575          /* Move via temporaries */
    576          argregX = argreg;
    577          for (i = stack_limit-1; i >= 0; i--) {
    578 
    579             if (0) {
    580                vex_printf("x86 host: register param is complex: ");
    581                ppIRExpr(args[i]);
    582                vex_printf("\n");
    583             }
    584 
    585             IRExpr* arg = args[i];
    586             argreg--;
    587             vassert(argreg >= 0);
    588             if (UNLIKELY(arg->tag == Iex_VECRET)) {
    589                vassert(0); //ATC
    590             }
    591             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
    592                vassert(0); //ATC
    593             } else {
    594                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
    595                tmpregs[argreg] = iselIntExpr_R(env, arg);
    596             }
    597             not_done_yet--;
    598          }
    599          for (i = stack_limit-1; i >= 0; i--) {
    600             argregX--;
    601             vassert(argregX >= 0);
    602             addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
    603          }
    604 
    605       } else {
    606          /* It's safe to compute all regparm args directly into their
    607             target registers. */
    608          for (i = stack_limit-1; i >= 0; i--) {
    609             IRExpr* arg = args[i];
    610             argreg--;
    611             vassert(argreg >= 0);
    612             if (UNLIKELY(arg->tag == Iex_VECRET)) {
    613                vassert(!hregIsInvalid(r_vecRetAddr));
    614                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
    615                                              X86RMI_Reg(r_vecRetAddr),
    616                                              argregs[argreg]));
    617             }
    618             else if (UNLIKELY(arg->tag == Iex_GSPTR)) {
    619                vassert(0); //ATC
    620             } else {
    621                vassert(typeOfIRExpr(env->type_env, arg) == Ity_I32);
    622                addInstr(env, X86Instr_Alu32R(Xalu_MOV,
    623                                              iselIntExpr_RMI(env, arg),
    624                                              argregs[argreg]));
    625             }
    626             not_done_yet--;
    627          }
    628 
    629       }
    630 
    631       /* ------ END deal with regparms ------ */
    632 
    633    }
    634 
    635    vassert(not_done_yet == 0);
    636 
    637    /* ------ END marshall all arguments ------ */
    638 
    639    /* Now we can compute the condition.  We can't do it earlier
    640       because the argument computations could trash the condition
    641       codes.  Be a bit clever to handle the common case where the
    642       guard is 1:Bit. */
    643    cc = Xcc_ALWAYS;
    644    if (guard) {
    645       if (guard->tag == Iex_Const
    646           && guard->Iex.Const.con->tag == Ico_U1
    647           && guard->Iex.Const.con->Ico.U1 == True) {
    648          /* unconditional -- do nothing */
    649       } else {
    650          cc = iselCondCode( env, guard );
    651       }
    652    }
    653 
    654    /* Do final checks, set the return values, and generate the call
    655       instruction proper. */
    656    vassert(*stackAdjustAfterCall == 0);
    657    vassert(is_RetLoc_INVALID(*retloc));
    658    switch (retTy) {
    659          case Ity_INVALID:
    660             /* Function doesn't return a value. */
    661             *retloc = mk_RetLoc_simple(RLPri_None);
    662             break;
    663          case Ity_I64:
    664             *retloc = mk_RetLoc_simple(RLPri_2Int);
    665             break;
    666          case Ity_I32: case Ity_I16: case Ity_I8:
    667             *retloc = mk_RetLoc_simple(RLPri_Int);
    668             break;
    669          case Ity_V128:
    670             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
    671             *stackAdjustAfterCall = 16;
    672             break;
    673          case Ity_V256:
    674             vassert(0); // ATC
    675             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
    676             *stackAdjustAfterCall = 32;
    677             break;
    678          default:
    679             /* IR can denote other possible return types, but we don't
    680                handle those here. */
     681             vassert(0);
    682    }
    683 
    684    /* Finally, generate the call itself.  This needs the *retloc value
    685       set in the switch above, which is why it's at the end. */
    686    callHelperAndClearArgs( env, cc, cee, n_arg_ws, *retloc );
    687 }
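
         /* Worked example for doHelperCall (illustrative): given a
            regparms==2 call f(t1, Add32(t2,t3)), the Add32 argument fails
            mightRequireFixedRegs, so both args are computed into fresh
            vregs first and only then copied to %eax/%edx, ensuring that
            evaluating one arg cannot trash the other's target register. */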
    688 
    689 
    690 /* Given a guest-state array descriptor, an index expression and a
    691    bias, generate an X86AMode holding the relevant guest state
    692    offset. */
    693 
    694 static
    695 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
    696                                 IRExpr* off, Int bias )
    697 {
    698    HReg tmp, roff;
    699    Int  elemSz = sizeofIRType(descr->elemTy);
    700    Int  nElems = descr->nElems;
    701    Int  shift  = 0;
    702 
     703    /* Throw out any cases not generated by an x86 front end.  In
    704       theory there might be a day where we need to handle them -- if
    705       we ever run non-x86-guest on x86 host. */
    706 
    707    if (nElems != 8)
    708       vpanic("genGuestArrayOffset(x86 host)(1)");
    709 
    710    switch (elemSz) {
    711       case 1:  shift = 0; break;
    712       case 4:  shift = 2; break;
    713       case 8:  shift = 3; break;
    714       default: vpanic("genGuestArrayOffset(x86 host)(2)");
    715    }
    716 
     717    /* Compute off into a reg, %roff.  Then return:
     718 
     719          movl %roff, %tmp
     720          addl $bias, %tmp  (if bias != 0)
     721          andl $7, %tmp
    722          ... base(%ebp, %tmp, shift) ...
    723    */
    724    tmp  = newVRegI(env);
    725    roff = iselIntExpr_R(env, off);
    726    addInstr(env, mk_iMOVsd_RR(roff, tmp));
    727    if (bias != 0) {
    728       addInstr(env,
    729                X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
    730    }
    731    addInstr(env,
    732             X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
    733    return
    734       X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
    735 }
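
         /* Example (illustrative): for an 8-entry array of Ity_I8 elements
            (elemSz 1, hence shift 0), an access at index expression |off|
            with bias b produces the amode descr->base(%ebp, %tmp, 1),
            where %tmp = (off + b) & 7 -- the masking keeps the access
            inside the array whatever the index value. */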
    736 
    737 
    738 /* Mess with the FPU's rounding mode: set to the default rounding mode
    739    (DEFAULT_FPUCW). */
    740 static
    741 void set_FPU_rounding_default ( ISelEnv* env )
    742 {
    743    /* pushl $DEFAULT_FPUCW
    744       fldcw 0(%esp)
    745       addl $4, %esp
    746    */
    747    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
    748    addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
    749    addInstr(env, X86Instr_FpLdCW(zero_esp));
    750    add_to_esp(env, 4);
    751 }
    752 
    753 
    754 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
    755    expression denoting a value in the range 0 .. 3, indicating a round
    756    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
    757    the same rounding.
    758 */
    759 static
    760 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
    761 {
    762    HReg rrm  = iselIntExpr_R(env, mode);
    763    HReg rrm2 = newVRegI(env);
    764    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
    765 
    766    /* movl  %rrm, %rrm2
    767       andl  $3, %rrm2   -- shouldn't be needed; paranoia
    768       shll  $10, %rrm2
    769       orl   $DEFAULT_FPUCW, %rrm2
    770       pushl %rrm2
    771       fldcw 0(%esp)
    772       addl  $4, %esp
    773    */
    774    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
    775    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
    776    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
    777    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
    778    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
    779    addInstr(env, X86Instr_FpLdCW(zero_esp));
    780    add_to_esp(env, 4);
    781 }
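
         /* This relies on IRRoundingMode using the same 2-bit encoding as
            the x87 RC field -- 00b nearest, 01b -infinity, 10b +infinity,
            11b toward zero -- so the value only needs shifting into bits
            11..10 of the control word. */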
    782 
    783 
    784 /* Generate !src into a new vector register, and be sure that the code
    785    is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
    786    way to do this.
    787 */
    788 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
    789 {
    790    HReg dst = newVRegV(env);
    791    /* Set dst to zero.  If dst contains a NaN then all hell might
    792       break loose after the comparison.  So, first zero it. */
    793    addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
    794    /* And now make it all 1s ... */
    795    addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
    796    /* Finally, xor 'src' into it. */
    797    addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
    798    /* Doesn't that just totally suck? */
    799    return dst;
    800 }
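
         /* The generated sequence is, in effect:
               xorps   %dst,%dst     -- dst = 0
               cmpeqps %dst,%dst     -- 0 == 0 everywhere, so dst = all 1s
               xorps   %src,%dst     -- dst = ~src
         */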
    801 
    802 
    803 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
    804    after most non-simple FPU operations (simple = +, -, *, / and
    805    sqrt).
    806 
    807    This could be done a lot more efficiently if needed, by loading
    808    zero and adding it to the value to be rounded (fldz ; faddp?).
    809 */
    810 static void roundToF64 ( ISelEnv* env, HReg reg )
    811 {
    812    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
    813    sub_from_esp(env, 8);
    814    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
    815    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
    816    add_to_esp(env, 8);
    817 }
    818 
    819 
    820 /*---------------------------------------------------------*/
    821 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
    822 /*---------------------------------------------------------*/
    823 
    824 /* Select insns for an integer-typed expression, and add them to the
    825    code list.  Return a reg holding the result.  This reg will be a
    826    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
    827    want to modify it, ask for a new vreg, copy it in there, and modify
    828    the copy.  The register allocator will do its best to map both
    829    vregs to the same real register, so the copies will often disappear
    830    later in the game.
    831 
    832    This should handle expressions of 32, 16 and 8-bit type.  All
    833    results are returned in a 32-bit register.  For 16- and 8-bit
    834    expressions, the upper 16/24 bits are arbitrary, so you should mask
    835    or sign extend partial values if necessary.
    836 */
    837 
    838 static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
    839 {
    840    HReg r = iselIntExpr_R_wrk(env, e);
    841    /* sanity checks ... */
    842 #  if 0
    843    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
    844 #  endif
    845    vassert(hregClass(r) == HRcInt32);
    846    vassert(hregIsVirtual(r));
    847    return r;
    848 }
    849 
    850 /* DO NOT CALL THIS DIRECTLY ! */
    851 static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
    852 {
    853    MatchInfo mi;
    854 
    855    IRType ty = typeOfIRExpr(env->type_env,e);
    856    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
    857 
    858    switch (e->tag) {
    859 
    860    /* --------- TEMP --------- */
    861    case Iex_RdTmp: {
    862       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
    863    }
    864 
    865    /* --------- LOAD --------- */
    866    case Iex_Load: {
    867       HReg dst = newVRegI(env);
    868       X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
    869 
    870       /* We can't handle big-endian loads, nor load-linked. */
    871       if (e->Iex.Load.end != Iend_LE)
    872          goto irreducible;
    873 
    874       if (ty == Ity_I32) {
    875          addInstr(env, X86Instr_Alu32R(Xalu_MOV,
    876                                        X86RMI_Mem(amode), dst) );
    877          return dst;
    878       }
    879       if (ty == Ity_I16) {
    880          addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
    881          return dst;
    882       }
    883       if (ty == Ity_I8) {
    884          addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
    885          return dst;
    886       }
    887       break;
    888    }
    889 
    890    /* --------- TERNARY OP --------- */
    891    case Iex_Triop: {
    892       IRTriop *triop = e->Iex.Triop.details;
    893       /* C3210 flags following FPU partial remainder (fprem), both
    894          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
    895       if (triop->op == Iop_PRemC3210F64
    896           || triop->op == Iop_PRem1C3210F64) {
    897          HReg junk = newVRegF(env);
    898          HReg dst  = newVRegI(env);
    899          HReg srcL = iselDblExpr(env, triop->arg2);
    900          HReg srcR = iselDblExpr(env, triop->arg3);
    901          /* XXXROUNDINGFIXME */
    902          /* set roundingmode here */
    903          addInstr(env, X86Instr_FpBinary(
     904                            triop->op==Iop_PRemC3210F64
    905                               ? Xfp_PREM : Xfp_PREM1,
    906                            srcL,srcR,junk
    907                  ));
    908          /* The previous pseudo-insn will have left the FPU's C3210
    909             flags set correctly.  So bag them. */
    910          addInstr(env, X86Instr_FpStSW_AX());
    911          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
    912          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
    913          return dst;
    914       }
    915 
    916       break;
    917    }
    918 
    919    /* --------- BINARY OP --------- */
    920    case Iex_Binop: {
    921       X86AluOp   aluOp;
    922       X86ShiftOp shOp;
    923 
    924       /* Pattern: Sub32(0,x) */
    925       if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
    926          HReg dst = newVRegI(env);
    927          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
    928          addInstr(env, mk_iMOVsd_RR(reg,dst));
    929          addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
    930          return dst;
    931       }
    932 
    933       /* Is it an addition or logical style op? */
    934       switch (e->Iex.Binop.op) {
    935          case Iop_Add8: case Iop_Add16: case Iop_Add32:
    936             aluOp = Xalu_ADD; break;
    937          case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
    938             aluOp = Xalu_SUB; break;
    939          case Iop_And8: case Iop_And16: case Iop_And32:
    940             aluOp = Xalu_AND; break;
    941          case Iop_Or8: case Iop_Or16: case Iop_Or32:
    942             aluOp = Xalu_OR; break;
    943          case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
    944             aluOp = Xalu_XOR; break;
    945          case Iop_Mul16: case Iop_Mul32:
    946             aluOp = Xalu_MUL; break;
    947          default:
    948             aluOp = Xalu_INVALID; break;
    949       }
    950       /* For commutative ops we assume any literal
    951          values are on the second operand. */
    952       if (aluOp != Xalu_INVALID) {
    953          HReg dst    = newVRegI(env);
    954          HReg reg    = iselIntExpr_R(env, e->Iex.Binop.arg1);
    955          X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
    956          addInstr(env, mk_iMOVsd_RR(reg,dst));
    957          addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
    958          return dst;
    959       }
    960       /* Could do better here; forcing the first arg into a reg
    961          isn't always clever.
    962          -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
    963                         LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
    964                         t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
    965             movl 0xFFFFFFA0(%vr41),%vr107
    966             movl 0xFFFFFFA4(%vr41),%vr108
    967             movl %vr107,%vr106
    968             xorl %vr108,%vr106
    969             movl 0xFFFFFFA8(%vr41),%vr109
    970             movl %vr106,%vr105
    971             andl %vr109,%vr105
    972             movl 0xFFFFFFA0(%vr41),%vr110
    973             movl %vr105,%vr104
    974             xorl %vr110,%vr104
    975             movl %vr104,%vr70
    976       */
    977 
    978       /* Perhaps a shift op? */
    979       switch (e->Iex.Binop.op) {
    980          case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
    981             shOp = Xsh_SHL; break;
    982          case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
    983             shOp = Xsh_SHR; break;
    984          case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
    985             shOp = Xsh_SAR; break;
    986          default:
    987             shOp = Xsh_INVALID; break;
    988       }
    989       if (shOp != Xsh_INVALID) {
    990          HReg dst = newVRegI(env);
    991 
    992          /* regL = the value to be shifted */
    993          HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
    994          addInstr(env, mk_iMOVsd_RR(regL,dst));
    995 
    996          /* Do any necessary widening for 16/8 bit operands */
    997          switch (e->Iex.Binop.op) {
    998             case Iop_Shr8:
    999                addInstr(env, X86Instr_Alu32R(
   1000                                 Xalu_AND, X86RMI_Imm(0xFF), dst));
   1001                break;
   1002             case Iop_Shr16:
   1003                addInstr(env, X86Instr_Alu32R(
   1004                                 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
   1005                break;
   1006             case Iop_Sar8:
   1007                addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
   1008                addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
   1009                break;
   1010             case Iop_Sar16:
   1011                addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
   1012                addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
   1013                break;
   1014             default: break;
   1015          }
   1016 
   1017          /* Now consider the shift amount.  If it's a literal, we
   1018             can do a much better job than the general case. */
   1019          if (e->Iex.Binop.arg2->tag == Iex_Const) {
   1020             /* assert that the IR is well-typed */
   1021             Int nshift;
   1022             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   1023             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
    1024             vassert(nshift >= 0);
    1025             if (nshift > 0)
   1026                /* Can't allow nshift==0 since that means %cl */
   1027                addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
   1028          } else {
   1029             /* General case; we have to force the amount into %cl. */
   1030             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1031             addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
   1032             addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
   1033          }
   1034          return dst;
   1035       }
   1036 
   1037       /* Handle misc other ops. */
   1038 
   1039       if (e->Iex.Binop.op == Iop_Max32U) {
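                  /* Computes dst := src1, then dst := src2 if dst <u src2,
                     i.e. the unsigned maximum, via CMP and CMOVB. */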
   1040          HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1041          HReg dst  = newVRegI(env);
   1042          HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1043          addInstr(env, mk_iMOVsd_RR(src1,dst));
   1044          addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
   1045          addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
   1046          return dst;
   1047       }
   1048 
   1049       if (e->Iex.Binop.op == Iop_8HLto16) {
   1050          HReg hi8  = newVRegI(env);
   1051          HReg lo8  = newVRegI(env);
   1052          HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1053          HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1054          addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
   1055          addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
   1056          addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
   1057          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
   1058          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
   1059          return hi8;
   1060       }
   1061 
   1062       if (e->Iex.Binop.op == Iop_16HLto32) {
   1063          HReg hi16  = newVRegI(env);
   1064          HReg lo16  = newVRegI(env);
   1065          HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1066          HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1067          addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
   1068          addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
   1069          addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
   1070          addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
   1071          addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
   1072          return hi16;
   1073       }
   1074 
   1075       if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
   1076           || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
   1077          HReg a16   = newVRegI(env);
   1078          HReg b16   = newVRegI(env);
   1079          HReg a16s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1080          HReg b16s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1081          Int  shift = (e->Iex.Binop.op == Iop_MullS8
   1082                        || e->Iex.Binop.op == Iop_MullU8)
   1083                          ? 24 : 16;
   1084          X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
   1085                               || e->Iex.Binop.op == Iop_MullS16)
   1086                                 ? Xsh_SAR : Xsh_SHR;
   1087 
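                  /* Widen each 8/16-bit operand: shift it to the top of the
                     register, then shift it back down -- arithmetically for
                     the signed variants, logically for the unsigned ones.
                     A single 32-bit multiply of the widened values then
                     leaves the full product in the low bits of b16. */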
   1088          addInstr(env, mk_iMOVsd_RR(a16s, a16));
   1089          addInstr(env, mk_iMOVsd_RR(b16s, b16));
   1090          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
   1091          addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
   1092          addInstr(env, X86Instr_Sh32(shr_op,  shift, a16));
   1093          addInstr(env, X86Instr_Sh32(shr_op,  shift, b16));
   1094          addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
   1095          return b16;
   1096       }
   1097 
   1098       if (e->Iex.Binop.op == Iop_CmpF64) {
   1099          HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
   1100          HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
   1101          HReg dst = newVRegI(env);
   1102          addInstr(env, X86Instr_FpCmp(fL,fR,dst));
   1103          /* shift this right 8 bits so as to conform to CmpF64
   1104             definition. */
   1105          addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
   1106          return dst;
   1107       }
   1108 
   1109       if (e->Iex.Binop.op == Iop_F64toI32S
   1110           || e->Iex.Binop.op == Iop_F64toI16S) {
   1111          Int  sz  = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
   1112          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
   1113          HReg dst = newVRegI(env);
   1114 
   1115          /* Used several times ... */
   1116          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   1117 
    1118          /* rf now holds the value to be converted, and
    1119             e->Iex.Binop.arg1 is the rounding mode, encoded as per
    1120             the IRRoundingMode enum.  The first thing to do is set
    1121             the FPU's rounding mode accordingly. */
    1122 
    1123          /* Create a space for the format conversion. */
    1124          /* subl $4, %esp */
    1125          sub_from_esp(env, 4);
    1126 
    1127          /* Set host rounding mode */
    1128          set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   1129 
   1130          /* gistw/l %rf, 0(%esp) */
   1131          addInstr(env, X86Instr_FpLdStI(False/*store*/,
   1132                                         toUChar(sz), rf, zero_esp));
   1133 
   1134          if (sz == 2) {
   1135             /* movzwl 0(%esp), %dst */
   1136             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
   1137          } else {
   1138             /* movl 0(%esp), %dst */
   1139             vassert(sz == 4);
   1140             addInstr(env, X86Instr_Alu32R(
   1141                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
   1142          }
   1143 
    1144          /* Restore default FPU rounding. */
   1145          set_FPU_rounding_default( env );
   1146 
   1147          /* addl $4, %esp */
    1148          add_to_esp(env, 4);
   1149          return dst;
   1150       }
   1151 
   1152       break;
   1153    }
   1154 
   1155    /* --------- UNARY OP --------- */
   1156    case Iex_Unop: {
   1157 
   1158       /* 1Uto8(32to1(expr32)) */
   1159       if (e->Iex.Unop.op == Iop_1Uto8) {
   1160          DECLARE_PATTERN(p_32to1_then_1Uto8);
   1161          DEFINE_PATTERN(p_32to1_then_1Uto8,
   1162                         unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
   1163          if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
   1164             const IRExpr* expr32 = mi.bindee[0];
   1165             HReg dst = newVRegI(env);
   1166             HReg src = iselIntExpr_R(env, expr32);
   1167             addInstr(env, mk_iMOVsd_RR(src,dst) );
   1168             addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1169                                           X86RMI_Imm(1), dst));
   1170             return dst;
   1171          }
   1172       }
   1173 
   1174       /* 8Uto32(LDle(expr32)) */
   1175       if (e->Iex.Unop.op == Iop_8Uto32) {
   1176          DECLARE_PATTERN(p_LDle8_then_8Uto32);
   1177          DEFINE_PATTERN(p_LDle8_then_8Uto32,
   1178                         unop(Iop_8Uto32,
   1179                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1180          if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
   1181             HReg dst = newVRegI(env);
   1182             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1183             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1184             return dst;
   1185          }
   1186       }
   1187 
   1188       /* 8Sto32(LDle(expr32)) */
   1189       if (e->Iex.Unop.op == Iop_8Sto32) {
   1190          DECLARE_PATTERN(p_LDle8_then_8Sto32);
   1191          DEFINE_PATTERN(p_LDle8_then_8Sto32,
   1192                         unop(Iop_8Sto32,
   1193                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1194          if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
   1195             HReg dst = newVRegI(env);
   1196             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1197             addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
   1198             return dst;
   1199          }
   1200       }
   1201 
   1202       /* 16Uto32(LDle(expr32)) */
   1203       if (e->Iex.Unop.op == Iop_16Uto32) {
   1204          DECLARE_PATTERN(p_LDle16_then_16Uto32);
   1205          DEFINE_PATTERN(p_LDle16_then_16Uto32,
   1206                         unop(Iop_16Uto32,
   1207                              IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
   1208          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
   1209             HReg dst = newVRegI(env);
   1210             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1211             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1212             return dst;
   1213          }
   1214       }
   1215 
   1216       /* 8Uto32(GET:I8) */
   1217       if (e->Iex.Unop.op == Iop_8Uto32) {
   1218          if (e->Iex.Unop.arg->tag == Iex_Get) {
   1219             HReg      dst;
   1220             X86AMode* amode;
   1221             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
   1222             dst = newVRegI(env);
   1223             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1224                                 hregX86_EBP());
   1225             addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1226             return dst;
   1227          }
   1228       }
   1229 
    1230       /* 16Uto32(GET:I16) */
   1231       if (e->Iex.Unop.op == Iop_16Uto32) {
   1232          if (e->Iex.Unop.arg->tag == Iex_Get) {
   1233             HReg      dst;
   1234             X86AMode* amode;
   1235             vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
   1236             dst = newVRegI(env);
   1237             amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1238                                 hregX86_EBP());
   1239             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1240             return dst;
   1241          }
   1242       }
   1243 
   1244       switch (e->Iex.Unop.op) {
   1245          case Iop_8Uto16:
   1246          case Iop_8Uto32:
   1247          case Iop_16Uto32: {
   1248             HReg dst = newVRegI(env);
   1249             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1250             UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
   1251             addInstr(env, mk_iMOVsd_RR(src,dst) );
   1252             addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1253                                           X86RMI_Imm(mask), dst));
   1254             return dst;
   1255          }
   1256          case Iop_8Sto16:
   1257          case Iop_8Sto32:
   1258          case Iop_16Sto32: {
   1259             HReg dst = newVRegI(env);
   1260             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1261             UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
   1262             addInstr(env, mk_iMOVsd_RR(src,dst) );
   1263             addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
   1264             addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
   1265             return dst;
   1266          }
    1267          case Iop_Not8:
    1268          case Iop_Not16:
   1269          case Iop_Not32: {
   1270             HReg dst = newVRegI(env);
   1271             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1272             addInstr(env, mk_iMOVsd_RR(src,dst) );
   1273             addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
   1274             return dst;
   1275          }
   1276          case Iop_64HIto32: {
   1277             HReg rHi, rLo;
   1278             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1279             return rHi; /* and abandon rLo .. poor wee thing :-) */
   1280          }
   1281          case Iop_64to32: {
   1282             HReg rHi, rLo;
   1283             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1284             return rLo; /* similar stupid comment to the above ... */
   1285          }
   1286          case Iop_16HIto8:
   1287          case Iop_32HIto16: {
   1288             HReg dst  = newVRegI(env);
   1289             HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
   1290             Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
   1291             addInstr(env, mk_iMOVsd_RR(src,dst) );
   1292             addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
   1293             return dst;
   1294          }
   1295          case Iop_1Uto32:
   1296          case Iop_1Uto8: {
   1297             HReg dst         = newVRegI(env);
   1298             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1299             addInstr(env, X86Instr_Set32(cond,dst));
   1300             return dst;
   1301          }
   1302          case Iop_1Sto8:
   1303          case Iop_1Sto16:
   1304          case Iop_1Sto32: {
   1305             /* could do better than this, but for now ... */
   1306             HReg dst         = newVRegI(env);
   1307             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1308             addInstr(env, X86Instr_Set32(cond,dst));
   1309             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
   1310             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1311             return dst;
   1312          }
   1313          case Iop_Ctz32: {
   1314             /* Count trailing zeroes, implemented by x86 'bsfl' */
   1315             HReg dst = newVRegI(env);
   1316             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1317             addInstr(env, X86Instr_Bsfr32(True,src,dst));
   1318             return dst;
   1319          }
   1320          case Iop_Clz32: {
   1321             /* Count leading zeroes.  Do 'bsrl' to establish the index
   1322                of the highest set bit, and subtract that value from
   1323                31. */
   1324             HReg tmp = newVRegI(env);
   1325             HReg dst = newVRegI(env);
   1326             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1327             addInstr(env, X86Instr_Bsfr32(False,src,tmp));
   1328             addInstr(env, X86Instr_Alu32R(Xalu_MOV,
   1329                                           X86RMI_Imm(31), dst));
   1330             addInstr(env, X86Instr_Alu32R(Xalu_SUB,
   1331                                           X86RMI_Reg(tmp), dst));
   1332             return dst;
   1333          }
   1334 
   1335          case Iop_CmpwNEZ32: {
   1336             HReg dst = newVRegI(env);
   1337             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
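                     /* CmpwNEZ32 wants 0 if src == 0 and all-ones
                        otherwise.  For nonzero src, (-src | src) always
                        has its top bit set; the arithmetic shift by 31
                        then copies that bit to every position. */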
   1338             addInstr(env, mk_iMOVsd_RR(src,dst));
   1339             addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
   1340             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   1341                                           X86RMI_Reg(src), dst));
   1342             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1343             return dst;
   1344          }
   1345          case Iop_Left8:
   1346          case Iop_Left16:
   1347          case Iop_Left32: {
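                     /* Left(x) is defined as x | -x: the lowest set bit
                        of x is kept and all bits above it become set.
                        This is the same neg-then-or sequence as CmpwNEZ32
                        above, just without the final sign-smearing
                        shift. */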
   1348             HReg dst = newVRegI(env);
   1349             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1350             addInstr(env, mk_iMOVsd_RR(src, dst));
   1351             addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
   1352             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
   1353             return dst;
   1354          }
   1355 
   1356          case Iop_V128to32: {
   1357             HReg      dst  = newVRegI(env);
   1358             HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
   1359             X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   1360             sub_from_esp(env, 16);
   1361             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   1362             addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
   1363             add_to_esp(env, 16);
   1364             return dst;
   1365          }
   1366 
   1367          /* ReinterpF32asI32(e) */
   1368          /* Given an IEEE754 single, produce an I32 with the same bit
   1369             pattern.  Keep stack 8-aligned even though only using 4
   1370             bytes. */
   1371          case Iop_ReinterpF32asI32: {
   1372             HReg rf   = iselFltExpr(env, e->Iex.Unop.arg);
   1373             HReg dst  = newVRegI(env);
   1374             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   1375             /* paranoia */
   1376             set_FPU_rounding_default(env);
   1377             /* subl $8, %esp */
   1378             sub_from_esp(env, 8);
   1379             /* gstF %rf, 0(%esp) */
   1380             addInstr(env,
   1381                      X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
   1382             /* movl 0(%esp), %dst */
   1383             addInstr(env,
   1384                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
   1385             /* addl $8, %esp */
   1386             add_to_esp(env, 8);
   1387             return dst;
   1388          }
   1389 
   1390          case Iop_16to8:
   1391          case Iop_32to8:
   1392          case Iop_32to16:
   1393             /* These are no-ops. */
   1394             return iselIntExpr_R(env, e->Iex.Unop.arg);
   1395 
   1396          case Iop_GetMSBs8x8: {
   1397             /* Note: the following assumes the helper is of
   1398                signature
   1399                   UInt fn ( ULong ), and is not a regparm fn.
   1400             */
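                     /* Under the stack convention assumed here (no
                        regparms), the ULong arg is passed as two 32-bit
                        words with the low word at the lower address --
                        hence pushing xHi first, then xLo -- and the UInt
                        result comes back in %eax. */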
   1401             HReg  xLo, xHi;
   1402             HReg  dst = newVRegI(env);
   1403             Addr fn = (Addr)h_generic_calc_GetMSBs8x8;
   1404             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
   1405             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   1406             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   1407             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   1408                                          0, mk_RetLoc_simple(RLPri_Int) ));
   1409             add_to_esp(env, 2*4);
   1410             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1411             return dst;
   1412          }
   1413 
   1414          default:
   1415             break;
   1416       }
   1417       break;
   1418    }
   1419 
   1420    /* --------- GET --------- */
   1421    case Iex_Get: {
   1422       if (ty == Ity_I32) {
   1423          HReg dst = newVRegI(env);
   1424          addInstr(env, X86Instr_Alu32R(
   1425                           Xalu_MOV,
   1426                           X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
   1427                                                  hregX86_EBP())),
   1428                           dst));
   1429          return dst;
   1430       }
   1431       if (ty == Ity_I8 || ty == Ity_I16) {
   1432          HReg dst = newVRegI(env);
   1433          addInstr(env, X86Instr_LoadEX(
   1434                           toUChar(ty==Ity_I8 ? 1 : 2),
   1435                           False,
   1436                           X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
   1437                           dst));
   1438          return dst;
   1439       }
   1440       break;
   1441    }
   1442 
   1443    case Iex_GetI: {
   1444       X86AMode* am
   1445          = genGuestArrayOffset(
   1446               env, e->Iex.GetI.descr,
   1447                    e->Iex.GetI.ix, e->Iex.GetI.bias );
   1448       HReg dst = newVRegI(env);
   1449       if (ty == Ity_I8) {
   1450          addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
   1451          return dst;
   1452       }
   1453       if (ty == Ity_I32) {
   1454          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
   1455          return dst;
   1456       }
   1457       break;
   1458    }
   1459 
   1460    /* --------- CCALL --------- */
   1461    case Iex_CCall: {
   1462       HReg    dst = newVRegI(env);
   1463       vassert(ty == e->Iex.CCall.retty);
   1464 
   1465       /* be very restrictive for now.  Only 32/64-bit ints allowed for
   1466          args, and 32 bits for return type.  Don't forget to change
   1467          the RetLoc if more return types are allowed in future. */
   1468       if (e->Iex.CCall.retty != Ity_I32)
   1469          goto irreducible;
   1470 
   1471       /* Marshal args, do the call, clear stack. */
   1472       UInt   addToSp = 0;
   1473       RetLoc rloc    = mk_RetLoc_INVALID();
   1474       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   1475                     e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
   1476       vassert(is_sane_RetLoc(rloc));
   1477       vassert(rloc.pri == RLPri_Int);
   1478       vassert(addToSp == 0);
   1479 
   1480       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1481       return dst;
   1482    }
   1483 
   1484    /* --------- LITERAL --------- */
   1485    /* 32/16/8-bit literals */
   1486    case Iex_Const: {
   1487       X86RMI* rmi = iselIntExpr_RMI ( env, e );
   1488       HReg    r   = newVRegI(env);
   1489       addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
   1490       return r;
   1491    }
   1492 
   1493    /* --------- MULTIPLEX --------- */
   1494    case Iex_ITE: { // VFD
    1495       if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
    1496           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
    1497          HReg   r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
    1498          X86RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
    1499          HReg   dst = newVRegI(env);
    1500          addInstr(env, mk_iMOVsd_RR(r1,dst));
    1501          X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
    1502          addInstr(env, X86Instr_CMov32(cc ^ 1, r0, dst));
    1503          return dst;
   1504       }
   1505       break;
   1506    }
   1507 
   1508    default:
    1509       break;
   1510    } /* switch (e->tag) */
   1511 
   1512    /* We get here if no pattern matched. */
   1513   irreducible:
   1514    ppIRExpr(e);
   1515    vpanic("iselIntExpr_R: cannot reduce tree");
   1516 }
   1517 
   1518 
   1519 /*---------------------------------------------------------*/
   1520 /*--- ISEL: Integer expression auxiliaries              ---*/
   1521 /*---------------------------------------------------------*/
   1522 
   1523 /* --------------------- AMODEs --------------------- */
   1524 
   1525 /* Return an AMode which computes the value of the specified
   1526    expression, possibly also adding insns to the code list as a
   1527    result.  The expression may only be a 32-bit one.
   1528 */
   1529 
   1530 static Bool sane_AMode ( X86AMode* am )
   1531 {
   1532    switch (am->tag) {
   1533       case Xam_IR:
   1534          return
   1535             toBool( hregClass(am->Xam.IR.reg) == HRcInt32
   1536                     && (hregIsVirtual(am->Xam.IR.reg)
   1537                         || sameHReg(am->Xam.IR.reg, hregX86_EBP())) );
   1538       case Xam_IRRS:
   1539          return
   1540             toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
   1541                     && hregIsVirtual(am->Xam.IRRS.base)
   1542                     && hregClass(am->Xam.IRRS.index) == HRcInt32
   1543                     && hregIsVirtual(am->Xam.IRRS.index) );
   1544       default:
   1545         vpanic("sane_AMode: unknown x86 amode tag");
   1546    }
   1547 }
   1548 
   1549 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
   1550 {
   1551    X86AMode* am = iselIntExpr_AMode_wrk(env, e);
   1552    vassert(sane_AMode(am));
   1553    return am;
   1554 }
   1555 
   1556 /* DO NOT CALL THIS DIRECTLY ! */
   1557 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
   1558 {
   1559    IRType ty = typeOfIRExpr(env->type_env,e);
   1560    vassert(ty == Ity_I32);
   1561 
   1562    /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
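            /* This matches a classic scaled-index address computation,
               e.g. base + (index << 2) + disp for a word array: it can be
               folded into the single amode disp(%base,%index,4) instead of
               being computed with separate shift and add insns. */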
   1563    if (e->tag == Iex_Binop
   1564        && e->Iex.Binop.op == Iop_Add32
   1565        && e->Iex.Binop.arg2->tag == Iex_Const
   1566        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
   1567        && e->Iex.Binop.arg1->tag == Iex_Binop
   1568        && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
   1569        && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
   1570        && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
   1571        && e->Iex.Binop.arg1
   1572            ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
   1573        && e->Iex.Binop.arg1
   1574            ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
   1575       UInt shift = e->Iex.Binop.arg1
   1576                     ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1577       UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
   1578       if (shift == 1 || shift == 2 || shift == 3) {
   1579          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
   1580          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
   1581                                        ->Iex.Binop.arg2->Iex.Binop.arg1 );
   1582          return X86AMode_IRRS(imm32, r1, r2, shift);
   1583       }
   1584    }
   1585 
   1586    /* Add32(expr1, Shl32(expr2, imm)) */
   1587    if (e->tag == Iex_Binop
   1588        && e->Iex.Binop.op == Iop_Add32
   1589        && e->Iex.Binop.arg2->tag == Iex_Binop
   1590        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
   1591        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
   1592        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
   1593       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1594       if (shift == 1 || shift == 2 || shift == 3) {
   1595          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1596          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
   1597          return X86AMode_IRRS(0, r1, r2, shift);
   1598       }
   1599    }
   1600 
   1601    /* Add32(expr,i) */
   1602    if (e->tag == Iex_Binop
   1603        && e->Iex.Binop.op == Iop_Add32
   1604        && e->Iex.Binop.arg2->tag == Iex_Const
   1605        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
   1606       HReg r1 = iselIntExpr_R(env,  e->Iex.Binop.arg1);
   1607       return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
   1608    }
   1609 
   1610    /* Doesn't match anything in particular.  Generate it into
   1611       a register and use that. */
   1612    {
   1613       HReg r1 = iselIntExpr_R(env, e);
   1614       return X86AMode_IR(0, r1);
   1615    }
   1616 }
   1617 
   1618 
   1619 /* --------------------- RMIs --------------------- */
   1620 
   1621 /* Similarly, calculate an expression into an X86RMI operand.  As with
   1622    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
   1623 
   1624 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
   1625 {
   1626    X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   1627    /* sanity checks ... */
   1628    switch (rmi->tag) {
   1629       case Xrmi_Imm:
   1630          return rmi;
   1631       case Xrmi_Reg:
   1632          vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
   1633          vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
   1634          return rmi;
   1635       case Xrmi_Mem:
   1636          vassert(sane_AMode(rmi->Xrmi.Mem.am));
   1637          return rmi;
   1638       default:
   1639          vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
   1640    }
   1641 }
   1642 
   1643 /* DO NOT CALL THIS DIRECTLY ! */
   1644 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
   1645 {
   1646    IRType ty = typeOfIRExpr(env->type_env,e);
   1647    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1648 
   1649    /* special case: immediate */
   1650    if (e->tag == Iex_Const) {
   1651       UInt u;
   1652       switch (e->Iex.Const.con->tag) {
   1653          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1654          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1655          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1656          default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
   1657       }
   1658       return X86RMI_Imm(u);
   1659    }
   1660 
   1661    /* special case: 32-bit GET */
   1662    if (e->tag == Iex_Get && ty == Ity_I32) {
   1663       return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
   1664                                     hregX86_EBP()));
   1665    }
   1666 
   1667    /* special case: 32-bit load from memory */
   1668    if (e->tag == Iex_Load && ty == Ity_I32
   1669        && e->Iex.Load.end == Iend_LE) {
   1670       X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   1671       return X86RMI_Mem(am);
   1672    }
   1673 
   1674    /* default case: calculate into a register and return that */
   1675    {
   1676       HReg r = iselIntExpr_R ( env, e );
   1677       return X86RMI_Reg(r);
   1678    }
   1679 }
   1680 
   1681 
   1682 /* --------------------- RIs --------------------- */
   1683 
   1684 /* Calculate an expression into an X86RI operand.  As with
   1685    iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
   1686 
   1687 static X86RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
   1688 {
   1689    X86RI* ri = iselIntExpr_RI_wrk(env, e);
   1690    /* sanity checks ... */
   1691    switch (ri->tag) {
   1692       case Xri_Imm:
   1693          return ri;
   1694       case Xri_Reg:
   1695          vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
   1696          vassert(hregIsVirtual(ri->Xri.Reg.reg));
   1697          return ri;
   1698       default:
   1699          vpanic("iselIntExpr_RI: unknown x86 RI tag");
   1700    }
   1701 }
   1702 
   1703 /* DO NOT CALL THIS DIRECTLY ! */
   1704 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
   1705 {
   1706    IRType ty = typeOfIRExpr(env->type_env,e);
   1707    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1708 
   1709    /* special case: immediate */
   1710    if (e->tag == Iex_Const) {
   1711       UInt u;
   1712       switch (e->Iex.Const.con->tag) {
   1713          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1714          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1715          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    1716          default: vpanic("iselIntExpr_RI.Iex_Const(x86h)");
   1717       }
   1718       return X86RI_Imm(u);
   1719    }
   1720 
   1721    /* default case: calculate into a register and return that */
   1722    {
   1723       HReg r = iselIntExpr_R ( env, e );
   1724       return X86RI_Reg(r);
   1725    }
   1726 }
   1727 
   1728 
   1729 /* --------------------- RMs --------------------- */
   1730 
   1731 /* Similarly, calculate an expression into an X86RM operand.  As with
   1732    iselIntExpr_R, the expression can have type 32, 16 or 8 bits.  */
   1733 
   1734 static X86RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
   1735 {
   1736    X86RM* rm = iselIntExpr_RM_wrk(env, e);
   1737    /* sanity checks ... */
   1738    switch (rm->tag) {
   1739       case Xrm_Reg:
   1740          vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
   1741          vassert(hregIsVirtual(rm->Xrm.Reg.reg));
   1742          return rm;
   1743       case Xrm_Mem:
   1744          vassert(sane_AMode(rm->Xrm.Mem.am));
   1745          return rm;
   1746       default:
   1747          vpanic("iselIntExpr_RM: unknown x86 RM tag");
   1748    }
   1749 }
   1750 
   1751 /* DO NOT CALL THIS DIRECTLY ! */
   1752 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
   1753 {
   1754    IRType ty = typeOfIRExpr(env->type_env,e);
   1755    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1756 
   1757    /* special case: 32-bit GET */
   1758    if (e->tag == Iex_Get && ty == Ity_I32) {
   1759       return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
   1760                                    hregX86_EBP()));
   1761    }
   1762 
    1763    /* special case: load from memory -- never implemented; such
    1764       expressions are handled by the default case below. */
   1765    /* default case: calculate into a register and return that */
   1766    {
   1767       HReg r = iselIntExpr_R ( env, e );
   1768       return X86RM_Reg(r);
   1769    }
   1770 }
   1771 
   1772 
   1773 /* --------------------- CONDCODE --------------------- */
   1774 
    1775 /* Generate code to evaluate a bit-typed expression, returning the
    1776    condition code which corresponds to the expression notionally
    1777    returning 1. */
   1778 
   1779 static X86CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
   1780 {
   1781    /* Uh, there's nothing we can sanity check here, unfortunately. */
   1782    return iselCondCode_wrk(env,e);
   1783 }
   1784 
   1785 /* DO NOT CALL THIS DIRECTLY ! */
   1786 static X86CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
   1787 {
   1788    MatchInfo mi;
   1789 
   1790    vassert(e);
   1791    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
   1792 
   1793    /* var */
   1794    if (e->tag == Iex_RdTmp) {
   1795       HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1796       /* Test32 doesn't modify r32; so this is OK. */
   1797       addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
   1798       return Xcc_NZ;
   1799    }
   1800 
   1801    /* Constant 1:Bit */
   1802    if (e->tag == Iex_Const) {
   1803       HReg r;
   1804       vassert(e->Iex.Const.con->tag == Ico_U1);
   1805       vassert(e->Iex.Const.con->Ico.U1 == True
   1806               || e->Iex.Const.con->Ico.U1 == False);
   1807       r = newVRegI(env);
   1808       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
   1809       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
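               /* The xor leaves the Z flag set unconditionally, so Xcc_Z
                  is an always-true condition and Xcc_NZ an always-false
                  one; which of the two we return encodes the constant. */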
   1810       return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
   1811    }
   1812 
   1813    /* Not1(e) */
   1814    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
   1815       /* Generate code for the arg, and negate the test condition */
   1816       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   1817    }
   1818 
   1819    /* --- patterns rooted at: 32to1 --- */
   1820 
   1821    if (e->tag == Iex_Unop
   1822        && e->Iex.Unop.op == Iop_32to1) {
   1823       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1824       addInstr(env, X86Instr_Test32(1,rm));
   1825       return Xcc_NZ;
   1826    }
   1827 
   1828    /* --- patterns rooted at: CmpNEZ8 --- */
   1829 
   1830    /* CmpNEZ8(x) */
   1831    if (e->tag == Iex_Unop
   1832        && e->Iex.Unop.op == Iop_CmpNEZ8) {
   1833       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1834       addInstr(env, X86Instr_Test32(0xFF,rm));
   1835       return Xcc_NZ;
   1836    }
   1837 
   1838    /* --- patterns rooted at: CmpNEZ16 --- */
   1839 
   1840    /* CmpNEZ16(x) */
   1841    if (e->tag == Iex_Unop
   1842        && e->Iex.Unop.op == Iop_CmpNEZ16) {
   1843       X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
   1844       addInstr(env, X86Instr_Test32(0xFFFF,rm));
   1845       return Xcc_NZ;
   1846    }
   1847 
   1848    /* --- patterns rooted at: CmpNEZ32 --- */
   1849 
   1850    /* CmpNEZ32(And32(x,y)) */
   1851    {
   1852       DECLARE_PATTERN(p_CmpNEZ32_And32);
   1853       DEFINE_PATTERN(p_CmpNEZ32_And32,
   1854                      unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
   1855       if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
   1856          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
   1857          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
   1858          HReg    tmp  = newVRegI(env);
   1859          addInstr(env, mk_iMOVsd_RR(r0, tmp));
   1860          addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
   1861          return Xcc_NZ;
   1862       }
   1863    }
   1864 
   1865    /* CmpNEZ32(Or32(x,y)) */
   1866    {
   1867       DECLARE_PATTERN(p_CmpNEZ32_Or32);
   1868       DEFINE_PATTERN(p_CmpNEZ32_Or32,
   1869                      unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
   1870       if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
   1871          HReg    r0   = iselIntExpr_R(env, mi.bindee[0]);
   1872          X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
   1873          HReg    tmp  = newVRegI(env);
   1874          addInstr(env, mk_iMOVsd_RR(r0, tmp));
   1875          addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
   1876          return Xcc_NZ;
   1877       }
   1878    }
   1879 
   1880    /* CmpNEZ32(GET(..):I32) */
   1881    if (e->tag == Iex_Unop
   1882        && e->Iex.Unop.op == Iop_CmpNEZ32
   1883        && e->Iex.Unop.arg->tag == Iex_Get) {
   1884       X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1885                                  hregX86_EBP());
   1886       addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
   1887       return Xcc_NZ;
   1888    }
   1889 
   1890    /* CmpNEZ32(x) */
   1891    if (e->tag == Iex_Unop
   1892        && e->Iex.Unop.op == Iop_CmpNEZ32) {
   1893       HReg    r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1894       X86RMI* rmi2 = X86RMI_Imm(0);
   1895       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
   1896       return Xcc_NZ;
   1897    }
   1898 
   1899    /* --- patterns rooted at: CmpNEZ64 --- */
   1900 
   1901    /* CmpNEZ64(Or64(x,y)) */
   1902    {
   1903       DECLARE_PATTERN(p_CmpNEZ64_Or64);
   1904       DEFINE_PATTERN(p_CmpNEZ64_Or64,
   1905                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
   1906       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
   1907          HReg    hi1, lo1, hi2, lo2;
   1908          HReg    tmp  = newVRegI(env);
   1909          iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
   1910          addInstr(env, mk_iMOVsd_RR(hi1, tmp));
   1911          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
   1912          iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
   1913          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
   1914          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
   1915          return Xcc_NZ;
   1916       }
   1917    }
   1918 
   1919    /* CmpNEZ64(x) */
   1920    if (e->tag == Iex_Unop
   1921        && e->Iex.Unop.op == Iop_CmpNEZ64) {
   1922       HReg hi, lo;
   1923       HReg tmp = newVRegI(env);
   1924       iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
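               /* hi | lo is nonzero exactly when the 64-bit value is
                  nonzero, so one OR leaves the flags reflecting the whole
                  64-bit compare against zero. */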
   1925       addInstr(env, mk_iMOVsd_RR(hi, tmp));
   1926       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
   1927       return Xcc_NZ;
   1928    }
   1929 
   1930    /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
   1931 
   1932    /* CmpEQ8 / CmpNE8 */
   1933    if (e->tag == Iex_Binop
   1934        && (e->Iex.Binop.op == Iop_CmpEQ8
   1935            || e->Iex.Binop.op == Iop_CmpNE8
   1936            || e->Iex.Binop.op == Iop_CasCmpEQ8
   1937            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
   1938       if (isZeroU8(e->Iex.Binop.arg2)) {
   1939          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1940          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
   1941          switch (e->Iex.Binop.op) {
   1942             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
   1943             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
   1944             default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
   1945          }
   1946       } else {
   1947          HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1948          X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   1949          HReg    r    = newVRegI(env);
   1950          addInstr(env, mk_iMOVsd_RR(r1,r));
   1951          addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
   1952          addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
   1953          switch (e->Iex.Binop.op) {
   1954             case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
   1955             case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
   1956             default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
   1957          }
   1958       }
   1959    }
   1960 
   1961    /* CmpEQ16 / CmpNE16 */
   1962    if (e->tag == Iex_Binop
   1963        && (e->Iex.Binop.op == Iop_CmpEQ16
   1964            || e->Iex.Binop.op == Iop_CmpNE16
   1965            || e->Iex.Binop.op == Iop_CasCmpEQ16
   1966            || e->Iex.Binop.op == Iop_CasCmpNE16
   1967            || e->Iex.Binop.op == Iop_ExpCmpNE16)) {
   1968       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1969       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   1970       HReg    r    = newVRegI(env);
   1971       addInstr(env, mk_iMOVsd_RR(r1,r));
   1972       addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
   1973       addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
   1974       switch (e->Iex.Binop.op) {
   1975          case Iop_CmpEQ16: case Iop_CasCmpEQ16:
   1976             return Xcc_Z;
   1977          case Iop_CmpNE16: case Iop_CasCmpNE16: case Iop_ExpCmpNE16:
   1978             return Xcc_NZ;
   1979          default:
   1980             vpanic("iselCondCode(x86): CmpXX16");
   1981       }
   1982    }
   1983 
   1984    /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
   1985       Saves a "movl %eax, %tmp" compared to the default route. */
   1986    if (e->tag == Iex_Binop
   1987        && e->Iex.Binop.op == Iop_CmpNE32
   1988        && e->Iex.Binop.arg1->tag == Iex_CCall
   1989        && e->Iex.Binop.arg2->tag == Iex_Const) {
   1990       IRExpr* cal = e->Iex.Binop.arg1;
   1991       IRExpr* con = e->Iex.Binop.arg2;
   1992       /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
   1993       vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
   1994       vassert(con->Iex.Const.con->tag == Ico_U32);
   1995       /* Marshal args, do the call. */
   1996       UInt   addToSp = 0;
   1997       RetLoc rloc    = mk_RetLoc_INVALID();
   1998       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   1999                     cal->Iex.CCall.cee,
   2000                     cal->Iex.CCall.retty, cal->Iex.CCall.args );
   2001       vassert(is_sane_RetLoc(rloc));
   2002       vassert(rloc.pri == RLPri_Int);
   2003       vassert(addToSp == 0);
   2004       /* */
   2005       addInstr(env, X86Instr_Alu32R(Xalu_CMP,
   2006                                     X86RMI_Imm(con->Iex.Const.con->Ico.U32),
   2007                                     hregX86_EAX()));
   2008       return Xcc_NZ;
   2009    }
   2010 
   2011    /* Cmp*32*(x,y) */
   2012    if (e->tag == Iex_Binop
   2013        && (e->Iex.Binop.op == Iop_CmpEQ32
   2014            || e->Iex.Binop.op == Iop_CmpNE32
   2015            || e->Iex.Binop.op == Iop_CmpLT32S
   2016            || e->Iex.Binop.op == Iop_CmpLT32U
   2017            || e->Iex.Binop.op == Iop_CmpLE32S
   2018            || e->Iex.Binop.op == Iop_CmpLE32U
   2019            || e->Iex.Binop.op == Iop_CasCmpEQ32
   2020            || e->Iex.Binop.op == Iop_CasCmpNE32
   2021            || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
   2022       HReg    r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2023       X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   2024       addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
   2025       switch (e->Iex.Binop.op) {
   2026          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
   2027          case Iop_CmpNE32:
   2028          case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Xcc_NZ;
   2029          case Iop_CmpLT32S: return Xcc_L;
   2030          case Iop_CmpLT32U: return Xcc_B;
   2031          case Iop_CmpLE32S: return Xcc_LE;
   2032          case Iop_CmpLE32U: return Xcc_BE;
   2033          default: vpanic("iselCondCode(x86): CmpXX32");
   2034       }
   2035    }
   2036 
   2037    /* CmpNE64 */
   2038    if (e->tag == Iex_Binop
   2039        && (e->Iex.Binop.op == Iop_CmpNE64
   2040            || e->Iex.Binop.op == Iop_CmpEQ64)) {
   2041       HReg hi1, hi2, lo1, lo2;
   2042       HReg tHi = newVRegI(env);
   2043       HReg tLo = newVRegI(env);
   2044       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
   2045       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
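               /* XOR each pair of halves: a half-pair is equal iff its
                  XOR is zero.  OR-ing the two XOR results is then zero
                  iff both pairs match, i.e. iff the full 64-bit values
                  are equal. */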
   2046       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
   2047       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
   2048       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
   2049       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
   2050       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
   2051       switch (e->Iex.Binop.op) {
   2052          case Iop_CmpNE64: return Xcc_NZ;
   2053          case Iop_CmpEQ64: return Xcc_Z;
   2054          default: vpanic("iselCondCode(x86): CmpXX64");
   2055       }
   2056    }
   2057 
   2058    ppIRExpr(e);
   2059    vpanic("iselCondCode");
   2060 }
   2061 
   2062 
   2063 /*---------------------------------------------------------*/
   2064 /*--- ISEL: Integer expressions (64 bit)                ---*/
   2065 /*---------------------------------------------------------*/
   2066 
   2067 /* Compute a 64-bit value into a register pair, which is returned as
   2068    the first two parameters.  As with iselIntExpr_R, these may be
   2069    either real or virtual regs; in any case they must not be changed
   2070    by subsequent code emitted by the caller.  */
   2071 
   2072 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env,
   2073                             const IRExpr* e )
   2074 {
   2075    iselInt64Expr_wrk(rHi, rLo, env, e);
   2076 #  if 0
   2077    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2078 #  endif
   2079    vassert(hregClass(*rHi) == HRcInt32);
   2080    vassert(hregIsVirtual(*rHi));
   2081    vassert(hregClass(*rLo) == HRcInt32);
   2082    vassert(hregIsVirtual(*rLo));
   2083 }
   2084 
   2085 /* DO NOT CALL THIS DIRECTLY ! */
   2086 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
   2087                                 const IRExpr* e )
   2088 {
   2089    MatchInfo mi;
   2090    HWord fn = 0; /* helper fn for most SIMD64 stuff */
   2091    vassert(e);
   2092    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   2093 
   2094    /* 64-bit literal */
   2095    if (e->tag == Iex_Const) {
   2096       ULong w64 = e->Iex.Const.con->Ico.U64;
   2097       UInt  wHi = toUInt(w64 >> 32);
   2098       UInt  wLo = toUInt(w64);
   2099       HReg  tLo = newVRegI(env);
   2100       HReg  tHi = newVRegI(env);
   2101       vassert(e->Iex.Const.con->tag == Ico_U64);
   2102       if (wLo == wHi) {
   2103          /* Save a precious Int register in this special case. */
   2104          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
   2105          *rHi = tLo;
   2106          *rLo = tLo;
   2107       } else {
   2108          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
   2109          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
   2110          *rHi = tHi;
   2111          *rLo = tLo;
   2112       }
   2113       return;
   2114    }
   2115 
   2116    /* read 64-bit IRTemp */
   2117    if (e->tag == Iex_RdTmp) {
   2118       lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
   2119       return;
   2120    }
   2121 
   2122    /* 64-bit load */
   2123    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2124       HReg     tLo, tHi;
   2125       X86AMode *am0, *am4;
   2126       vassert(e->Iex.Load.ty == Ity_I64);
   2127       tLo = newVRegI(env);
   2128       tHi = newVRegI(env);
   2129       am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
   2130       am4 = advance4(am0);
   2131       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
   2132       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2133       *rHi = tHi;
   2134       *rLo = tLo;
   2135       return;
   2136    }
   2137 
   2138    /* 64-bit GET */
   2139    if (e->tag == Iex_Get) {
   2140       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
   2141       X86AMode* am4 = advance4(am);
   2142       HReg tLo = newVRegI(env);
   2143       HReg tHi = newVRegI(env);
   2144       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
   2145       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2146       *rHi = tHi;
   2147       *rLo = tLo;
   2148       return;
   2149    }
   2150 
   2151    /* 64-bit GETI */
   2152    if (e->tag == Iex_GetI) {
   2153       X86AMode* am
   2154          = genGuestArrayOffset( env, e->Iex.GetI.descr,
   2155                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
   2156       X86AMode* am4 = advance4(am);
   2157       HReg tLo = newVRegI(env);
   2158       HReg tHi = newVRegI(env);
   2159       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
   2160       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
   2161       *rHi = tHi;
   2162       *rLo = tLo;
   2163       return;
   2164    }
   2165 
   2166    /* 64-bit ITE: ITE(g, expr, expr) */ // VFD
   2167    if (e->tag == Iex_ITE) {
   2168       HReg e0Lo, e0Hi, e1Lo, e1Hi;
   2169       HReg tLo = newVRegI(env);
   2170       HReg tHi = newVRegI(env);
   2171       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
   2172       iselInt64Expr(&e1Hi, &e1Lo, env, e->Iex.ITE.iftrue);
   2173       addInstr(env, mk_iMOVsd_RR(e1Hi, tHi));
   2174       addInstr(env, mk_iMOVsd_RR(e1Lo, tLo));
   2175       X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
   2176       /* This assumes the first cmov32 doesn't trash the condition
   2177          codes, so they are still available for the second cmov32 */
   2178       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Hi), tHi));
   2179       addInstr(env, X86Instr_CMov32(cc ^ 1, X86RM_Reg(e0Lo), tLo));
   2180       *rHi = tHi;
   2181       *rLo = tLo;
   2182       return;
   2183    }
   2184 
   2185    /* --------- BINARY ops --------- */
   2186    if (e->tag == Iex_Binop) {
   2187       switch (e->Iex.Binop.op) {
   2188          /* 32 x 32 -> 64 multiply */
   2189          case Iop_MullU32:
   2190          case Iop_MullS32: {
    2191             /* get one operand into %eax, and the other into a R/M.
    2192                Need to make an educated guess about which operand
    2193                is better placed in which. */
   2194             HReg   tLo    = newVRegI(env);
   2195             HReg   tHi    = newVRegI(env);
   2196             Bool   syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
   2197             X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
   2198             HReg   rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2199             addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
   2200             addInstr(env, X86Instr_MulL(syned, rmLeft));
   2201             /* Result is now in EDX:EAX.  Tell the caller. */
   2202             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2203             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2204             *rHi = tHi;
   2205             *rLo = tLo;
   2206             return;
   2207          }
   2208 
   2209          /* 64 x 32 -> (32(rem),32(div)) division */
   2210          case Iop_DivModU64to32:
   2211          case Iop_DivModS64to32: {
   2212             /* Get the 64-bit operand into edx:eax, and the other into
   2213                any old R/M. */
   2214             HReg sHi, sLo;
   2215             HReg   tLo     = newVRegI(env);
   2216             HReg   tHi     = newVRegI(env);
   2217             Bool   syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
   2218             X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
   2219             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2220             addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
   2221             addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
   2222             addInstr(env, X86Instr_Div(syned, rmRight));
   2223             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2224             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2225             *rHi = tHi;
   2226             *rLo = tLo;
   2227             return;
   2228          }
   2229 
   2230          /* Or64/And64/Xor64 */
   2231          case Iop_Or64:
   2232          case Iop_And64:
   2233          case Iop_Xor64: {
   2234             HReg xLo, xHi, yLo, yHi;
   2235             HReg tLo = newVRegI(env);
   2236             HReg tHi = newVRegI(env);
   2237             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
   2238                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
   2239                           : Xalu_XOR;
   2240             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2241             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2242             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2243             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
   2244             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2245             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
   2246             *rHi = tHi;
   2247             *rLo = tLo;
   2248             return;
   2249          }
   2250 
   2251          /* Add64/Sub64 */
   2252          case Iop_Add64:
   2253             if (e->Iex.Binop.arg2->tag == Iex_Const) {
   2254                /* special case Add64(e, const) */
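                        /* The 64-bit add is synthesised as ADD of the low
                           halves followed by ADC of the high halves, so a
                           carry out of the low word propagates.  E.g.
                           0x00000000FFFFFFFF + 1: the addl sets CF, and
                           the adcl then makes the high word 1. */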
   2255                ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   2256                UInt  wHi = toUInt(w64 >> 32);
   2257                UInt  wLo = toUInt(w64);
   2258                HReg  tLo = newVRegI(env);
   2259                HReg  tHi = newVRegI(env);
   2260                HReg  xLo, xHi;
   2261                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
   2262                iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2263                addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2264                addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2265                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
   2266                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
   2267                *rHi = tHi;
   2268                *rLo = tLo;
   2269                return;
   2270             }
   2271             /* else fall through to the generic case */
   2272          case Iop_Sub64: {
   2273             HReg xLo, xHi, yLo, yHi;
   2274             HReg tLo = newVRegI(env);
   2275             HReg tHi = newVRegI(env);
   2276             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2277             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
   2278             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
   2279             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2280             if (e->Iex.Binop.op==Iop_Add64) {
   2281                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
   2282                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
   2283             } else {
   2284                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
   2285                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
   2286             }
   2287             *rHi = tHi;
   2288             *rLo = tLo;
   2289             return;
   2290          }
   2291 
   2292          /* 32HLto64(e1,e2) */
   2293          case Iop_32HLto64:
   2294             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2295             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2296             return;
   2297 
   2298          /* 64-bit shifts */
   2299          case Iop_Shl64: {
   2300             /* We use the same ingenious scheme as gcc.  Put the value
   2301                to be shifted into %hi:%lo, and the shift amount into
   2302                %cl.  Then (dsts on right, a la ATT syntax):
   2303 
   2304                shldl %cl, %lo, %hi   -- make %hi be right for the
   2305                                      -- shift amt %cl % 32
   2306                shll  %cl, %lo        -- make %lo be right for the
   2307                                      -- shift amt %cl % 32
   2308 
   2309                Now, if (shift amount % 64) is in the range 32 .. 63,
   2310                we have to do a fixup, which puts the result low half
   2311                into the result high half, and zeroes the low half:
   2312 
   2313                testl $32, %ecx
   2314 
   2315                cmovnz %lo, %hi
   2316                movl $0, %tmp         -- sigh; need yet another reg
   2317                cmovnz %tmp, %lo
   2318             */
   2319             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
   2320             tLo = newVRegI(env);
   2321             tHi = newVRegI(env);
   2322             tTemp = newVRegI(env);
   2323             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2324             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2325             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
   2326             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2327             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2328             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
   2329                and those regs are legitimately modifiable. */
   2330             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
   2331             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
   2332             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
   2333             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
   2334             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
   2335             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
   2336             *rHi = tHi;
   2337             *rLo = tLo;
   2338             return;
   2339          }
   2340 
   2341          case Iop_Shr64: {
   2342             /* We use the same ingenious scheme as gcc.  Put the value
   2343                to be shifted into %hi:%lo, and the shift amount into
   2344                %cl.  Then:
   2345 
   2346                shrdl %cl, %hi, %lo   -- make %lo be right for the
   2347                                      -- shift amt %cl % 32
   2348                shrl  %cl, %hi        -- make %hi be right for the
   2349                                      -- shift amt %cl % 32
   2350 
   2351                Now, if (shift amount % 64) is in the range 32 .. 63,
   2352                we have to do a fixup, which puts the result high half
   2353                into the result low half, and zeroes the high half:
   2354 
   2355                testl $32, %ecx
   2356 
   2357                cmovnz %hi, %lo
   2358                movl $0, %tmp         -- sigh; need yet another reg
   2359                cmovnz %tmp, %hi
   2360             */
   2361             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
   2362             tLo = newVRegI(env);
   2363             tHi = newVRegI(env);
   2364             tTemp = newVRegI(env);
   2365             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2366             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
   2367             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
   2368             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2369             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2370             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
   2371                and those regs are legitimately modifiable. */
   2372             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
   2373             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
   2374             addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
   2375             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
   2376             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
   2377             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
   2378             *rHi = tHi;
   2379             *rLo = tLo;
   2380             return;
   2381          }
   2382 
   2383          /* F64 -> I64 */
   2384          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
   2385             case.  Unfortunately I see no easy way to avoid the
   2386             duplication. */
   2387          case Iop_F64toI64S: {
   2388             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
   2389             HReg tLo = newVRegI(env);
   2390             HReg tHi = newVRegI(env);
   2391 
   2392             /* Used several times ... */
   2393             /* Careful ... this sharing is only safe because
    2394                zero_esp/four_esp do not hold any registers which the
    2395                register allocator could attempt to swizzle later. */
   2396             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   2397             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
   2398 
    2399             /* rf now holds the value to be converted;
    2400                e->Iex.Binop.arg1 is the rounding mode, encoded as per
    2401                the IRRoundingMode enum.  The first thing to do is set
    2402                the FPU's rounding mode accordingly. */
   2403 
   2404             /* Create a space for the format conversion. */
   2405             /* subl $8, %esp */
   2406             sub_from_esp(env, 8);
   2407 
   2408             /* Set host rounding mode */
   2409             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2410 
   2411             /* gistll %rf, 0(%esp) */
   2412             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
   2413 
   2414             /* movl 0(%esp), %dstLo */
   2415             /* movl 4(%esp), %dstHi */
   2416             addInstr(env, X86Instr_Alu32R(
   2417                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
   2418             addInstr(env, X86Instr_Alu32R(
   2419                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
   2420 
   2421             /* Restore default FPU rounding. */
   2422             set_FPU_rounding_default( env );
   2423 
   2424             /* addl $8, %esp */
   2425             add_to_esp(env, 8);
   2426 
   2427             *rHi = tHi;
   2428             *rLo = tLo;
   2429             return;
   2430          }
   2431 
   2432          case Iop_Add8x8:
   2433             fn = (HWord)h_generic_calc_Add8x8; goto binnish;
   2434          case Iop_Add16x4:
   2435             fn = (HWord)h_generic_calc_Add16x4; goto binnish;
   2436          case Iop_Add32x2:
   2437             fn = (HWord)h_generic_calc_Add32x2; goto binnish;
   2438 
   2439          case Iop_Avg8Ux8:
   2440             fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
   2441          case Iop_Avg16Ux4:
   2442             fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
   2443 
   2444          case Iop_CmpEQ8x8:
   2445             fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
   2446          case Iop_CmpEQ16x4:
   2447             fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
   2448          case Iop_CmpEQ32x2:
   2449             fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
   2450 
   2451          case Iop_CmpGT8Sx8:
   2452             fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
   2453          case Iop_CmpGT16Sx4:
   2454             fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
   2455          case Iop_CmpGT32Sx2:
   2456             fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
   2457 
   2458          case Iop_InterleaveHI8x8:
   2459             fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
   2460          case Iop_InterleaveLO8x8:
   2461             fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
   2462          case Iop_InterleaveHI16x4:
   2463             fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
   2464          case Iop_InterleaveLO16x4:
   2465             fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
   2466          case Iop_InterleaveHI32x2:
   2467             fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
   2468          case Iop_InterleaveLO32x2:
   2469             fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
   2470          case Iop_CatOddLanes16x4:
   2471             fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
   2472          case Iop_CatEvenLanes16x4:
   2473             fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
   2474          case Iop_Perm8x8:
   2475             fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
   2476 
   2477          case Iop_Max8Ux8:
   2478             fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
   2479          case Iop_Max16Sx4:
   2480             fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
   2481          case Iop_Min8Ux8:
   2482             fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
   2483          case Iop_Min16Sx4:
   2484             fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
   2485 
   2486          case Iop_Mul16x4:
   2487             fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
   2488          case Iop_Mul32x2:
   2489             fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
   2490          case Iop_MulHi16Sx4:
   2491             fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
   2492          case Iop_MulHi16Ux4:
   2493             fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
   2494 
   2495          case Iop_QAdd8Sx8:
   2496             fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
   2497          case Iop_QAdd16Sx4:
   2498             fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
   2499          case Iop_QAdd8Ux8:
   2500             fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
   2501          case Iop_QAdd16Ux4:
   2502             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
   2503 
   2504          case Iop_QNarrowBin32Sto16Sx4:
   2505             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
   2506          case Iop_QNarrowBin16Sto8Sx8:
   2507             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
   2508          case Iop_QNarrowBin16Sto8Ux8:
   2509             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
   2510          case Iop_NarrowBin16to8x8:
   2511             fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
   2512          case Iop_NarrowBin32to16x4:
   2513             fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
   2514 
   2515          case Iop_QSub8Sx8:
   2516             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
   2517          case Iop_QSub16Sx4:
   2518             fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
   2519          case Iop_QSub8Ux8:
   2520             fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
   2521          case Iop_QSub16Ux4:
   2522             fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
   2523 
   2524          case Iop_Sub8x8:
   2525             fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
   2526          case Iop_Sub16x4:
   2527             fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
   2528          case Iop_Sub32x2:
   2529             fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
   2530 
   2531          binnish: {
   2532             /* Note: the following assumes all helpers are of
   2533                signature
   2534                   ULong fn ( ULong, ULong ), and they are
   2535                not marked as regparm functions.
   2536             */
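                     /* With both ULongs on the stack, the four pushes lay
                        out xLo, xHi, yLo, yHi from low to high addresses,
                        i.e. x then y, as the helper expects; the 64-bit
                        result comes back in edx:eax, matching
                        RLPri_2Int. */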
   2537             HReg xLo, xHi, yLo, yHi;
   2538             HReg tLo = newVRegI(env);
   2539             HReg tHi = newVRegI(env);
   2540             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2541             addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
   2542             addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
   2543             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2544             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2545             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2546             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   2547                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2548             add_to_esp(env, 4*4);
   2549             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2550             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2551             *rHi = tHi;
   2552             *rLo = tLo;
   2553             return;
   2554          }
   2555 
   2556          case Iop_ShlN32x2:
   2557             fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
   2558          case Iop_ShlN16x4:
   2559             fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
   2560          case Iop_ShlN8x8:
   2561             fn = (HWord)h_generic_calc_ShlN8x8;  goto shifty;
   2562          case Iop_ShrN32x2:
   2563             fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
   2564          case Iop_ShrN16x4:
   2565             fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
   2566          case Iop_SarN32x2:
   2567             fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
   2568          case Iop_SarN16x4:
   2569             fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
   2570          case Iop_SarN8x8:
   2571             fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
   2572          shifty: {
   2573             /* Note: the following assumes all helpers are of
   2574                signature
   2575                   ULong fn ( ULong, UInt ), and they are
   2576                not marked as regparm functions.
   2577             */
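                     /* Same calling scheme as 'binnish' above, except the
                        second argument is a single 32-bit word, so only
                        three words (3*4 bytes) are cleared from the stack
                        afterwards. */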
   2578             HReg xLo, xHi;
   2579             HReg tLo = newVRegI(env);
   2580             HReg tHi = newVRegI(env);
   2581             X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   2582             addInstr(env, X86Instr_Push(y));
   2583             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2584             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2585             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2586             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   2587                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2588             add_to_esp(env, 3*4);
   2589             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2590             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2591             *rHi = tHi;
   2592             *rLo = tLo;
   2593             return;
   2594          }
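                 /* Illustrative sketch, not generated code: for the shift
                    helpers the stack at the call is
                       0(%esp)=xLo  4(%esp)=xHi  8(%esp)=shift amount
                    matching ULong fn ( ULong, UInt ) under cdecl, hence
                    the 3*4-byte cleanup afterwards. */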
   2595 
   2596          default:
   2597             break;
   2598       }
   2599    } /* if (e->tag == Iex_Binop) */
   2600 
   2601 
   2602    /* --------- UNARY ops --------- */
   2603    if (e->tag == Iex_Unop) {
   2604       switch (e->Iex.Unop.op) {
   2605 
   2606          /* 32Sto64(e) */
   2607          case Iop_32Sto64: {
   2608             HReg tLo = newVRegI(env);
   2609             HReg tHi = newVRegI(env);
   2610             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2611             addInstr(env, mk_iMOVsd_RR(src,tHi));
   2612             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2613             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
   2614             *rHi = tHi;
   2615             *rLo = tLo;
   2616             return;
   2617          }
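                 /* Worked example (illustrative): for src = 0x80000000 the
                    two moves give tHi:tLo = 0x80000000:0x80000000, and
                    SAR $31 turns tHi into 0xFFFFFFFF, yielding the sign
                    extension 0xFFFFFFFF80000000. */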
   2618 
   2619          /* 32Uto64(e) */
   2620          case Iop_32Uto64: {
   2621             HReg tLo = newVRegI(env);
   2622             HReg tHi = newVRegI(env);
   2623             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2624             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2625             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2626             *rHi = tHi;
   2627             *rLo = tLo;
   2628             return;
   2629          }
   2630 
   2631          /* 16Uto64(e) */
   2632          case Iop_16Uto64: {
   2633             HReg tLo = newVRegI(env);
   2634             HReg tHi = newVRegI(env);
   2635             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   2636             addInstr(env, mk_iMOVsd_RR(src,tLo));
   2637             addInstr(env, X86Instr_Alu32R(Xalu_AND,
   2638                                           X86RMI_Imm(0xFFFF), tLo));
   2639             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2640             *rHi = tHi;
   2641             *rLo = tLo;
   2642             return;
   2643          }
   2644 
   2645          /* V128{HI}to64 */
   2646          case Iop_V128HIto64:
   2647          case Iop_V128to64: {
   2648             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
   2649             HReg tLo = newVRegI(env);
   2650             HReg tHi = newVRegI(env);
   2651             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
   2652             X86AMode* esp0  = X86AMode_IR(0,     hregX86_ESP());
   2653             X86AMode* espLO = X86AMode_IR(off,   hregX86_ESP());
   2654             X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
   2655             sub_from_esp(env, 16);
   2656             addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   2657             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
   2658                                            X86RMI_Mem(espLO), tLo ));
   2659             addInstr(env, X86Instr_Alu32R( Xalu_MOV,
   2660                                            X86RMI_Mem(espHI), tHi ));
   2661             add_to_esp(env, 16);
   2662             *rHi = tHi;
   2663             *rLo = tLo;
   2664             return;
   2665          }
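                 /* Illustrative sketch: the vector is spilled to a 16-byte
                    stack slot and the requested half is picked up as two
                    32-bit loads at off and off+4, with off = 8 selecting
                    the high 64 bits and off = 0 the low 64 bits. */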
   2666 
   2667          /* could do better than this, but for now ... */
   2668          case Iop_1Sto64: {
   2669             HReg tLo = newVRegI(env);
   2670             HReg tHi = newVRegI(env);
   2671             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   2672             addInstr(env, X86Instr_Set32(cond,tLo));
   2673             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
   2674             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
   2675             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
   2676             *rHi = tHi;
   2677             *rLo = tLo;
   2678             return;
   2679          }
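                 /* Worked trace (illustrative): Set32 leaves tLo = 0 or 1;
                    SHL $31 then SAR $31 smears that bit across the word,
                    giving 0 or 0xFFFFFFFF, which is duplicated into tHi to
                    form the 64-bit all-zeroes/all-ones result. */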
   2680 
   2681          /* Not64(e) */
   2682          case Iop_Not64: {
   2683             HReg tLo = newVRegI(env);
   2684             HReg tHi = newVRegI(env);
   2685             HReg sHi, sLo;
   2686             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
   2687             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
   2688             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
   2689             addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
   2690             addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
   2691             *rHi = tHi;
   2692             *rLo = tLo;
   2693             return;
   2694          }
   2695 
   2696          /* Left64(e) */
   2697          case Iop_Left64: {
   2698             HReg yLo, yHi;
   2699             HReg tLo = newVRegI(env);
   2700             HReg tHi = newVRegI(env);
   2701             /* yHi:yLo = arg */
   2702             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
   2703             /* tLo = 0 - yLo, and set carry */
   2704             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
   2705             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
   2706             /* tHi = 0 - yHi - carry */
   2707             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
   2708             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
   2709             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
   2710                back in, so as to give the final result
   2711                tHi:tLo = arg | -arg. */
   2712             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
   2713             addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
   2714             *rHi = tHi;
   2715             *rLo = tLo;
   2716             return;
   2717          }
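                 /* Worked example (illustrative): for arg = 0x1000, -arg is
                    0xFFFFFFFFFFFFF000, so arg | -arg = 0xFFFFFFFFFFFFF000:
                    every bit at and above the lowest set bit becomes 1,
                    which is exactly the Left64 semantics. */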
   2718 
   2719          /* --- patterns rooted at: CmpwNEZ64 --- */
   2720 
   2721          /* CmpwNEZ64(e) */
   2722          case Iop_CmpwNEZ64: {
   2723 
   2724          DECLARE_PATTERN(p_CmpwNEZ64_Or64);
   2725          DEFINE_PATTERN(p_CmpwNEZ64_Or64,
   2726                         unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
   2727          if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
   2728             /* CmpwNEZ64(Or64(x,y)) */
   2729             HReg xHi,xLo,yHi,yLo;
   2730             HReg xBoth = newVRegI(env);
   2731             HReg merged = newVRegI(env);
   2732             HReg tmp2 = newVRegI(env);
   2733 
   2734             iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
   2735             addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
   2736             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2737                                           X86RMI_Reg(xLo),xBoth));
   2738 
   2739             iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
   2740             addInstr(env, mk_iMOVsd_RR(yHi,merged));
   2741             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2742                                           X86RMI_Reg(yLo),merged));
   2743             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2744                                           X86RMI_Reg(xBoth),merged));
   2745 
   2746             /* tmp2 = (merged | -merged) >>s 31 */
   2747             addInstr(env, mk_iMOVsd_RR(merged,tmp2));
   2748             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
   2749             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2750                                           X86RMI_Reg(merged), tmp2));
   2751             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
   2752             *rHi = tmp2;
   2753             *rLo = tmp2;
   2754             return;
   2755          } else {
   2756             /* CmpwNEZ64(e) */
   2757             HReg srcLo, srcHi;
   2758             HReg tmp1  = newVRegI(env);
   2759             HReg tmp2  = newVRegI(env);
   2760             /* srcHi:srcLo = arg */
   2761             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   2762             /* tmp1 = srcHi | srcLo */
   2763             addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
   2764             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2765                                           X86RMI_Reg(srcLo), tmp1));
   2766             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
   2767             addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
   2768             addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
   2769             addInstr(env, X86Instr_Alu32R(Xalu_OR,
   2770                                           X86RMI_Reg(tmp1), tmp2));
   2771             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
   2772             *rHi = tmp2;
   2773             *rLo = tmp2;
   2774             return;
   2775          }
   2776          }
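                 /* Worked example for the general case (illustrative): if
                    arg != 0 then tmp1 != 0, so tmp1 | -tmp1 has its top bit
                    set and the arithmetic shift by 31 produces 0xFFFFFFFF
                    in both halves; if arg == 0 the same computation yields
                    0 in both halves. */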
   2777 
   2778          /* ReinterpF64asI64(e) */
   2779          /* Given an IEEE754 double, produce an I64 with the same bit
   2780             pattern. */
   2781          case Iop_ReinterpF64asI64: {
   2782             HReg rf   = iselDblExpr(env, e->Iex.Unop.arg);
   2783             HReg tLo  = newVRegI(env);
   2784             HReg tHi  = newVRegI(env);
   2785             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
   2786             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
   2787             /* paranoia */
   2788             set_FPU_rounding_default(env);
   2789             /* subl $8, %esp */
   2790             sub_from_esp(env, 8);
   2791             /* gstD %rf, 0(%esp) */
   2792             addInstr(env,
   2793                      X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
   2794             /* movl 0(%esp), %tLo */
   2795             addInstr(env,
   2796                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
   2797             /* movl 4(%esp), %tHi */
   2798             addInstr(env,
   2799                      X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
   2800             /* addl $8, %esp */
   2801             add_to_esp(env, 8);
   2802             *rHi = tHi;
   2803             *rLo = tLo;
   2804             return;
   2805          }
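                 /* Illustrative sketch: the double is spilled via an x87
                    8-byte store to 0(%esp); since the layout is
                    little-endian, the low 32 bits of the bit pattern sit
                    at 0(%esp) and the high 32 bits at 4(%esp). */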
   2806 
   2807          case Iop_CmpNEZ32x2:
   2808             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
   2809          case Iop_CmpNEZ16x4:
   2810             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
   2811          case Iop_CmpNEZ8x8:
   2812             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
   2813          unish: {
   2814             /* Note: the following assumes all helpers are of
   2815                signature
   2816                   ULong fn ( ULong ), and they are
   2817                not marked as regparm functions.
   2818             */
   2819             HReg xLo, xHi;
   2820             HReg tLo = newVRegI(env);
   2821             HReg tHi = newVRegI(env);
   2822             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
   2823             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
   2824             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
   2825             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   2826                                          0, mk_RetLoc_simple(RLPri_2Int) ));
   2827             add_to_esp(env, 2*4);
   2828             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2829             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2830             *rHi = tHi;
   2831             *rLo = tLo;
   2832             return;
   2833          }
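                 /* As with 'binnish' above (illustrative summary): the one
                    ULong argument is pushed as two words, and the result
                    comes back in %edx:%eax. */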
   2834 
   2835          default:
   2836             break;
   2837       }
   2838    } /* if (e->tag == Iex_Unop) */
   2839 
   2840 
   2841    /* --------- CCALL --------- */
   2842    if (e->tag == Iex_CCall) {
   2843       HReg tLo = newVRegI(env);
   2844       HReg tHi = newVRegI(env);
   2845 
   2846       /* Marshal args, do the call, clear stack. */
   2847       UInt   addToSp = 0;
   2848       RetLoc rloc    = mk_RetLoc_INVALID();
   2849       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   2850                     e->Iex.CCall.cee,
   2851                     e->Iex.CCall.retty, e->Iex.CCall.args );
   2852       vassert(is_sane_RetLoc(rloc));
   2853       vassert(rloc.pri == RLPri_2Int);
   2854       vassert(addToSp == 0);
   2855       /* */
   2856 
   2857       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
   2858       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
   2859       *rHi = tHi;
   2860       *rLo = tLo;
   2861       return;
   2862    }
   2863 
   2864    ppIRExpr(e);
   2865    vpanic("iselInt64Expr");
   2866 }
   2867 
   2868 
   2869 /*---------------------------------------------------------*/
   2870 /*--- ISEL: Floating point expressions (32 bit)         ---*/
   2871 /*---------------------------------------------------------*/
   2872 
   2873 /* Nothing interesting here; really just wrappers for
   2874    64-bit stuff. */
   2875 
   2876 static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
   2877 {
   2878    HReg r = iselFltExpr_wrk( env, e );
   2879 #  if 0
   2880    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2881 #  endif
   2882    vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
   2883    vassert(hregIsVirtual(r));
   2884    return r;
   2885 }
   2886 
   2887 /* DO NOT CALL THIS DIRECTLY */
   2888 static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
   2889 {
   2890    IRType ty = typeOfIRExpr(env->type_env,e);
   2891    vassert(ty == Ity_F32);
   2892 
   2893    if (e->tag == Iex_RdTmp) {
   2894       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   2895    }
   2896 
   2897    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2898       X86AMode* am;
   2899       HReg res = newVRegF(env);
   2900       vassert(e->Iex.Load.ty == Ity_F32);
   2901       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   2902       addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
   2903       return res;
   2904    }
   2905 
   2906    if (e->tag == Iex_Binop
   2907        && e->Iex.Binop.op == Iop_F64toF32) {
   2908       /* Although the result is still held in a standard FPU register,
   2909          we need to round it to reflect the loss of accuracy/range
   2910          entailed in casting it to a 32-bit float. */
   2911       HReg dst = newVRegF(env);
   2912       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   2913       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2914       addInstr(env, X86Instr_Fp64to32(src,dst));
   2915       set_FPU_rounding_default( env );
   2916       return dst;
   2917    }
   2918 
   2919    if (e->tag == Iex_Get) {
   2920       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
   2921                                   hregX86_EBP() );
   2922       HReg res = newVRegF(env);
   2923       addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
   2924       return res;
   2925    }
   2926 
   2927    if (e->tag == Iex_Unop
   2928        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
   2929        /* Given an I32, produce an IEEE754 float with the same bit
   2930           pattern. */
   2931       HReg    dst = newVRegF(env);
   2932       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
   2933       /* paranoia */
   2934       addInstr(env, X86Instr_Push(rmi));
   2935       addInstr(env, X86Instr_FpLdSt(
   2936                        True/*load*/, 4, dst,
   2937                        X86AMode_IR(0, hregX86_ESP())));
   2938       add_to_esp(env, 4);
   2939       return dst;
   2940    }
   2941 
   2942    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
   2943       HReg rf  = iselFltExpr(env, e->Iex.Binop.arg2);
   2944       HReg dst = newVRegF(env);
   2945 
   2946       /* rf now holds the value to be rounded.  The first thing to do
   2947          is set the FPU's rounding mode accordingly. */
   2948 
   2949       /* Set host rounding mode */
   2950       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   2951 
   2952       /* grndint %rf, %dst */
   2953       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
   2954 
   2955       /* Restore default FPU rounding. */
   2956       set_FPU_rounding_default( env );
   2957 
   2958       return dst;
   2959    }
   2960 
   2961    ppIRExpr(e);
   2962    vpanic("iselFltExpr_wrk");
   2963 }
   2964 
   2965 
   2966 /*---------------------------------------------------------*/
   2967 /*--- ISEL: Floating point expressions (64 bit)         ---*/
   2968 /*---------------------------------------------------------*/
   2969 
   2970 /* Compute a 64-bit floating point value into a register, the identity
   2971    of which is returned.  As with iselIntExpr_R, the reg may be either
   2972    real or virtual; in any case it must not be changed by subsequent
   2973    code emitted by the caller.  */
   2974 
   2975 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
   2976 
   2977     Type                  S (1 bit)   E (11 bits)   F (52 bits)
   2978     ----                  ---------   -----------   -----------
   2979     signalling NaN        u           2047 (max)    .0uuuuu---u
   2980                                                     (with at least
   2981                                                      one 1 bit)
   2982     quiet NaN             u           2047 (max)    .1uuuuu---u
   2983 
   2984     negative infinity     1           2047 (max)    .000000---0
   2985 
   2986     positive infinity     0           2047 (max)    .000000---0
   2987 
   2988     negative zero         1           0             .000000---0
   2989 
   2990     positive zero         0           0             .000000---0
   2991 */
   2992 
   2993 static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
   2994 {
   2995    HReg r = iselDblExpr_wrk( env, e );
   2996 #  if 0
   2997    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   2998 #  endif
   2999    vassert(hregClass(r) == HRcFlt64);
   3000    vassert(hregIsVirtual(r));
   3001    return r;
   3002 }
   3003 
   3004 /* DO NOT CALL THIS DIRECTLY */
   3005 static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
   3006 {
   3007    IRType ty = typeOfIRExpr(env->type_env,e);
   3008    vassert(e);
   3009    vassert(ty == Ity_F64);
   3010 
   3011    if (e->tag == Iex_RdTmp) {
   3012       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3013    }
   3014 
   3015    if (e->tag == Iex_Const) {
   3016       union { UInt u32x2[2]; ULong u64; Double f64; } u;
   3017       HReg freg = newVRegF(env);
   3018       vassert(sizeof(u) == 8);
   3019       vassert(sizeof(u.u64) == 8);
   3020       vassert(sizeof(u.f64) == 8);
   3021       vassert(sizeof(u.u32x2) == 8);
   3022 
   3023       if (e->Iex.Const.con->tag == Ico_F64) {
   3024          u.f64 = e->Iex.Const.con->Ico.F64;
   3025       }
   3026       else if (e->Iex.Const.con->tag == Ico_F64i) {
   3027          u.u64 = e->Iex.Const.con->Ico.F64i;
   3028       }
   3029       else
   3030          vpanic("iselDblExpr(x86): const");
   3031 
   3032       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
   3033       addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
   3034       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
   3035                                     X86AMode_IR(0, hregX86_ESP())));
   3036       add_to_esp(env, 8);
   3037       return freg;
   3038    }
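           /* Illustrative note: the high word is pushed first and the low
              word second, so the 8 bytes at 0(%esp) hold the constant's
              bit pattern in little-endian order, ready for the 8-byte x87
              load above. */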
   3039 
   3040    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   3041       X86AMode* am;
   3042       HReg res = newVRegF(env);
   3043       vassert(e->Iex.Load.ty == Ity_F64);
   3044       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
   3045       addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
   3046       return res;
   3047    }
   3048 
   3049    if (e->tag == Iex_Get) {
   3050       X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
   3051                                   hregX86_EBP() );
   3052       HReg res = newVRegF(env);
   3053       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
   3054       return res;
   3055    }
   3056 
   3057    if (e->tag == Iex_GetI) {
   3058       X86AMode* am
   3059          = genGuestArrayOffset(
   3060               env, e->Iex.GetI.descr,
   3061                    e->Iex.GetI.ix, e->Iex.GetI.bias );
   3062       HReg res = newVRegF(env);
   3063       addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
   3064       return res;
   3065    }
   3066 
   3067    if (e->tag == Iex_Triop) {
   3068       X86FpOp fpop = Xfp_INVALID;
   3069       IRTriop *triop = e->Iex.Triop.details;
   3070       switch (triop->op) {
   3071          case Iop_AddF64:    fpop = Xfp_ADD; break;
   3072          case Iop_SubF64:    fpop = Xfp_SUB; break;
   3073          case Iop_MulF64:    fpop = Xfp_MUL; break;
   3074          case Iop_DivF64:    fpop = Xfp_DIV; break;
   3075          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
   3076          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
   3077          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
   3078          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
   3079          case Iop_PRemF64:   fpop = Xfp_PREM; break;
   3080          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
   3081          default: break;
   3082       }
   3083       if (fpop != Xfp_INVALID) {
   3084          HReg res  = newVRegF(env);
   3085          HReg srcL = iselDblExpr(env, triop->arg2);
   3086          HReg srcR = iselDblExpr(env, triop->arg3);
   3087          /* XXXROUNDINGFIXME */
   3088          /* set roundingmode here */
   3089          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
   3090          if (fpop != Xfp_ADD && fpop != Xfp_SUB
   3091              && fpop != Xfp_MUL && fpop != Xfp_DIV)
   3092             roundToF64(env, res);
   3093          return res;
   3094       }
   3095    }
   3096 
   3097    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
   3098       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
   3099       HReg dst = newVRegF(env);
   3100 
   3101       /* rf now holds the value to be rounded.  The first thing to do
   3102          is set the FPU's rounding mode accordingly. */
   3103 
   3104       /* Set host rounding mode */
   3105       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   3106 
   3107       /* grndint %rf, %dst */
   3108       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
   3109 
   3110       /* Restore default FPU rounding. */
   3111       set_FPU_rounding_default( env );
   3112 
   3113       return dst;
   3114    }
   3115 
   3116    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
   3117       HReg dst = newVRegF(env);
   3118       HReg rHi,rLo;
   3119       iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
   3120       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3121       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3122 
   3123       /* Set host rounding mode */
   3124       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
   3125 
   3126       addInstr(env, X86Instr_FpLdStI(
   3127                        True/*load*/, 8, dst,
   3128                        X86AMode_IR(0, hregX86_ESP())));
   3129 
   3130       /* Restore default FPU rounding. */
   3131       set_FPU_rounding_default( env );
   3132 
   3133       add_to_esp(env, 8);
   3134       return dst;
   3135    }
   3136 
   3137    if (e->tag == Iex_Binop) {
   3138       X86FpOp fpop = Xfp_INVALID;
   3139       switch (e->Iex.Binop.op) {
   3140          case Iop_SinF64:  fpop = Xfp_SIN; break;
   3141          case Iop_CosF64:  fpop = Xfp_COS; break;
   3142          case Iop_TanF64:  fpop = Xfp_TAN; break;
   3143          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
   3144          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
   3145          default: break;
   3146       }
   3147       if (fpop != Xfp_INVALID) {
   3148          HReg res = newVRegF(env);
   3149          HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   3150          /* XXXROUNDINGFIXME */
   3151          /* set roundingmode here */
   3152          /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition
   3153             codes.  I don't think that matters, since this insn
   3154             selector never generates such an instruction intervening
   3155             between a flag-setting instruction and a flag-using
   3156             instruction. */
   3157          addInstr(env, X86Instr_FpUnary(fpop,src,res));
   3158          if (fpop != Xfp_SQRT
   3159              && fpop != Xfp_NEG && fpop != Xfp_ABS)
   3160             roundToF64(env, res);
   3161          return res;
   3162       }
   3163    }
   3164 
   3165    if (e->tag == Iex_Unop) {
   3166       X86FpOp fpop = Xfp_INVALID;
   3167       switch (e->Iex.Unop.op) {
   3168          case Iop_NegF64:  fpop = Xfp_NEG; break;
   3169          case Iop_AbsF64:  fpop = Xfp_ABS; break;
   3170          default: break;
   3171       }
   3172       if (fpop != Xfp_INVALID) {
   3173          HReg res = newVRegF(env);
   3174          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   3175          addInstr(env, X86Instr_FpUnary(fpop,src,res));
   3176          /* No need to do roundToF64(env,res) for Xfp_NEG or Xfp_ABS,
   3177             but might need to do that for other unary ops. */
   3178          return res;
   3179       }
   3180    }
   3181 
   3182    if (e->tag == Iex_Unop) {
   3183       switch (e->Iex.Unop.op) {
   3184          case Iop_I32StoF64: {
   3185             HReg dst = newVRegF(env);
   3186             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
   3187             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
   3188             set_FPU_rounding_default(env);
   3189             addInstr(env, X86Instr_FpLdStI(
   3190                              True/*load*/, 4, dst,
   3191                              X86AMode_IR(0, hregX86_ESP())));
   3192             add_to_esp(env, 4);
   3193             return dst;
   3194          }
   3195          case Iop_ReinterpI64asF64: {
   3196             /* Given an I64, produce an IEEE754 double with the same
   3197                bit pattern. */
   3198             HReg dst = newVRegF(env);
   3199             HReg rHi, rLo;
   3200             iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
   3201             /* paranoia */
   3202             set_FPU_rounding_default(env);
   3203             addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3204             addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3205             addInstr(env, X86Instr_FpLdSt(
   3206                              True/*load*/, 8, dst,
   3207                              X86AMode_IR(0, hregX86_ESP())));
   3208             add_to_esp(env, 8);
   3209             return dst;
   3210          }
   3211          case Iop_F32toF64: {
   3212             /* this is a no-op */
   3213             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
   3214             return res;
   3215          }
   3216          default:
   3217             break;
   3218       }
   3219    }
   3220 
   3221    /* --------- MULTIPLEX --------- */
   3222    if (e->tag == Iex_ITE) { // VFD
   3223      if (ty == Ity_F64
   3224          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
   3225         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
   3226         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
   3227         HReg dst = newVRegF(env);
   3228         addInstr(env, X86Instr_FpUnary(Xfp_MOV,r1,dst));
   3229         X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
   3230         addInstr(env, X86Instr_FpCMov(cc ^ 1, r0, dst));
   3231         return dst;
   3232       }
   3233    }
   3234 
   3235    ppIRExpr(e);
   3236    vpanic("iselDblExpr_wrk");
   3237 }
   3238 
   3239 
   3240 /*---------------------------------------------------------*/
   3241 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
   3242 /*---------------------------------------------------------*/
   3243 
   3244 static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
   3245 {
   3246    HReg r = iselVecExpr_wrk( env, e );
   3247 #  if 0
   3248    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   3249 #  endif
   3250    vassert(hregClass(r) == HRcVec128);
   3251    vassert(hregIsVirtual(r));
   3252    return r;
   3253 }
   3254 
   3255 
   3256 /* DO NOT CALL THIS DIRECTLY */
   3257 static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
   3258 {
   3259 
   3260 #  define REQUIRE_SSE1                                    \
   3261       do { if (env->hwcaps == 0/*baseline, no sse*/       \
   3262                ||  env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
   3263               goto vec_fail;                              \
   3264       } while (0)
   3265 
   3266 #  define REQUIRE_SSE2                                    \
   3267       do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2))  \
   3268               goto vec_fail;                              \
   3269       } while (0)
   3270 
   3271 #  define SSE2_OR_ABOVE                                   \
   3272        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
   3273 
   3274    HWord     fn = 0; /* address of helper fn, if required */
   3275    MatchInfo mi;
   3276    Bool      arg1isEReg = False;
   3277    X86SseOp  op = Xsse_INVALID;
   3278    IRType    ty = typeOfIRExpr(env->type_env,e);
   3279    vassert(e);
   3280    vassert(ty == Ity_V128);
   3281 
   3282    REQUIRE_SSE1;
   3283 
   3284    if (e->tag == Iex_RdTmp) {
   3285       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3286    }
   3287 
   3288    if (e->tag == Iex_Get) {
   3289       HReg dst = newVRegV(env);
   3290       addInstr(env, X86Instr_SseLdSt(
   3291                        True/*load*/,
   3292                        dst,
   3293                        X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
   3294                     )
   3295               );
   3296       return dst;
   3297    }
   3298 
   3299    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   3300       HReg      dst = newVRegV(env);
   3301       X86AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
   3302       addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
   3303       return dst;
   3304    }
   3305 
   3306    if (e->tag == Iex_Const) {
   3307       HReg dst = newVRegV(env);
   3308       vassert(e->Iex.Const.con->tag == Ico_V128);
   3309       addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
   3310       return dst;
   3311    }
   3312 
   3313    if (e->tag == Iex_Unop) {
   3314 
   3315    if (SSE2_OR_ABOVE) {
   3316       /* 64UtoV128(LDle:I64(addr)) */
   3317       DECLARE_PATTERN(p_zwiden_load64);
   3318       DEFINE_PATTERN(p_zwiden_load64,
   3319                      unop(Iop_64UtoV128,
   3320                           IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
   3321       if (matchIRExpr(&mi, p_zwiden_load64, e)) {
   3322          X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
   3323          HReg dst = newVRegV(env);
   3324          addInstr(env, X86Instr_SseLdzLO(8, dst, am));
   3325          return dst;
   3326       }
   3327    }
   3328 
   3329    switch (e->Iex.Unop.op) {
   3330 
   3331       case Iop_NotV128: {
   3332          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3333          return do_sse_Not128(env, arg);
   3334       }
   3335 
   3336       case Iop_CmpNEZ64x2: {
   3337          /* We can use SSE2 instructions for this. */
   3338          /* Ideally, we want to do a 64Ix2 comparison against zero of
   3339             the operand.  Problem is no such insn exists.  Solution
   3340             therefore is to do a 32Ix4 comparison instead, and bitwise-
   3341             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
   3342             let the not'd result of this initial comparison be a:b:c:d.
   3343             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
   3344             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
   3345             giving the required result.
   3346 
   3347             The required selection sequence is 2,3,0,1, which
   3348             according to Intel's documentation means the pshufd
   3349             literal value is 0xB1, that is,
   3350             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
   3351          */
   3352          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
   3353          HReg tmp  = newVRegV(env);
   3354          HReg dst  = newVRegV(env);
   3355          REQUIRE_SSE2;
   3356          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
   3357          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
   3358          tmp = do_sse_Not128(env, tmp);
   3359          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
   3360          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
   3361          return dst;
   3362       }
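              /* Worked lane example (illustrative): if the not'd compare
                 result is a:b:c:d, pshufd $0xB1 produces b:a:d:c, and
                 OR-ing the two gives (a|b):(a|b):(c|d):(c|d) -- each
                 64-bit half is all-ones exactly when either of its 32-bit
                 lanes was nonzero. */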
   3363 
   3364       case Iop_CmpNEZ32x4: {
   3365          /* Sigh, we have to generate lousy code since this has to
   3366             work on SSE1 hosts */
   3367          /* basically, the idea is: for each lane:
   3368                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
   3369                sbbl %r, %r               (now %r = 1Sto32(CF))
   3370                movl %r, lane
   3371          */
   3372          Int       i;
   3373          X86AMode* am;
   3374          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3375          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
   3376          HReg      dst  = newVRegV(env);
   3377          HReg      r32  = newVRegI(env);
   3378          sub_from_esp(env, 16);
   3379          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
   3380          for (i = 0; i < 4; i++) {
   3381             am = X86AMode_IR(i*4, hregX86_ESP());
   3382             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
   3383             addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
   3384             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
   3385             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
   3386          }
   3387          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3388          add_to_esp(env, 16);
   3389          return dst;
   3390       }
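              /* Worked example of the neg/sbb trick (illustrative): for a
                 lane equal to 0, negl leaves CF = 0 and sbbl %r,%r gives
                 0; for any nonzero lane, negl sets CF = 1 and sbbl gives
                 0 - 0 - 1 = 0xFFFFFFFF. */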
   3391 
   3392       case Iop_CmpNEZ8x16:
   3393       case Iop_CmpNEZ16x8: {
   3394          /* We can use SSE2 instructions for this. */
   3395          HReg arg;
   3396          HReg vec0 = newVRegV(env);
   3397          HReg vec1 = newVRegV(env);
   3398          HReg dst  = newVRegV(env);
   3399          X86SseOp cmpOp
   3400             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
   3401                                              : Xsse_CMPEQ8;
   3402          REQUIRE_SSE2;
   3403          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
   3404          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
   3405          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
   3406          /* defer arg computation to here so as to give CMPEQF as long
   3407             as possible to complete */
   3408          arg = iselVecExpr(env, e->Iex.Unop.arg);
   3409          /* vec0 is all 0s; vec1 is all 1s */
   3410          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3411          /* 16x8 or 8x16 comparison == */
   3412          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
   3413          /* invert result */
   3414          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
   3415          return dst;
   3416       }
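              /* Illustrative summary: vec1 becomes all-ones via CMPEQF of
                 a register against itself, so the final XOR inverts the
                 equal-to-zero mask, leaving all-ones exactly in the lanes
                 that were nonzero. */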
   3417 
   3418       case Iop_RecipEst32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
   3419       case Iop_RSqrtEst32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
   3420       do_32Fx4_unary:
   3421       {
   3422          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3423          HReg dst = newVRegV(env);
   3424          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
   3425          return dst;
   3426       }
   3427 
   3428       case Iop_RecipEst32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
   3429       case Iop_RSqrtEst32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
   3430       case Iop_Sqrt32F0x4:     op = Xsse_SQRTF;  goto do_32F0x4_unary;
   3431       do_32F0x4_unary:
   3432       {
   3433          /* A bit subtle.  We have to copy the arg to the result
   3434             register first, because actually doing the SSE scalar insn
   3435             leaves the upper 3/4 of the destination register
   3436             unchanged.  Whereas the required semantics of these
   3437             primops is that the upper 3/4 is simply copied in from the
   3438             argument. */
   3439          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3440          HReg dst = newVRegV(env);
   3441          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3442          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
   3443          return dst;
   3444       }
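              /* E.g. for Iop_Sqrt32F0x4 (illustrative):
                    dst = [ sqrt(arg[0]), arg[1], arg[2], arg[3] ]
                 which is why the full-width copy must precede the scalar
                 SSE op. */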
   3445 
   3446       case Iop_Sqrt64F0x2:  op = Xsse_SQRTF;  goto do_64F0x2_unary;
   3447       do_64F0x2_unary:
   3448       {
   3449          /* A bit subtle.  We have to copy the arg to the result
   3450             register first, because actually doing the SSE scalar insn
   3451             leaves the upper half of the destination register
   3452             unchanged.  Whereas the required semantics of these
   3453             primops is that the upper half is simply copied in from the
   3454             argument. */
   3455          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
   3456          HReg dst = newVRegV(env);
   3457          REQUIRE_SSE2;
   3458          addInstr(env, mk_vMOVsd_RR(arg, dst));
   3459          addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
   3460          return dst;
   3461       }
   3462 
   3463       case Iop_32UtoV128: {
   3464          HReg      dst  = newVRegV(env);
   3465          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3466          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
   3467          addInstr(env, X86Instr_Push(rmi));
   3468          addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
   3469          add_to_esp(env, 4);
   3470          return dst;
   3471       }
   3472 
   3473       case Iop_64UtoV128: {
   3474          HReg      rHi, rLo;
   3475          HReg      dst  = newVRegV(env);
   3476          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3477          iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
   3478          addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
   3479          addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
   3480          addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
   3481          add_to_esp(env, 8);
   3482          return dst;
   3483       }
   3484 
   3485       default:
   3486          break;
   3487    } /* switch (e->Iex.Unop.op) */
   3488    } /* if (e->tag == Iex_Unop) */
   3489 
   3490    if (e->tag == Iex_Binop) {
   3491    switch (e->Iex.Binop.op) {
   3492 
   3493       case Iop_Sqrt64Fx2:
   3494          REQUIRE_SSE2;
   3495          /* fallthrough */
   3496       case Iop_Sqrt32Fx4: {
   3497          /* :: (rmode, vec) -> vec */
   3498          HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
   3499          HReg dst = newVRegV(env);
   3500          /* XXXROUNDINGFIXME */
   3501          /* set roundingmode here */
   3502          addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
   3503                            ? X86Instr_Sse64Fx2 : X86Instr_Sse32Fx4)
   3504                        (Xsse_SQRTF, arg, dst));
   3505          return dst;
   3506       }
   3507 
   3508       case Iop_SetV128lo32: {
   3509          HReg dst = newVRegV(env);
   3510          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
   3511          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
   3512          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3513          sub_from_esp(env, 16);
   3514          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
   3515          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
   3516          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3517          add_to_esp(env, 16);
   3518          return dst;
   3519       }
   3520 
   3521       case Iop_SetV128lo64: {
   3522          HReg dst = newVRegV(env);
   3523          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
   3524          HReg srcIhi, srcIlo;
   3525          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3526          X86AMode* esp4 = advance4(esp0);
   3527          iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
   3528          sub_from_esp(env, 16);
   3529          addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
   3530          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
   3531          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
   3532          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3533          add_to_esp(env, 16);
   3534          return dst;
   3535       }
   3536 
   3537       case Iop_64HLtoV128: {
   3538          HReg r3, r2, r1, r0;
   3539          X86AMode* esp0  = X86AMode_IR(0, hregX86_ESP());
   3540          X86AMode* esp4  = advance4(esp0);
   3541          X86AMode* esp8  = advance4(esp4);
   3542          X86AMode* esp12 = advance4(esp8);
   3543          HReg dst = newVRegV(env);
   3544          /* do this via the stack (easy, convenient, etc) */
   3545          sub_from_esp(env, 16);
   3546          /* Do the less significant 64 bits */
   3547          iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
   3548          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
   3549          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
   3550          /* Do the more significant 64 bits */
   3551          iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
   3552          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
   3553          addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
   3554          /* Fetch result back from stack. */
   3555          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
   3556          add_to_esp(env, 16);
   3557          return dst;
   3558       }
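              /* Illustrative stack layout: after the four stores the 16
                 bytes at 0(%esp) are, from low to high,
                    arg2.lo  arg2.hi  arg1.lo  arg1.hi
                 so the SSE load yields a V128 with arg1 as the high 64
                 bits and arg2 as the low 64 bits, as 64HLtoV128 requires. */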
   3559 
   3560       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
   3561       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
   3562       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
   3563       case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
   3564       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
   3565       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
   3566       do_32Fx4:
   3567       {
   3568          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3569          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3570          HReg dst = newVRegV(env);
   3571          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3572          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
   3573          return dst;
   3574       }
   3575 
   3576       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
   3577       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
   3578       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
   3579       case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
   3580       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
   3581       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
   3582       do_64Fx2:
   3583       {
   3584          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3585          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3586          HReg dst = newVRegV(env);
   3587          REQUIRE_SSE2;
   3588          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3589          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
   3590          return dst;
   3591       }
   3592 
   3593       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
   3594       case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
   3595       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
   3596       case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
   3597       case Iop_Add32F0x4:   op = Xsse_ADDF;   goto do_32F0x4;
   3598       case Iop_Div32F0x4:   op = Xsse_DIVF;   goto do_32F0x4;
   3599       case Iop_Max32F0x4:   op = Xsse_MAXF;   goto do_32F0x4;
   3600       case Iop_Min32F0x4:   op = Xsse_MINF;   goto do_32F0x4;
   3601       case Iop_Mul32F0x4:   op = Xsse_MULF;   goto do_32F0x4;
   3602       case Iop_Sub32F0x4:   op = Xsse_SUBF;   goto do_32F0x4;
   3603       do_32F0x4: {
   3604          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3605          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3606          HReg dst = newVRegV(env);
   3607          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3608          addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
   3609          return dst;
   3610       }
   3611 
   3612       case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
   3613       case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
   3614       case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
   3615       case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
   3616       case Iop_Add64F0x2:   op = Xsse_ADDF;   goto do_64F0x2;
   3617       case Iop_Div64F0x2:   op = Xsse_DIVF;   goto do_64F0x2;
   3618       case Iop_Max64F0x2:   op = Xsse_MAXF;   goto do_64F0x2;
   3619       case Iop_Min64F0x2:   op = Xsse_MINF;   goto do_64F0x2;
   3620       case Iop_Mul64F0x2:   op = Xsse_MULF;   goto do_64F0x2;
   3621       case Iop_Sub64F0x2:   op = Xsse_SUBF;   goto do_64F0x2;
   3622       do_64F0x2: {
   3623          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3624          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3625          HReg dst = newVRegV(env);
   3626          REQUIRE_SSE2;
   3627          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3628          addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
   3629          return dst;
   3630       }
   3631 
   3632       case Iop_QNarrowBin32Sto16Sx8:
   3633          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
   3634       case Iop_QNarrowBin16Sto8Sx16:
   3635          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
   3636       case Iop_QNarrowBin16Sto8Ux16:
   3637          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
   3638 
   3639       case Iop_InterleaveHI8x16:
   3640          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
   3641       case Iop_InterleaveHI16x8:
   3642          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
   3643       case Iop_InterleaveHI32x4:
   3644          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
   3645       case Iop_InterleaveHI64x2:
   3646          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
   3647 
   3648       case Iop_InterleaveLO8x16:
   3649          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
   3650       case Iop_InterleaveLO16x8:
   3651          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
   3652       case Iop_InterleaveLO32x4:
   3653          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
   3654       case Iop_InterleaveLO64x2:
   3655          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
   3656 
   3657       case Iop_AndV128:    op = Xsse_AND;      goto do_SseReRg;
   3658       case Iop_OrV128:     op = Xsse_OR;       goto do_SseReRg;
   3659       case Iop_XorV128:    op = Xsse_XOR;      goto do_SseReRg;
   3660       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
   3661       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
   3662       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
   3663       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
   3664       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
   3665       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
   3666       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
   3667       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
   3668       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
   3669       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
   3670       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
   3671       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
   3672       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
   3673       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
   3674       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
   3675       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
   3676       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
   3677       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
   3678       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
   3679       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
   3680       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
   3681       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
   3682       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
   3683       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
   3684       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
   3685       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
   3686       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
   3687       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
   3688       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
   3689       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
   3690       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
   3691       do_SseReRg: {
   3692          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
   3693          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
   3694          HReg dst = newVRegV(env);
   3695          if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
   3696             REQUIRE_SSE2;
   3697          if (arg1isEReg) {
   3698             addInstr(env, mk_vMOVsd_RR(arg2, dst));
   3699             addInstr(env, X86Instr_SseReRg(op, arg1, dst));
   3700          } else {
   3701             addInstr(env, mk_vMOVsd_RR(arg1, dst));
   3702             addInstr(env, X86Instr_SseReRg(op, arg2, dst));
   3703          }
   3704          return dst;
   3705       }
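              /* Illustrative note: when arg1isEReg is set (the PACK and
                 UNPCK families), the underlying insn's operand order is
                 fixed, so arg2 is copied into dst and arg1 is supplied as
                 the E (source) register; for the other ops, arg1 goes
                 into dst and arg2 is the source operand. */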
   3706 
   3707       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
   3708       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
   3709       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
   3710       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
   3711       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
   3712       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
   3713       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
   3714       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
   3715       do_SseShift: {
   3716          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
   3717          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
   3718          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   3719          HReg      ereg = newVRegV(env);
   3720          HReg      dst  = newVRegV(env);
   3721          REQUIRE_SSE2;
   3722          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3723          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3724          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
   3725          addInstr(env, X86Instr_Push(rmi));
   3726          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
   3727          addInstr(env, mk_vMOVsd_RR(greg, dst));
   3728          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
   3729          add_to_esp(env, 16);
   3730          return dst;
   3731       }
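              /* Illustrative sketch: the four pushes build a 16-byte value
                 at 0(%esp) whose low 32 bits are the shift amount and
                 whose remaining bits are zero; the SSE shifts take their
                 count from the low quadword of the source register, so
                 loading this into ereg supplies the required count. */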
   3732 
   3733       case Iop_NarrowBin32to16x8:
   3734          fn = (HWord)h_generic_calc_NarrowBin32to16x8;
   3735          goto do_SseAssistedBinary;
   3736       case Iop_NarrowBin16to8x16:
   3737          fn = (HWord)h_generic_calc_NarrowBin16to8x16;
   3738          goto do_SseAssistedBinary;
   3739       do_SseAssistedBinary: {
   3740          /* As with the amd64 case (where this is copied from) we
   3741             generate pretty bad code. */
   3742          vassert(fn != 0);
   3743          HReg dst = newVRegV(env);
   3744          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
   3745          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
   3746          HReg argp = newVRegI(env);
   3747          /* subl $112, %esp         -- make a space */
   3748          sub_from_esp(env, 112);
   3749          /* leal 48(%esp), %r_argp  -- point into it */
   3750          addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
   3751                                       argp));
   3752          /* andl $-16, %r_argp      -- 16-align the pointer */
   3753          addInstr(env, X86Instr_Alu32R(Xalu_AND,
   3754                                        X86RMI_Imm( ~(UInt)15 ),
   3755                                        argp));
   3756          /* Prepare 3 arg regs:
   3757             leal  0(%r_argp), %eax
   3758             leal 16(%r_argp), %edx
   3759             leal 32(%r_argp), %ecx
   3760          */
   3761          addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
   3762                                       hregX86_EAX()));
   3763          addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
   3764                                       hregX86_EDX()));
   3765          addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
   3766                                       hregX86_ECX()));
   3767          /* Store the two args, at (%edx) and (%ecx):
   3768             movupd  %argL, 0(%edx)
   3769             movupd  %argR, 0(%ecx)
   3770          */
   3771          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
   3772                                         X86AMode_IR(0, hregX86_EDX())));
   3773          addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
   3774                                         X86AMode_IR(0, hregX86_ECX())));
   3775          /* call the helper */
   3776          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn,
   3777                                       3, mk_RetLoc_simple(RLPri_None) ));
   3778          /* fetch the result from memory, using %r_argp, which the
   3779             register allocator will keep alive across the call. */
   3780          addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
   3781                                         X86AMode_IR(0, argp)));
   3782          /* and finally, clear the space */
   3783          add_to_esp(env, 112);
   3784          return dst;
   3785       }
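              /* Illustrative frame sketch for the helper call above:
                    esp .. esp+111 : scratch; argp = 16-aligned ptr inside
                    0(argp)  result, written by helper  (passed in %eax)
                    16(argp) argL                       (passed in %edx)
                    32(argp) argR                       (passed in %ecx)
                 The three pointers travel in registers because the call
                 is marked with 3 regparms. */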
   3786 
   3787       default:
   3788          break;
   3789    } /* switch (e->Iex.Binop.op) */
   3790    } /* if (e->tag == Iex_Binop) */
   3791 
   3792 
   3793    if (e->tag == Iex_Triop) {
   3794    IRTriop *triop = e->Iex.Triop.details;
   3795    switch (triop->op) {
   3796 
   3797       case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
   3798       case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
   3799       case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
   3800       case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
   3801       do_32Fx4_w_rm:
   3802       {
   3803          HReg argL = iselVecExpr(env, triop->arg2);
   3804          HReg argR = iselVecExpr(env, triop->arg3);
   3805          HReg dst = newVRegV(env);
   3806          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3807          /* XXXROUNDINGFIXME */
   3808          /* set roundingmode here */
   3809          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
   3810          return dst;
   3811       }
   3812 
   3813       case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
   3814       case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
   3815       case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
   3816       case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
   3817       do_64Fx2_w_rm:
   3818       {
   3819          HReg argL = iselVecExpr(env, triop->arg2);
   3820          HReg argR = iselVecExpr(env, triop->arg3);
   3821          HReg dst = newVRegV(env);
   3822          REQUIRE_SSE2;
   3823          addInstr(env, mk_vMOVsd_RR(argL, dst));
   3824          /* XXXROUNDINGFIXME */
   3825          /* set roundingmode here */
   3826          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
   3827          return dst;
   3828       }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      X86CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, X86Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }
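
   /* The ITE case above is a branchless select: dst is
      unconditionally set to iftrue, then conditionally overwritten
      with iffalse.  Inverting the condition via (cc ^ 1) assumes,
      as in the underlying x86 encodings, that each X86CondCode and
      its logical complement differ only in the bottom bit. */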

   vec_fail:
   vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");

#  undef REQUIRE_SSE1
#  undef REQUIRE_SSE2
#  undef SSE2_OR_ABOVE
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end   = stmt->Ist.Store.end;

      if (tya != Ity_I32 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
                                       r,am ));
         return;
      }
      if (tyd == Ity_F64) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_I64) {
         HReg vHi, vLo, rA;
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
         rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
         addInstr(env, X86Instr_Alu32M(
                          Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
         return;
      }
      if (tyd == Ity_V128) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
         return;
      }
      break;
   }
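
   /* A note on the I64 store above: x86 has no 64-bit integer
      store, so the value is written as two 32-bit halves through
      the same base register, little-endian as required by the
      Iend_LE guard at the top of the case:
         movl %vLo, 0(%rA)
         movl %vHi, 4(%rA)
   */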

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I32) {
         /* We're going to write to memory, so compute the RHS into an
            X86RI. */
         X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  X86Instr_Alu32M(
                     Xalu_MOV,
                     ri,
                     X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : 2),
                          r,
                          X86AMode_IR(stmt->Ist.Put.offset,
                                      hregX86_EBP())));
         return;
      }
      if (ty == Ity_I64) {
         HReg vHi, vLo;
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
         return;
      }
      if (ty == Ity_V128) {
         HReg      vec = iselVecExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         X86AMode* am  = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
         set_FPU_rounding_default(env); /* paranoia */
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }
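
   /* All the Put cases above address guest state the same way: as
      a fixed offset from the baseblock pointer, which generated
      x86 code keeps in %ebp throughout.  A 32-bit PUT(<offs>) is
      therefore just (with <offs> standing in for the real offset)
         movl %src, <offs>(%ebp)
      and wider types are decomposed into adjacent 4-byte slots. */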

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      X86AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo;
         X86AMode* am4 = advance4(am);
         iselInt64Expr(&rHi, &rLo, env, puti->data);
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
         addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
         return;
      }
      break;
   }
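
   /* The PutI cases mirror the Put cases except that the guest
      state offset is not a compile-time constant: genGuestArrayOffset
      folds the descriptor, the run-time index 'ix' and the constant
      'bias' into a single %ebp-relative amode, and advance4 (as its
      uses here and in Ist_Put suggest) yields the same amode
      displaced by 4 bytes, for the high word of an I64. */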

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* Optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for
         Memcheck-created IR) we get t = address-expression, with t
         later used twice, and so doing this naturally turns the
         address expression back into an X86 amode. */
      if (ty == Ity_I32
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
         X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Xam.IR.reg;
            addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
         } else {
            addInstr(env, X86Instr_Lea32(am,dst));
         }
         return;
      }
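
      /* As a concrete (made-up) illustration of the LEA trick
         above: for t = Add32(t1, t2), iselIntExpr_AMode can return
         the amode (%r_t1,%r_t2,1), so we emit the single
            leal (%r_t1,%r_t2,1), %r_t
         rather than a move followed by an add -- one instruction,
         and the condition codes are left untouched. */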

      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I64) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp64( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, X86Instr_Set32(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src,dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %eax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
            return;
         }
         case Ity_I64: {
            /* The returned value is in %edx:%eax.  Park it in the
               register-pair associated with tmp. */
            vassert(rloc.pri == RLPri_2Int);
            vassert(addToSp == 0);
            HReg dstHi, dstLo;
            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
            addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc tells
               us where.  Fish it off the stack and then move the
               stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg      dst = lookupIRTemp(env, d->tmp);
            X86AMode* am  = X86AMode_IR(rloc.spOff, hregX86_ESP());
            addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
            add_to_esp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }
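
   /* Regarding the 0x555..555 mentioned above: if a dirty call is
      guarded and the guard evaluates false at run time, the call is
      skipped, but the destination temporary must still end up
      defined; doHelperCall therefore arranges for it to receive
      that recognisable junk value rather than whatever the dead
      registers happened to hold. */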

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, X86Instr_MFence(env->hwcaps));
            return;
         default:
            break;
      }
      break;
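
   /* X86Instr_MFence is handed the hwcaps because the best fence
      available varies with the host CPU -- presumably a real mfence
      on SSE2-capable hosts and some weaker substitute on older
      ones; the emitter in host_x86_defs.c makes the actual
      choice. */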

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         switch (ty) {
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, X86Instr_ACAS(am, sz));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      } else {
         /* double CAS */
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit allowed in this case */
         /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
         /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
         X86AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         if (ty != Ity_I32)
            goto unhandled_cas;
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
         addInstr(env, X86Instr_DACAS(am));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EDX()), rOldHi));
         addInstr(env,
                  X86Instr_CMov32(Xcc_NZ,
                                  X86RM_Reg(hregX86_EAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;
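
   /* Both CAS flavours above lean directly on the x86 primitives:
      the singleton case on lock cmpxchg{b,w,l} (expected value in
      %eax, replacement in %ebx, per the moves above) and the double
      case on lock cmpxchg8b (expected in %edx:%eax, replacement in
      %ecx:%ebx).  The hardware overwrites %eax (and %edx) with the
      observed old value only when the comparison fails, which is
      why the Xcc_NZ conditional moves afterwards update rOldLo and
      rOldHi just in that case -- on success they already hold the
      expected (== old) value. */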

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- NO-OP --------- */
   /* Fairly self-explanatory, wouldn't you say? */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U32)
         vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");

      X86CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      X86AMode*   amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
                                      hregX86_EBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
                                           amEIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_MapFail:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_int128:
         case Ijk_Sys_int129:
         case Ijk_Sys_int130:
         case Ijk_Sys_int145:
         case Ijk_Sys_int210:
         case Ijk_Sys_syscall:
         case Ijk_Sys_sysenter:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }
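
   /* A note on the transfer kinds used above and in iselNext below:
      XDirect is a patchable jump to a known guest address, usable
      only when chaining is allowed; XIndir is a computed jump
      through a register; XAssisted instead returns control to the
      run-time dispatcher, carrying the jump kind so the dispatcher
      can provide whatever service (syscall, client request, etc)
      that kind calls for. */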

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U32);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr32)cdst->Ico.U32) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
                                           amEIP, Xcc_ALWAYS,
                                           toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                             Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         if (env->chainingAllowed) {
            addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
         } else {
            addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_MapFail:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_int128:
      case Ijk_Sys_int129:
      case Ijk_Sys_int130:
      case Ijk_Sys_int145:
      case Ijk_Sys_int210:
      case Ijk_Sys_syscall:
      case Ijk_Sys_sysenter:
      case Ijk_InvalICache:
      case Ijk_Yield:
      {
         HReg      r     = iselIntExpr_R(env, next);
         X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
         addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to x86 code. */

HInstrArray* iselSB_X86 ( const IRSB* bb,
                          VexArch      arch_host,
                          const VexArchInfo* archinfo_host,
                          const VexAbiInfo*  vbi/*UNUSED*/,
                          Int offs_Host_EvC_Counter,
                          Int offs_Host_EvC_FailAddr,
                          Bool chainingAllowed,
                          Bool addProfInc,
                          Addr max_ga )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;
   X86AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchX86);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_X86_MMXEXT
                     | VEX_HWCAPS_X86_SSE1
                     | VEX_HWCAPS_X86_SSE2
                     | VEX_HWCAPS_X86_SSE3
                     | VEX_HWCAPS_X86_LZCNT)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:  hreg   = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_I64:  hreg   = mkHReg(True, HRcInt32,  0, j++);
                        hregHI = mkHReg(True, HRcInt32,  0, j++); break;
         case Ity_F32:
         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB: IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;
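
   /* So, for example, an Ity_I64 temporary is shadowed by a pair of
      32-bit virtual registers, the high half living in vregmapHI --
      the pair that lookupIRTemp64 hands back in iselStmt above.
      F32 and F64 both map to a single 64-bit float-class vreg, and
      V128 to a single vector-class vreg. */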

   /* The very first instruction must be an event check. */
   amCounter  = X86AMode_IR(offs_Host_EvC_Counter,  hregX86_EBP());
   amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
   addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
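
   /* Conceptually, the event check counts this translation's
      entries against the counter at amCounter and, once that is
      exhausted, diverts through the address stored at amFailAddr
      back to the scheduler instead of running the block -- the
      exact instruction sequence is the EvCheck emitter's business,
      in host_x86_defs.c. */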

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, X86Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_isel.c ---*/
/*---------------------------------------------------------------*/