
/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_isel.c ---*/
/*---------------------------------------------------------------*/

      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2011 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2011 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 #include "ir_match.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "host_generic_regs.h"
     44 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
     45 #include "host_arm_defs.h"
     46 
     47 
     48 /*---------------------------------------------------------*/
     49 /*--- ARMvfp control word stuff                         ---*/
     50 /*---------------------------------------------------------*/
     51 
     52 /* Vex-generated code expects to run with the FPU set as follows: all
     53    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
     54    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
     55    this corresponds to a FPSCR value of zero.
     56 
     57    fpscr should therefore be zero on entry to Vex-generated code, and
     58    should be unchanged at exit.  (Or at least the bottom 28 bits
     59    should be zero).
     60 */
     61 
     62 #define DEFAULT_FPSCR 0
     63 
     64 
     65 /*---------------------------------------------------------*/
     66 /*--- ISelEnv                                           ---*/
     67 /*---------------------------------------------------------*/
     68 
     69 /* This carries around:
     70 
     71    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     72      might encounter.  This is computed before insn selection starts,
     73      and does not change.
     74 
     75    - A mapping from IRTemp to HReg.  This tells the insn selector
     76      which virtual register(s) are associated with each IRTemp
     77      temporary.  This is computed before insn selection starts, and
     78      does not change.  We expect this mapping to map precisely the
     79      same set of IRTemps as the type mapping does.
     80 
     81         - vregmap   holds the primary register for the IRTemp.
     82         - vregmapHI is only used for 64-bit integer-typed
     83              IRTemps.  It holds the identity of a second
     84              32-bit virtual HReg, which holds the high half
     85              of the value.
     86 
     87    - The name of the vreg in which we stash a copy of the link reg, so
     88      helper functions don't kill it.
     89 
     90    - The code array, that is, the insns selected so far.
     91 
     92    - A counter, for generating new virtual registers.
     93 
     94    - The host hardware capabilities word.  This is set at the start
     95      and does not change.
     96 
     97    Note, this is all host-independent.  */
     98 
     99 typedef
    100    struct {
    101       IRTypeEnv*   type_env;
    102 
    103       HReg*        vregmap;
    104       HReg*        vregmapHI;
    105       Int          n_vregmap;
    106 
    107       HReg         savedLR;
    108 
    109       HInstrArray* code;
    110 
    111       Int          vreg_ctr;
    112 
    113       UInt         hwcaps;
    114    }
    115    ISelEnv;
    116 
    117 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
    118 {
    119    vassert(tmp >= 0);
    120    vassert(tmp < env->n_vregmap);
    121    return env->vregmap[tmp];
    122 }
    123 
    124 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
    125 {
    126    vassert(tmp >= 0);
    127    vassert(tmp < env->n_vregmap);
    128    vassert(env->vregmapHI[tmp] != INVALID_HREG);
    129    *vrLO = env->vregmap[tmp];
    130    *vrHI = env->vregmapHI[tmp];
    131 }
    132 
    133 static void addInstr ( ISelEnv* env, ARMInstr* instr )
    134 {
    135    addHInstr(env->code, instr);
    136    if (vex_traceflags & VEX_TRACE_VCODE) {
    137       ppARMInstr(instr);
    138       vex_printf("\n");
    139    }
    140 #if 0
    141    if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
    142          || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
    143          || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
    144       ppARMInstr(instr);
    145       vex_printf("\n");
    146    }
    147 #endif
    148 }
    149 
    150 static HReg newVRegI ( ISelEnv* env )
    151 {
    152    HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
    153    env->vreg_ctr++;
    154    return reg;
    155 }
    156 
    157 static HReg newVRegD ( ISelEnv* env )
    158 {
    159    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
    160    env->vreg_ctr++;
    161    return reg;
    162 }
    163 
    164 static HReg newVRegF ( ISelEnv* env )
    165 {
    166    HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
    167    env->vreg_ctr++;
    168    return reg;
    169 }
    170 
    171 static HReg newVRegV ( ISelEnv* env )
    172 {
    173    HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
    174    env->vreg_ctr++;
    175    return reg;
    176 }
    177 
    178 /* These are duplicated in guest_arm_toIR.c */
    179 static IRExpr* unop ( IROp op, IRExpr* a )
    180 {
    181    return IRExpr_Unop(op, a);
    182 }
    183 
    184 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    185 {
    186    return IRExpr_Binop(op, a1, a2);
    187 }
    188 
    189 static IRExpr* bind ( Int binder )
    190 {
    191    return IRExpr_Binder(binder);
    192 }
    193 
    194 
    195 /*---------------------------------------------------------*/
    196 /*--- ISEL: Forward declarations                        ---*/
    197 /*---------------------------------------------------------*/
    198 
    199 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
    200    iselXXX_wrk do the real work, but are not to be called directly.
    201    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
    202    checks that all returned registers are virtual.  You should not
    203    call the _wrk version directly.
    204 */
    205 static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
    206 static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
    207 
    208 static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
    209 static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
    210 
    211 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
    212 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
    213 
    214 static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
    215 static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
    216 
    217 static ARMRI84*    iselIntExpr_RI84_wrk
    218         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    219 static ARMRI84*    iselIntExpr_RI84
    220         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    221 
    222 static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
    223 static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
    224 
    225 static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
    226 static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
    227 
    228 static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
    229 static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
    230 
    231 static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
    232                                             ISelEnv* env, IRExpr* e );
    233 static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
    234                                             ISelEnv* env, IRExpr* e );
    235 
    236 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
    237 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
    238 
    239 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
    240 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
    241 
    242 static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
    243 static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
    244 
    245 static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
    246 static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
    247 
    248 /*---------------------------------------------------------*/
    249 /*--- ISEL: Misc helpers                                ---*/
    250 /*---------------------------------------------------------*/
    251 
    252 static UInt ROR32 ( UInt x, UInt sh ) {
    253    vassert(sh >= 0 && sh < 32);
    254    if (sh == 0)
    255       return x;
    256    else
    257       return (x << (32-sh)) | (x >> sh);
    258 }
    259 
    260 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
    261    form, and if so return the components. */
    262 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
    263 {
    264    UInt i;
    265    for (i = 0; i < 16; i++) {
    266       if (0 == (u & 0xFFFFFF00)) {
    267          *u8 = u;
    268          *u4 = i;
    269          return True;
    270       }
    271       u = ROR32(u, 30);
    272    }
    273    vassert(i == 16);
    274    return False;
    275 }
    276 
    277 /* Make a int reg-reg move. */
    278 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
    279 {
    280    vassert(hregClass(src) == HRcInt32);
    281    vassert(hregClass(dst) == HRcInt32);
    282    return ARMInstr_Mov(dst, ARMRI84_R(src));
    283 }
    284 
    285 /* Set the VFP unit's rounding mode to default (round to nearest). */
    286 static void set_VFP_rounding_default ( ISelEnv* env )
    287 {
    288    /* mov rTmp, #DEFAULT_FPSCR
    289       fmxr fpscr, rTmp
    290    */
    291    HReg rTmp = newVRegI(env);
    292    addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
    293    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
    294 }
    295 
    296 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
    297    expression denoting a value in the range 0 .. 3, indicating a round
    298    mode encoded as per type IRRoundingMode.  Set FPSCR to have the
    299    same rounding.
    300 */
    301 static
    302 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
    303 {
    304    /* This isn't simple, because 'mode' carries an IR rounding
    305       encoding, and we need to translate that to an ARMvfp one:
    306       The IR encoding:
    307          00  to nearest (the default)
    308          10  to +infinity
    309          01  to -infinity
    310          11  to zero
    311       The ARMvfp encoding:
    312          00  to nearest
    313          01  to +infinity
    314          10  to -infinity
    315          11  to zero
    316       Easy enough to do; just swap the two bits.
    317    */
    318    HReg irrm = iselIntExpr_R(env, mode);
    319    HReg tL   = newVRegI(env);
    320    HReg tR   = newVRegI(env);
    321    HReg t3   = newVRegI(env);
    322    /* tL = irrm << 1;
    323       tR = irrm >> 1;  if we're lucky, these will issue together
    324       tL &= 2;
    325       tR &= 1;         ditto
    326       t3 = tL | tR;
    327       t3 <<= 22;
    328       fmxr fpscr, t3
    329    */
    330    addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
    331    addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
    332    addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
    333    addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
    334    addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
    335    addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
    336    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
    337 }
    338 
    339 
    340 /*---------------------------------------------------------*/
    341 /*--- ISEL: Function call helpers                       ---*/
    342 /*---------------------------------------------------------*/
    343 
    344 /* Used only in doHelperCall.  See big comment in doHelperCall re
    345    handling of register-parameter args.  This function figures out
    346    whether evaluation of an expression might require use of a fixed
    347    register.  If in doubt return True (safe but suboptimal).
    348 */
    349 static
    350 Bool mightRequireFixedRegs ( IRExpr* e )
    351 {
    352    switch (e->tag) {
    353    case Iex_RdTmp: case Iex_Const: case Iex_Get:
    354       return False;
    355    default:
    356       return True;
    357    }
    358 }
    359 
    360 
    361 /* Do a complete function call.  guard is a Ity_Bit expression
    362    indicating whether or not the call happens.  If guard==NULL, the
    363    call is unconditional.  Returns True iff it managed to handle this
    364    combination of arg/return types, else returns False. */
    365 
    366 static
    367 Bool doHelperCall ( ISelEnv* env,
    368                     Bool passBBP,
    369                     IRExpr* guard, IRCallee* cee, IRExpr** args )
    370 {
    371    ARMCondCode cc;
    372    HReg        argregs[ARM_N_ARGREGS];
    373    HReg        tmpregs[ARM_N_ARGREGS];
    374    Bool        go_fast;
    375    Int         n_args, i, nextArgReg;
    376    ULong       target;
    377 
    378    vassert(ARM_N_ARGREGS == 4);
    379 
    380    /* Marshal args for a call and do the call.
    381 
    382       If passBBP is True, r8 (the baseblock pointer) is to be passed
    383       as the first arg.
    384 
    385       This function only deals with a tiny set of possibilities, which
    386       cover all helpers in practice.  The restrictions are that only
    387       arguments in registers are supported, hence only ARM_N_REGPARMS
    388       x 32 integer bits in total can be passed.  In fact the only
    389       supported arg types are I32 and I64.
    390 
    391       Generating code which is both efficient and correct when
    392       parameters are to be passed in registers is difficult, for the
    393       reasons elaborated in detail in comments attached to
    394       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
    395       of the method described in those comments.
    396 
    397       The problem is split into two cases: the fast scheme and the
    398       slow scheme.  In the fast scheme, arguments are computed
    399       directly into the target (real) registers.  This is only safe
    400       when we can be sure that computation of each argument will not
    401       trash any real registers set by computation of any other
    402       argument.
    403 
    404       In the slow scheme, all args are first computed into vregs, and
    405       once they are all done, they are moved to the relevant real
    406       regs.  This always gives correct code, but it also gives a bunch
    407       of vreg-to-rreg moves which are usually redundant but are hard
    408       for the register allocator to get rid of.
    409 
    410       To decide which scheme to use, all argument expressions are
    411       first examined.  If they are all so simple that it is clear they
    412       will be evaluated without use of any fixed registers, use the
    413       fast scheme, else use the slow scheme.  Note also that only
    414       unconditional calls may use the fast scheme, since having to
    415       compute a condition expression could itself trash real
    416       registers.
    417 
    418       Note this requires being able to examine an expression and
    419       determine whether or not evaluation of it might use a fixed
    420       register.  That requires knowledge of how the rest of this insn
    421       selector works.  Currently just the following 3 are regarded as
    422       safe -- hopefully they cover the majority of arguments in
    423       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
    424    */
    425 
    426    /* Note that the cee->regparms field is meaningless on ARM hosts
    427       (since there is only one calling convention) and so we always
    428       ignore it. */
    429 
    430    n_args = 0;
    431    for (i = 0; args[i]; i++)
    432       n_args++;
    433 
    434    argregs[0] = hregARM_R0();
    435    argregs[1] = hregARM_R1();
    436    argregs[2] = hregARM_R2();
    437    argregs[3] = hregARM_R3();
    438 
    439    tmpregs[0] = tmpregs[1] = tmpregs[2] =
    440    tmpregs[3] = INVALID_HREG;
    441 
    442    /* First decide which scheme (slow or fast) is to be used.  First
    443       assume the fast scheme, and select slow if any contraindications
    444       (wow) appear. */
    445 
    446    go_fast = True;
    447 
    448    if (guard) {
    449       if (guard->tag == Iex_Const
    450           && guard->Iex.Const.con->tag == Ico_U1
    451           && guard->Iex.Const.con->Ico.U1 == True) {
    452          /* unconditional */
    453       } else {
    454          /* Not manifestly unconditional -- be conservative. */
    455          go_fast = False;
    456       }
    457    }
    458 
    459    if (go_fast) {
    460       for (i = 0; i < n_args; i++) {
    461          if (mightRequireFixedRegs(args[i])) {
    462             go_fast = False;
    463             break;
    464          }
    465       }
    466    }
    467    /* At this point the scheme to use has been established.  Generate
    468       code to get the arg values into the argument rregs.  If we run
    469       out of arg regs, give up. */
    470 
    471    if (go_fast) {
    472 
    473       /* FAST SCHEME */
    474       nextArgReg = 0;
    475       if (passBBP) {
    476          addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    477                                      hregARM_R8() ));
    478          nextArgReg++;
    479       }
    480 
    481       for (i = 0; i < n_args; i++) {
    482          IRType aTy = typeOfIRExpr(env->type_env, args[i]);
    483          if (nextArgReg >= ARM_N_ARGREGS)
    484             return False; /* out of argregs */
    485          if (aTy == Ity_I32) {
    486             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    487                                         iselIntExpr_R(env, args[i]) ));
    488             nextArgReg++;
    489          }
    490          else if (aTy == Ity_I64) {
    491             /* 64-bit args must be passed in an a reg-pair of the form
    492                n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
    493                On a little-endian host, the less significant word is
    494                passed in the lower-numbered register. */
    495             if (nextArgReg & 1) {
    496                if (nextArgReg >= ARM_N_ARGREGS)
    497                   return False; /* out of argregs */
    498                addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
    499                nextArgReg++;
    500             }
    501             if (nextArgReg >= ARM_N_ARGREGS)
    502                return False; /* out of argregs */
    503             HReg raHi, raLo;
    504             iselInt64Expr(&raHi, &raLo, env, args[i]);
    505             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
    506             nextArgReg++;
    507             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
    508             nextArgReg++;
    509          }
    510          else
    511             return False; /* unhandled arg type */
    512       }
    513 
    514       /* Fast scheme only applies for unconditional calls.  Hence: */
    515       cc = ARMcc_AL;
    516 
    517    } else {
    518 
    519       /* SLOW SCHEME; move via temporaries */
    520       nextArgReg = 0;
    521 
    522       if (passBBP) {
    523          /* This is pretty stupid; better to move directly to r0
    524             after the rest of the args are done. */
    525          tmpregs[nextArgReg] = newVRegI(env);
    526          addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
    527                                      hregARM_R8() ));
    528          nextArgReg++;
    529       }
    530 
    531       for (i = 0; i < n_args; i++) {
    532          IRType aTy = typeOfIRExpr(env->type_env, args[i]);
    533          if (nextArgReg >= ARM_N_ARGREGS)
    534             return False; /* out of argregs */
    535          if (aTy == Ity_I32) {
    536             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
    537             nextArgReg++;
    538          }
    539          else if (aTy == Ity_I64) {
    540             /* Same comment applies as in the Fast-scheme case. */
    541             if (nextArgReg & 1)
    542                nextArgReg++;
    543             if (nextArgReg + 1 >= ARM_N_ARGREGS)
    544                return False; /* out of argregs */
    545             HReg raHi, raLo;
    546             iselInt64Expr(&raHi, &raLo, env, args[i]);
    547             tmpregs[nextArgReg] = raLo;
    548             nextArgReg++;
    549             tmpregs[nextArgReg] = raHi;
    550             nextArgReg++;
    551          }
    552       }
    553 
    554       /* Now we can compute the condition.  We can't do it earlier
    555          because the argument computations could trash the condition
    556          codes.  Be a bit clever to handle the common case where the
    557          guard is 1:Bit. */
    558       cc = ARMcc_AL;
    559       if (guard) {
    560          if (guard->tag == Iex_Const
    561              && guard->Iex.Const.con->tag == Ico_U1
    562              && guard->Iex.Const.con->Ico.U1 == True) {
    563             /* unconditional -- do nothing */
    564          } else {
    565             cc = iselCondCode( env, guard );
    566          }
    567       }
    568 
    569       /* Move the args to their final destinations. */
    570       for (i = 0; i < nextArgReg; i++) {
    571          if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
    572             addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
    573             continue;
    574          }
    575          /* None of these insns, including any spill code that might
    576             be generated, may alter the condition codes. */
    577          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
    578       }
    579 
    580    }
    581 
    582    /* Should be assured by checks above */
    583    vassert(nextArgReg <= ARM_N_ARGREGS);
    584 
    585    target = (HWord)Ptr_to_ULong(cee->addr);
    586 
    587    /* nextArgReg doles out argument registers.  Since these are
    588       assigned in the order r0, r1, r2, r3, its numeric value at this
    589       point, which must be between 0 and 4 inclusive, is going to be
    590       equal to the number of arg regs in use for the call.  Hence bake
    591       that number into the call (we'll need to know it when doing
    592       register allocation, to know what regs the call reads.)
    593 
    594       There is a bit of a twist -- harmless but worth recording.
    595       Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
    596       the first arg in r0 and the second in r3:r2, but r1 isn't used.
    597       We nevertheless have nextArgReg==4 and bake that into the call
    598       instruction.  This will mean the register allocator wil believe
    599       this insn reads r1 when in fact it doesn't.  But that's
    600       harmless; it just artificially extends the live range of r1
    601       unnecessarily.  The best fix would be to put into the
    602       instruction, a bitmask indicating which of r0/1/2/3 carry live
    603       values.  But that's too much hassle. */
    604 
    605    /* Finally, the call itself. */
    606    addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
    607 
    608    return True; /* success */
    609 }
    610 
    611 
    612 /*---------------------------------------------------------*/
    613 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
    614 /*---------------------------------------------------------*/
    615 
    616 /* Select insns for an integer-typed expression, and add them to the
    617    code list.  Return a reg holding the result.  This reg will be a
    618    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
    619    want to modify it, ask for a new vreg, copy it in there, and modify
    620    the copy.  The register allocator will do its best to map both
    621    vregs to the same real register, so the copies will often disappear
    622    later in the game.
    623 
    624    This should handle expressions of 32, 16 and 8-bit type.  All
    625    results are returned in a 32-bit register.  For 16- and 8-bit
    626    expressions, the upper 16/24 bits are arbitrary, so you should mask
    627    or sign extend partial values if necessary.
    628 */
    629 
    630 /* --------------------- AMode1 --------------------- */
    631 
    632 /* Return an AMode1 which computes the value of the specified
    633    expression, possibly also adding insns to the code list as a
    634    result.  The expression may only be a 32-bit one.
    635 */
    636 
    637 static Bool sane_AMode1 ( ARMAMode1* am )
    638 {
    639    switch (am->tag) {
    640       case ARMam1_RI:
    641          return
    642             toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
    643                     && (hregIsVirtual(am->ARMam1.RI.reg)
    644                         || am->ARMam1.RI.reg == hregARM_R8())
    645                     && am->ARMam1.RI.simm13 >= -4095
    646                     && am->ARMam1.RI.simm13 <= 4095 );
    647       case ARMam1_RRS:
    648          return
    649             toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
    650                     && hregIsVirtual(am->ARMam1.RRS.base)
    651                     && hregClass(am->ARMam1.RRS.index) == HRcInt32
    652                     && hregIsVirtual(am->ARMam1.RRS.index)
    653                     && am->ARMam1.RRS.shift >= 0
    654                     && am->ARMam1.RRS.shift <= 3 );
    655       default:
    656          vpanic("sane_AMode: unknown ARM AMode1 tag");
    657    }
    658 }
    659 
    660 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
    661 {
    662    ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
    663    vassert(sane_AMode1(am));
    664    return am;
    665 }
    666 
    667 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
    668 {
    669    IRType ty = typeOfIRExpr(env->type_env,e);
    670    vassert(ty == Ity_I32);
    671 
    672    /* FIXME: add RRS matching */
    673 
    674    /* {Add32,Sub32}(expr,simm13) */
    675    if (e->tag == Iex_Binop
    676        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    677        && e->Iex.Binop.arg2->tag == Iex_Const
    678        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    679       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    680       if (simm >= -4095 && simm <= 4095) {
    681          HReg reg;
    682          if (e->Iex.Binop.op == Iop_Sub32)
    683             simm = -simm;
    684          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    685          return ARMAMode1_RI(reg, simm);
    686       }
    687    }
    688 
    689    /* Doesn't match anything in particular.  Generate it into
    690       a register and use that. */
    691    {
    692       HReg reg = iselIntExpr_R(env, e);
    693       return ARMAMode1_RI(reg, 0);
    694    }
    695 
    696 }
    697 
    698 
    699 /* --------------------- AMode2 --------------------- */
    700 
    701 /* Return an AMode2 which computes the value of the specified
    702    expression, possibly also adding insns to the code list as a
    703    result.  The expression may only be a 32-bit one.
    704 */
    705 
    706 static Bool sane_AMode2 ( ARMAMode2* am )
    707 {
    708    switch (am->tag) {
    709       case ARMam2_RI:
    710          return
    711             toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
    712                     && hregIsVirtual(am->ARMam2.RI.reg)
    713                     && am->ARMam2.RI.simm9 >= -255
    714                     && am->ARMam2.RI.simm9 <= 255 );
    715       case ARMam2_RR:
    716          return
    717             toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
    718                     && hregIsVirtual(am->ARMam2.RR.base)
    719                     && hregClass(am->ARMam2.RR.index) == HRcInt32
    720                     && hregIsVirtual(am->ARMam2.RR.index) );
    721       default:
    722          vpanic("sane_AMode: unknown ARM AMode2 tag");
    723    }
    724 }
    725 
    726 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
    727 {
    728    ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
    729    vassert(sane_AMode2(am));
    730    return am;
    731 }
    732 
    733 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
    734 {
    735    IRType ty = typeOfIRExpr(env->type_env,e);
    736    vassert(ty == Ity_I32);
    737 
    738    /* FIXME: add RR matching */
    739 
    740    /* {Add32,Sub32}(expr,simm8) */
    741    if (e->tag == Iex_Binop
    742        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    743        && e->Iex.Binop.arg2->tag == Iex_Const
    744        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    745       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    746       if (simm >= -255 && simm <= 255) {
    747          HReg reg;
    748          if (e->Iex.Binop.op == Iop_Sub32)
    749             simm = -simm;
    750          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    751          return ARMAMode2_RI(reg, simm);
    752       }
    753    }
    754 
    755    /* Doesn't match anything in particular.  Generate it into
    756       a register and use that. */
    757    {
    758       HReg reg = iselIntExpr_R(env, e);
    759       return ARMAMode2_RI(reg, 0);
    760    }
    761 
    762 }
    763 
    764 
    765 /* --------------------- AModeV --------------------- */
    766 
    767 /* Return an AModeV which computes the value of the specified
    768    expression, possibly also adding insns to the code list as a
    769    result.  The expression may only be a 32-bit one.
    770 */
    771 
    772 static Bool sane_AModeV ( ARMAModeV* am )
    773 {
    774   return toBool( hregClass(am->reg) == HRcInt32
    775                  && hregIsVirtual(am->reg)
    776                  && am->simm11 >= -1020 && am->simm11 <= 1020
    777                  && 0 == (am->simm11 & 3) );
    778 }
    779 
    780 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
    781 {
    782    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
    783    vassert(sane_AModeV(am));
    784    return am;
    785 }
    786 
    787 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
    788 {
    789    IRType ty = typeOfIRExpr(env->type_env,e);
    790    vassert(ty == Ity_I32);
    791 
    792    /* {Add32,Sub32}(expr, simm8 << 2) */
    793    if (e->tag == Iex_Binop
    794        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    795        && e->Iex.Binop.arg2->tag == Iex_Const
    796        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    797       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    798       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
    799          HReg reg;
    800          if (e->Iex.Binop.op == Iop_Sub32)
    801             simm = -simm;
    802          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    803          return mkARMAModeV(reg, simm);
    804       }
    805    }
    806 
    807    /* Doesn't match anything in particular.  Generate it into
    808       a register and use that. */
    809    {
    810       HReg reg = iselIntExpr_R(env, e);
    811       return mkARMAModeV(reg, 0);
    812    }
    813 
    814 }
    815 
    816 /* -------------------- AModeN -------------------- */
    817 
    818 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
    819 {
    820    return iselIntExpr_AModeN_wrk(env, e);
    821 }
    822 
    823 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
    824 {
    825    HReg reg = iselIntExpr_R(env, e);
    826    return mkARMAModeN_R(reg);
    827 }
    828 
    829 
    830 /* --------------------- RI84 --------------------- */
    831 
    832 /* Select instructions to generate 'e' into a RI84.  If mayInv is
    833    true, then the caller will also accept an I84 form that denotes
    834    'not e'.  In this case didInv may not be NULL, and *didInv is set
    835    to True.  This complication is so as to allow generation of an RI84
    836    which is suitable for use in either an AND or BIC instruction,
    837    without knowing (before this call) which one.
    838 */
    839 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
    840                                    ISelEnv* env, IRExpr* e )
    841 {
    842    ARMRI84* ri;
    843    if (mayInv)
    844       vassert(didInv != NULL);
    845    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
    846    /* sanity checks ... */
    847    switch (ri->tag) {
    848       case ARMri84_I84:
    849          return ri;
    850       case ARMri84_R:
    851          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
    852          vassert(hregIsVirtual(ri->ARMri84.R.reg));
    853          return ri;
    854       default:
    855          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
    856    }
    857 }
    858 
    859 /* DO NOT CALL THIS DIRECTLY ! */
    860 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
    861                                        ISelEnv* env, IRExpr* e )
    862 {
    863    IRType ty = typeOfIRExpr(env->type_env,e);
    864    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
    865 
    866    if (didInv) *didInv = False;
    867 
    868    /* special case: immediate */
    869    if (e->tag == Iex_Const) {
    870       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
    871       switch (e->Iex.Const.con->tag) {
    872          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
    873          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
    874          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    875          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
    876       }
    877       if (fitsIn8x4(&u8, &u4, u)) {
    878          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    879       }
    880       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
    881          vassert(didInv);
    882          *didInv = True;
    883          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    884       }
    885       /* else fail, fall through to default case */
    886    }
    887 
    888    /* default case: calculate into a register and return that */
    889    {
    890       HReg r = iselIntExpr_R ( env, e );
    891       return ARMRI84_R(r);
    892    }
    893 }
    894 
    895 
    896 /* --------------------- RI5 --------------------- */
    897 
    898 /* Select instructions to generate 'e' into a RI5. */
    899 
    900 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
    901 {
    902    ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
    903    /* sanity checks ... */
    904    switch (ri->tag) {
    905       case ARMri5_I5:
    906          return ri;
    907       case ARMri5_R:
    908          vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
    909          vassert(hregIsVirtual(ri->ARMri5.R.reg));
    910          return ri;
    911       default:
    912          vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
    913    }
    914 }
    915 
    916 /* DO NOT CALL THIS DIRECTLY ! */
    917 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
    918 {
    919    IRType ty = typeOfIRExpr(env->type_env,e);
    920    vassert(ty == Ity_I32 || ty == Ity_I8);
    921 
    922    /* special case: immediate */
    923    if (e->tag == Iex_Const) {
    924       UInt u; /* both invalid */
    925       switch (e->Iex.Const.con->tag) {
    926          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
    927          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
    928          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    929          default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
    930       }
    931       if (u >= 1 && u <= 31) {
    932          return ARMRI5_I5(u);
    933       }
    934       /* else fail, fall through to default case */
    935    }
    936 
    937    /* default case: calculate into a register and return that */
    938    {
    939       HReg r = iselIntExpr_R ( env, e );
    940       return ARMRI5_R(r);
    941    }
    942 }
    943 
    944 
    945 /* ------------------- CondCode ------------------- */
    946 
    947 /* Generate code to evaluated a bit-typed expression, returning the
    948    condition code which would correspond when the expression would
    949    notionally have returned 1. */
    950 
    951 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
    952 {
    953    ARMCondCode cc = iselCondCode_wrk(env,e);
    954    vassert(cc != ARMcc_NV);
    955    return cc;
    956 }
    957 
    958 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
    959 {
    960    vassert(e);
    961    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
    962 
    963    /* var */
    964    if (e->tag == Iex_RdTmp) {
    965       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
    966       /* CmpOrTst doesn't modify rTmp; so this is OK. */
    967       ARMRI84* one  = ARMRI84_I84(1,0);
    968       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
    969       return ARMcc_NE;
    970    }
    971 
    972    /* Not1(e) */
    973    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
    974       /* Generate code for the arg, and negate the test condition */
    975       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
    976    }
    977 
    978    /* --- patterns rooted at: 32to1 --- */
    979 
    980    if (e->tag == Iex_Unop
    981        && e->Iex.Unop.op == Iop_32to1) {
    982       HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
    983       ARMRI84* one  = ARMRI84_I84(1,0);
    984       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
    985       return ARMcc_NE;
    986    }
    987 
    988    /* --- patterns rooted at: CmpNEZ8 --- */
    989 
    990    if (e->tag == Iex_Unop
    991        && e->Iex.Unop.op == Iop_CmpNEZ8) {
    992       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
    993       ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
    994       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
    995       return ARMcc_NE;
    996    }
    997 
    998    /* --- patterns rooted at: CmpNEZ32 --- */
    999 
   1000    if (e->tag == Iex_Unop
   1001        && e->Iex.Unop.op == Iop_CmpNEZ32) {
   1002       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1003       ARMRI84* zero = ARMRI84_I84(0,0);
   1004       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
   1005       return ARMcc_NE;
   1006    }
   1007 
   1008    /* --- patterns rooted at: CmpNEZ64 --- */
   1009 
   1010    if (e->tag == Iex_Unop
   1011        && e->Iex.Unop.op == Iop_CmpNEZ64) {
   1012       HReg     tHi, tLo;
   1013       HReg     tmp  = newVRegI(env);
   1014       ARMRI84* zero = ARMRI84_I84(0,0);
   1015       iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
   1016       addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
   1017       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
   1018       return ARMcc_NE;
   1019    }
   1020 
   1021    /* --- Cmp*32*(x,y) --- */
   1022    if (e->tag == Iex_Binop
   1023        && (e->Iex.Binop.op == Iop_CmpEQ32
   1024            || e->Iex.Binop.op == Iop_CmpNE32
   1025            || e->Iex.Binop.op == Iop_CmpLT32S
   1026            || e->Iex.Binop.op == Iop_CmpLT32U
   1027            || e->Iex.Binop.op == Iop_CmpLE32S
   1028            || e->Iex.Binop.op == Iop_CmpLE32U)) {
   1029       HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1030       ARMRI84* argR = iselIntExpr_RI84(NULL,False,
   1031                                        env, e->Iex.Binop.arg2);
   1032       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
   1033       switch (e->Iex.Binop.op) {
   1034          case Iop_CmpEQ32:  return ARMcc_EQ;
   1035          case Iop_CmpNE32:  return ARMcc_NE;
   1036          case Iop_CmpLT32S: return ARMcc_LT;
   1037          case Iop_CmpLT32U: return ARMcc_LO;
   1038          case Iop_CmpLE32S: return ARMcc_LE;
   1039          case Iop_CmpLE32U: return ARMcc_LS;
   1040          default: vpanic("iselCondCode(arm): CmpXX32");
   1041       }
   1042    }
   1043 
   1044    /* --- CasCmpEQ* --- */
   1045    /* Ist_Cas has a dummy argument to compare with, so comparison is
   1046       always true. */
   1047    if (e->tag == Iex_Binop
   1048        && (e->Iex.Binop.op == Iop_CasCmpEQ32
   1049            || e->Iex.Binop.op == Iop_CasCmpEQ16
   1050            || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
   1051       return ARMcc_AL;
   1052    }
   1053 
   1054    ppIRExpr(e);
   1055    vpanic("iselCondCode");
   1056 }
   1057 
   1058 
   1059 /* --------------------- Reg --------------------- */
   1060 
   1061 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
   1062 {
   1063    HReg r = iselIntExpr_R_wrk(env, e);
   1064    /* sanity checks ... */
   1065 #  if 0
   1066    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1067 #  endif
   1068    vassert(hregClass(r) == HRcInt32);
   1069    vassert(hregIsVirtual(r));
   1070    return r;
   1071 }
   1072 
   1073 /* DO NOT CALL THIS DIRECTLY ! */
   1074 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
   1075 {
   1076    IRType ty = typeOfIRExpr(env->type_env,e);
   1077    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1078 //   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1079 
   1080    switch (e->tag) {
   1081 
   1082    /* --------- TEMP --------- */
   1083    case Iex_RdTmp: {
   1084       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1085    }
   1086 
   1087    /* --------- LOAD --------- */
   1088    case Iex_Load: {
   1089       HReg dst  = newVRegI(env);
   1090 
   1091       if (e->Iex.Load.end != Iend_LE)
   1092          goto irreducible;
   1093 
   1094       if (ty == Ity_I32) {
   1095          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1096          addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
   1097          return dst;
   1098       }
   1099       if (ty == Ity_I16) {
   1100          ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
   1101          addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
   1102                                        dst, amode));
   1103          return dst;
   1104       }
   1105       if (ty == Ity_I8) {
   1106          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1107          addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
   1108          return dst;
   1109       }
   1110 
   1111 //zz      if (ty == Ity_I16) {
   1112 //zz         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1113 //zz         return dst;
   1114 //zz      }
   1115 //zz      if (ty == Ity_I8) {
   1116 //zz         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1117 //zz         return dst;
   1118 //zz      }
   1119       break;
   1120    }
   1121 
   1122 //zz   /* --------- TERNARY OP --------- */
   1123 //zz   case Iex_Triop: {
   1124 //zz      /* C3210 flags following FPU partial remainder (fprem), both
   1125 //zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
   1126 //zz      if (e->Iex.Triop.op == Iop_PRemC3210F64
   1127 //zz          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
   1128 //zz         HReg junk = newVRegF(env);
   1129 //zz         HReg dst  = newVRegI(env);
   1130 //zz         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
   1131 //zz         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
   1132 //zz         /* XXXROUNDINGFIXME */
   1133 //zz         /* set roundingmode here */
   1134 //zz         addInstr(env, X86Instr_FpBinary(
   1135 //zz                           e->Iex.Binop.op==Iop_PRemC3210F64
   1136 //zz                              ? Xfp_PREM : Xfp_PREM1,
   1137 //zz                           srcL,srcR,junk
   1138 //zz                 ));
   1139 //zz         /* The previous pseudo-insn will have left the FPU's C3210
   1140 //zz            flags set correctly.  So bag them. */
   1141 //zz         addInstr(env, X86Instr_FpStSW_AX());
   1142 //zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1143 //zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
   1144 //zz         return dst;
   1145 //zz      }
   1146 //zz
   1147 //zz      break;
   1148 //zz   }
   1149 
   1150    /* --------- BINARY OP --------- */
   1151    case Iex_Binop: {
   1152 
   1153       ARMAluOp   aop = 0; /* invalid */
   1154       ARMShiftOp sop = 0; /* invalid */
   1155 
   1156       /* ADD/SUB/AND/OR/XOR */
   1157       switch (e->Iex.Binop.op) {
   1158          case Iop_And32: {
   1159             Bool     didInv = False;
   1160             HReg     dst    = newVRegI(env);
   1161             HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1162             ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
   1163                                                env, e->Iex.Binop.arg2);
   1164             addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
   1165                                        dst, argL, argR));
   1166             return dst;
   1167          }
   1168          case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
   1169          case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
   1170          case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
   1171          case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
   1172          std_binop: {
   1173             HReg     dst  = newVRegI(env);
   1174             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1175             ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
   1176                                              env, e->Iex.Binop.arg2);
   1177             addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
   1178             return dst;
   1179          }
   1180          default: break;
   1181       }
   1182 
   1183       /* SHL/SHR/SAR */
   1184       switch (e->Iex.Binop.op) {
   1185          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
   1186          case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
   1187          case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
   1188          sh_binop: {
   1189             HReg    dst  = newVRegI(env);
   1190             HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1191             ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
   1192             addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
   1193             vassert(ty == Ity_I32); /* else the IR is ill-typed */
   1194             return dst;
   1195          }
   1196          default: break;
   1197       }
   1198 
   1199       /* MUL */
   1200       if (e->Iex.Binop.op == Iop_Mul32) {
   1201          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1202          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1203          HReg dst  = newVRegI(env);
   1204          addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1205          addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1206          addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
   1207          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1208          return dst;
   1209       }
   1210 
   1211       /* Handle misc other ops. */
   1212 
   1213       if (e->Iex.Binop.op == Iop_Max32U) {
   1214          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1215          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1216          HReg dst  = newVRegI(env);
   1217          addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
   1218                                          ARMRI84_R(argR)));
   1219          addInstr(env, mk_iMOVds_RR(dst, argL));
   1220          addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
   1221          return dst;
   1222       }
   1223 
   1224       if (e->Iex.Binop.op == Iop_CmpF64) {
   1225          HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
   1226          HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
   1227          HReg dst = newVRegI(env);
   1228          /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
   1229             FMSTAT, so we can examine the results directly. */
   1230          addInstr(env, ARMInstr_VCmpD(dL, dR));
   1231          /* Create in dst, the IRCmpF64Result encoded result. */
   1232          addInstr(env, ARMInstr_Imm32(dst, 0));
   1233          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
   1234          addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
   1235          addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
   1236          addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
   1237          return dst;
   1238       }
   1239 
   1240       if (e->Iex.Binop.op == Iop_F64toI32S
   1241           || e->Iex.Binop.op == Iop_F64toI32U) {
   1242          /* Wretched uglyness all round, due to having to deal
   1243             with rounding modes.  Oh well. */
   1244          /* FIXME: if arg1 is a constant indicating round-to-zero,
   1245             then we could skip all this arsing around with FPSCR and
   1246             simply emit FTO{S,U}IZD. */
   1247          Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
   1248          HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
   1249          set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   1250          /* FTO{S,U}ID valF, valD */
   1251          HReg valF = newVRegF(env);
   1252          addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
   1253                                        valF, valD));
   1254          set_VFP_rounding_default(env);
   1255          /* VMOV dst, valF */
   1256          HReg dst = newVRegI(env);
   1257          addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
   1258          return dst;
   1259       }
   1260 
   1261       if (e->Iex.Binop.op == Iop_GetElem8x8
   1262           || e->Iex.Binop.op == Iop_GetElem16x4
   1263           || e->Iex.Binop.op == Iop_GetElem32x2) {
   1264          HReg res = newVRegI(env);
   1265          HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
   1266          UInt index, size;
   1267          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1268              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1269             vpanic("ARM target supports GetElem with constant "
   1270                    "second argument only\n");
   1271          }
   1272          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1273          switch (e->Iex.Binop.op) {
   1274             case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
   1275             case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
   1276             case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
   1277             default: vassert(0);
   1278          }
   1279          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1280                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1281                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1282                                         size, False));
   1283          return res;
   1284       }
   1285 
   1286       if (e->Iex.Binop.op == Iop_GetElem8x16
   1287           || e->Iex.Binop.op == Iop_GetElem16x8
   1288           || e->Iex.Binop.op == Iop_GetElem32x4) {
   1289          HReg res = newVRegI(env);
   1290          HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
   1291          UInt index, size;
   1292          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1293              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1294             vpanic("ARM target supports GetElem with constant "
   1295                    "second argument only\n");
   1296          }
   1297          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1298          switch (e->Iex.Binop.op) {
   1299             case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
   1300             case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
   1301             case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
   1302             default: vassert(0);
   1303          }
   1304          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1305                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1306                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1307                                         size, True));
   1308          return res;
   1309       }
   1310 
   1311       /* All cases involving host-side helper calls. */
   1312       void* fn = NULL;
   1313       switch (e->Iex.Binop.op) {
   1314          case Iop_Add16x2:
   1315             fn = &h_generic_calc_Add16x2; break;
   1316          case Iop_Sub16x2:
   1317             fn = &h_generic_calc_Sub16x2; break;
   1318          case Iop_HAdd16Ux2:
   1319             fn = &h_generic_calc_HAdd16Ux2; break;
   1320          case Iop_HAdd16Sx2:
   1321             fn = &h_generic_calc_HAdd16Sx2; break;
   1322          case Iop_HSub16Ux2:
   1323             fn = &h_generic_calc_HSub16Ux2; break;
   1324          case Iop_HSub16Sx2:
   1325             fn = &h_generic_calc_HSub16Sx2; break;
   1326          case Iop_QAdd16Sx2:
   1327             fn = &h_generic_calc_QAdd16Sx2; break;
   1328          case Iop_QSub16Sx2:
   1329             fn = &h_generic_calc_QSub16Sx2; break;
   1330          case Iop_Add8x4:
   1331             fn = &h_generic_calc_Add8x4; break;
   1332          case Iop_Sub8x4:
   1333             fn = &h_generic_calc_Sub8x4; break;
   1334          case Iop_HAdd8Ux4:
   1335             fn = &h_generic_calc_HAdd8Ux4; break;
   1336          case Iop_HAdd8Sx4:
   1337             fn = &h_generic_calc_HAdd8Sx4; break;
   1338          case Iop_HSub8Ux4:
   1339             fn = &h_generic_calc_HSub8Ux4; break;
   1340          case Iop_HSub8Sx4:
   1341             fn = &h_generic_calc_HSub8Sx4; break;
   1342          case Iop_QAdd8Sx4:
   1343             fn = &h_generic_calc_QAdd8Sx4; break;
   1344          case Iop_QAdd8Ux4:
   1345             fn = &h_generic_calc_QAdd8Ux4; break;
   1346          case Iop_QSub8Sx4:
   1347             fn = &h_generic_calc_QSub8Sx4; break;
   1348          case Iop_QSub8Ux4:
   1349             fn = &h_generic_calc_QSub8Ux4; break;
   1350          case Iop_Sad8Ux4:
   1351             fn = &h_generic_calc_Sad8Ux4; break;
   1352          default:
   1353             break;
   1354       }
   1355 
   1356       if (fn) {
   1357          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1358          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1359          HReg res  = newVRegI(env);
   1360          addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
   1361          addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
   1362          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
   1363          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1364          return res;
   1365       }
   1366 
   1367       break;
   1368    }
   1369 
   1370    /* --------- UNARY OP --------- */
   1371    case Iex_Unop: {
   1372 
   1373 //zz      /* 1Uto8(32to1(expr32)) */
   1374 //zz      if (e->Iex.Unop.op == Iop_1Uto8) {
   1375 //zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
   1376 //zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
   1377 //zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
   1378 //zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
   1379 //zz            IRExpr* expr32 = mi.bindee[0];
   1380 //zz            HReg dst = newVRegI(env);
   1381 //zz            HReg src = iselIntExpr_R(env, expr32);
   1382 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1383 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1384 //zz                                          X86RMI_Imm(1), dst));
   1385 //zz            return dst;
   1386 //zz         }
   1387 //zz      }
   1388 //zz
   1389 //zz      /* 8Uto32(LDle(expr32)) */
   1390 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1391 //zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
   1392 //zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
   1393 //zz                        unop(Iop_8Uto32,
   1394 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1395 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
   1396 //zz            HReg dst = newVRegI(env);
   1397 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1398 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1399 //zz            return dst;
   1400 //zz         }
   1401 //zz      }
   1402 //zz
   1403 //zz      /* 8Sto32(LDle(expr32)) */
   1404 //zz      if (e->Iex.Unop.op == Iop_8Sto32) {
   1405 //zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
   1406 //zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
   1407 //zz                        unop(Iop_8Sto32,
   1408 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1409 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
   1410 //zz            HReg dst = newVRegI(env);
   1411 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1412 //zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
   1413 //zz            return dst;
   1414 //zz         }
   1415 //zz      }
   1416 //zz
   1417 //zz      /* 16Uto32(LDle(expr32)) */
   1418 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1419 //zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
   1420 //zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
   1421 //zz                        unop(Iop_16Uto32,
   1422 //zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
   1423 //zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
   1424 //zz            HReg dst = newVRegI(env);
   1425 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1426 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1427 //zz            return dst;
   1428 //zz         }
   1429 //zz      }
   1430 //zz
   1431 //zz      /* 8Uto32(GET:I8) */
   1432 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1433 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1434 //zz            HReg      dst;
   1435 //zz            X86AMode* amode;
   1436 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
   1437 //zz            dst = newVRegI(env);
   1438 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1439 //zz                                hregX86_EBP());
   1440 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1441 //zz            return dst;
   1442 //zz         }
   1443 //zz      }
   1444 //zz
   1445 //zz      /* 16to32(GET:I16) */
   1446 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1447 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1448 //zz            HReg      dst;
   1449 //zz            X86AMode* amode;
   1450 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
   1451 //zz            dst = newVRegI(env);
   1452 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1453 //zz                                hregX86_EBP());
   1454 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1455 //zz            return dst;
   1456 //zz         }
   1457 //zz      }
   1458 
   1459       switch (e->Iex.Unop.op) {
   1460          case Iop_8Uto32: {
   1461             HReg dst = newVRegI(env);
   1462             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1463             addInstr(env, ARMInstr_Alu(ARMalu_AND,
   1464                                        dst, src, ARMRI84_I84(0xFF,0)));
   1465             return dst;
   1466          }
   1467 //zz         case Iop_8Uto16:
   1468 //zz         case Iop_8Uto32:
   1469 //zz         case Iop_16Uto32: {
   1470 //zz            HReg dst = newVRegI(env);
   1471 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1472 //zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
   1473 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1474 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1475 //zz                                          X86RMI_Imm(mask), dst));
   1476 //zz            return dst;
   1477 //zz         }
   1478 //zz         case Iop_8Sto16:
   1479 //zz         case Iop_8Sto32:
   1480          case Iop_16Uto32: {
   1481             HReg dst = newVRegI(env);
   1482             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1483             ARMRI5* amt = ARMRI5_I5(16);
   1484             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1485             addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
   1486             return dst;
   1487          }
   1488          case Iop_8Sto32:
   1489          case Iop_16Sto32: {
   1490             HReg dst = newVRegI(env);
   1491             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1492             ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
   1493             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1494             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1495             return dst;
   1496          }
   1497 //zz         case Iop_Not8:
   1498 //zz         case Iop_Not16:
   1499          case Iop_Not32: {
   1500             HReg dst = newVRegI(env);
   1501             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1502             addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
   1503             return dst;
   1504          }
   1505          case Iop_64HIto32: {
   1506             HReg rHi, rLo;
   1507             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1508             return rHi; /* and abandon rLo .. poor wee thing :-) */
   1509          }
   1510          case Iop_64to32: {
   1511             HReg rHi, rLo;
   1512             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1513             return rLo; /* similar stupid comment to the above ... */
   1514          }
   1515          case Iop_64to8: {
   1516             HReg rHi, rLo;
   1517             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   1518                HReg tHi = newVRegI(env);
   1519                HReg tLo = newVRegI(env);
   1520                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
   1521                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1522                rHi = tHi;
   1523                rLo = tLo;
   1524             } else {
   1525                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1526             }
   1527             return rLo;
   1528          }
   1529 //zz         case Iop_16HIto8:
   1530 //zz         case Iop_32HIto16: {
   1531 //zz            HReg dst  = newVRegI(env);
   1532 //zz            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
   1533 //zz            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
   1534 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1535 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
   1536 //zz            return dst;
   1537 //zz         }
   1538          case Iop_1Uto32:
   1539          case Iop_1Uto8: {
   1540             HReg        dst  = newVRegI(env);
   1541             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1542             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1543             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1544             return dst;
   1545          }
   1546 
   1547          case Iop_1Sto32: {
   1548             HReg        dst  = newVRegI(env);
   1549             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1550             ARMRI5*     amt  = ARMRI5_I5(31);
   1551             /* This is really rough.  We could do much better here;
   1552                perhaps mvn{cond} dst, #0 as the second insn?
   1553                (same applies to 1Sto64) */
   1554             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1555             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1556             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   1557             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1558             return dst;
   1559          }
   1560 
   1561 
   1562 //zz         case Iop_1Sto8:
   1563 //zz         case Iop_1Sto16:
   1564 //zz         case Iop_1Sto32: {
   1565 //zz            /* could do better than this, but for now ... */
   1566 //zz            HReg dst         = newVRegI(env);
   1567 //zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1568 //zz            addInstr(env, X86Instr_Set32(cond,dst));
   1569 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
   1570 //zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1571 //zz            return dst;
   1572 //zz         }
   1573 //zz         case Iop_Ctz32: {
   1574 //zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
   1575 //zz            HReg dst = newVRegI(env);
   1576 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1577 //zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
   1578 //zz            return dst;
   1579 //zz         }
   1580          case Iop_Clz32: {
   1581             /* Count leading zeroes; easy on ARM. */
   1582             HReg dst = newVRegI(env);
   1583             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1584             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
   1585             return dst;
   1586          }
   1587 
   1588          case Iop_CmpwNEZ32: {
   1589             HReg dst = newVRegI(env);
   1590             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1591             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1592             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1593             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
   1594             return dst;
   1595          }
   1596 
   1597          case Iop_Left32: {
   1598             HReg dst = newVRegI(env);
   1599             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1600             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1601             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1602             return dst;
   1603          }
   1604 
   1605 //zz         case Iop_V128to32: {
   1606 //zz            HReg      dst  = newVRegI(env);
   1607 //zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
   1608 //zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   1609 //zz            sub_from_esp(env, 16);
   1610 //zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   1611 //zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
   1612 //zz            add_to_esp(env, 16);
   1613 //zz            return dst;
   1614 //zz         }
   1615 //zz
   1616          case Iop_ReinterpF32asI32: {
   1617             HReg dst = newVRegI(env);
   1618             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   1619             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
   1620             return dst;
   1621          }
   1622 
   1623 //zz
   1624 //zz         case Iop_16to8:
   1625          case Iop_32to8:
   1626          case Iop_32to16:
   1627             /* These are no-ops. */
   1628             return iselIntExpr_R(env, e->Iex.Unop.arg);
   1629 
   1630          default:
   1631             break;
   1632       }
   1633 
   1634       /* All Unop cases involving host-side helper calls. */
   1635       void* fn = NULL;
   1636       switch (e->Iex.Unop.op) {
   1637          case Iop_CmpNEZ16x2:
   1638             fn = &h_generic_calc_CmpNEZ16x2; break;
   1639          case Iop_CmpNEZ8x4:
   1640             fn = &h_generic_calc_CmpNEZ8x4; break;
   1641          default:
   1642             break;
   1643       }
   1644 
   1645       if (fn) {
   1646          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   1647          HReg res = newVRegI(env);
   1648          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
   1649          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
   1650          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1651          return res;
   1652       }
   1653 
   1654       break;
   1655    }
   1656 
   1657    /* --------- GET --------- */
   1658    case Iex_Get: {
   1659       if (ty == Ity_I32
   1660           && 0 == (e->Iex.Get.offset & 3)
   1661           && e->Iex.Get.offset < 4096-4) {
   1662          HReg dst = newVRegI(env);
   1663          addInstr(env, ARMInstr_LdSt32(
   1664                           True/*isLoad*/,
   1665                           dst,
   1666                           ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
   1667          return dst;
   1668       }
   1669 //zz      if (ty == Ity_I8 || ty == Ity_I16) {
   1670 //zz         HReg dst = newVRegI(env);
   1671 //zz         addInstr(env, X86Instr_LoadEX(
   1672 //zz                          toUChar(ty==Ity_I8 ? 1 : 2),
   1673 //zz                          False,
   1674 //zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
   1675 //zz                          dst));
   1676 //zz         return dst;
   1677 //zz      }
   1678       break;
   1679    }
   1680 
   1681 //zz   case Iex_GetI: {
   1682 //zz      X86AMode* am
   1683 //zz         = genGuestArrayOffset(
   1684 //zz              env, e->Iex.GetI.descr,
   1685 //zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   1686 //zz      HReg dst = newVRegI(env);
   1687 //zz      if (ty == Ity_I8) {
   1688 //zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
   1689 //zz         return dst;
   1690 //zz      }
   1691 //zz      if (ty == Ity_I32) {
   1692 //zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
   1693 //zz         return dst;
   1694 //zz      }
   1695 //zz      break;
   1696 //zz   }
   1697 
   1698    /* --------- CCALL --------- */
   1699    case Iex_CCall: {
   1700       HReg    dst = newVRegI(env);
   1701       vassert(ty == e->Iex.CCall.retty);
   1702 
   1703       /* be very restrictive for now.  Only 32/64-bit ints allowed
   1704          for args, and 32 bits for return type. */
   1705       if (e->Iex.CCall.retty != Ity_I32)
   1706          goto irreducible;
   1707 
   1708       /* Marshal args, do the call, clear stack. */
   1709       Bool ok = doHelperCall( env, False,
   1710                               NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
   1711       if (ok) {
   1712          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1713          return dst;
   1714       }
   1715       /* else fall through; will hit the irreducible: label */
   1716    }
   1717 
   1718    /* --------- LITERAL --------- */
   1719    /* 32 literals */
   1720    case Iex_Const: {
   1721       UInt u   = 0;
   1722       HReg dst = newVRegI(env);
   1723       switch (e->Iex.Const.con->tag) {
   1724          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1725          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1726          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1727          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
   1728       }
   1729       addInstr(env, ARMInstr_Imm32(dst, u));
   1730       return dst;
   1731    }
   1732 
   1733    /* --------- MULTIPLEX --------- */
   1734    case Iex_Mux0X: {
   1735       IRExpr* cond = e->Iex.Mux0X.cond;
   1736 
   1737       /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
   1738       if (ty == Ity_I32
   1739           && cond->tag == Iex_Unop
   1740           && cond->Iex.Unop.op == Iop_32to8
   1741           && cond->Iex.Unop.arg->tag == Iex_Unop
   1742           && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
   1743          ARMCondCode cc;
   1744          HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
   1745          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
   1746          HReg     dst = newVRegI(env);
   1747          addInstr(env, mk_iMOVds_RR(dst, rX));
   1748          cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
   1749          addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
   1750          return dst;
   1751       }
   1752 
   1753       /* Mux0X(cond, expr0, exprX) (general case) */
   1754       if (ty == Ity_I32) {
   1755          HReg     r8;
   1756          HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
   1757          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
   1758          HReg     dst = newVRegI(env);
   1759          addInstr(env, mk_iMOVds_RR(dst, rX));
   1760          r8 = iselIntExpr_R(env, cond);
   1761          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   1762                                          ARMRI84_I84(0xFF,0)));
   1763          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
   1764          return dst;
   1765       }
   1766       break;
   1767    }
   1768 
   1769    default:
   1770    break;
   1771    } /* switch (e->tag) */
   1772 
   1773    /* We get here if no pattern matched. */
   1774   irreducible:
   1775    ppIRExpr(e);
   1776    vpanic("iselIntExpr_R: cannot reduce tree");
   1777 }
   1778 
   1779 
   1780 /* -------------------- 64-bit -------------------- */
   1781 
   1782 /* Compute a 64-bit value into a register pair, which is returned as
   1783    the first two parameters.  As with iselIntExpr_R, these may be
   1784    either real or virtual regs; in any case they must not be changed
   1785    by subsequent code emitted by the caller.  */
   1786 
   1787 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1788 {
   1789    iselInt64Expr_wrk(rHi, rLo, env, e);
   1790 #  if 0
   1791    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1792 #  endif
   1793    vassert(hregClass(*rHi) == HRcInt32);
   1794    vassert(hregIsVirtual(*rHi));
   1795    vassert(hregClass(*rLo) == HRcInt32);
   1796    vassert(hregIsVirtual(*rLo));
   1797 }
   1798 
   1799 /* DO NOT CALL THIS DIRECTLY ! */
   1800 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1801 {
   1802    vassert(e);
   1803    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   1804 
   1805    /* 64-bit literal */
   1806    if (e->tag == Iex_Const) {
   1807       ULong   w64 = e->Iex.Const.con->Ico.U64;
   1808       UInt    wHi = toUInt(w64 >> 32);
   1809       UInt    wLo = toUInt(w64);
   1810       HReg    tHi = newVRegI(env);
   1811       HReg    tLo = newVRegI(env);
   1812       vassert(e->Iex.Const.con->tag == Ico_U64);
   1813       addInstr(env, ARMInstr_Imm32(tHi, wHi));
   1814       addInstr(env, ARMInstr_Imm32(tLo, wLo));
   1815       *rHi = tHi;
   1816       *rLo = tLo;
   1817       return;
   1818    }
   1819 
   1820    /* read 64-bit IRTemp */
   1821    if (e->tag == Iex_RdTmp) {
   1822       if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   1823          HReg tHi = newVRegI(env);
   1824          HReg tLo = newVRegI(env);
   1825          HReg tmp = iselNeon64Expr(env, e);
   1826          addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1827          *rHi = tHi;
   1828          *rLo = tLo;
   1829       } else {
   1830          lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
   1831       }
   1832       return;
   1833    }
   1834 
   1835    /* 64-bit load */
   1836    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   1837       HReg      tLo, tHi, rA;
   1838       vassert(e->Iex.Load.ty == Ity_I64);
   1839       rA  = iselIntExpr_R(env, e->Iex.Load.addr);
   1840       tHi = newVRegI(env);
   1841       tLo = newVRegI(env);
   1842       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
   1843       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
   1844       *rHi = tHi;
   1845       *rLo = tLo;
   1846       return;
   1847    }
   1848 
   1849    /* 64-bit GET */
   1850    if (e->tag == Iex_Get) {
   1851       ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
   1852       ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
   1853       HReg tHi = newVRegI(env);
   1854       HReg tLo = newVRegI(env);
   1855       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
   1856       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
   1857       *rHi = tHi;
   1858       *rLo = tLo;
   1859       return;
   1860    }
   1861 
   1862    /* --------- BINARY ops --------- */
   1863    if (e->tag == Iex_Binop) {
   1864       switch (e->Iex.Binop.op) {
   1865 
   1866          /* 32 x 32 -> 64 multiply */
   1867          case Iop_MullS32:
   1868          case Iop_MullU32: {
   1869             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1870             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1871             HReg     tHi  = newVRegI(env);
   1872             HReg     tLo  = newVRegI(env);
   1873             ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
   1874                                ? ARMmul_SX : ARMmul_ZX;
   1875             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1876             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1877             addInstr(env, ARMInstr_Mul(mop));
   1878             addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
   1879             addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
   1880             *rHi = tHi;
   1881             *rLo = tLo;
   1882             return;
   1883          }
   1884 
   1885          case Iop_Or64: {
   1886             HReg xLo, xHi, yLo, yHi;
   1887             HReg tHi = newVRegI(env);
   1888             HReg tLo = newVRegI(env);
   1889             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   1890             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   1891             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
   1892             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
   1893             *rHi = tHi;
   1894             *rLo = tLo;
   1895             return;
   1896          }
   1897 
   1898          case Iop_Add64: {
   1899             HReg xLo, xHi, yLo, yHi;
   1900             HReg tHi = newVRegI(env);
   1901             HReg tLo = newVRegI(env);
   1902             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   1903             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   1904             addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
   1905             addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
   1906             *rHi = tHi;
   1907             *rLo = tLo;
   1908             return;
   1909          }
   1910 
   1911          /* 32HLto64(e1,e2) */
   1912          case Iop_32HLto64: {
   1913             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1914             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1915             return;
   1916          }
   1917 
   1918          default:
   1919             break;
   1920       }
   1921    }
   1922 
   1923    /* --------- UNARY ops --------- */
   1924    if (e->tag == Iex_Unop) {
   1925       switch (e->Iex.Unop.op) {
   1926 
   1927          /* ReinterpF64asI64 */
   1928          case Iop_ReinterpF64asI64: {
   1929             HReg dstHi = newVRegI(env);
   1930             HReg dstLo = newVRegI(env);
   1931             HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
   1932             addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
   1933             *rHi = dstHi;
   1934             *rLo = dstLo;
   1935             return;
   1936          }
   1937 
   1938          /* Left64(e) */
   1939          case Iop_Left64: {
   1940             HReg yLo, yHi;
   1941             HReg tHi  = newVRegI(env);
   1942             HReg tLo  = newVRegI(env);
   1943             HReg zero = newVRegI(env);
   1944             /* yHi:yLo = arg */
   1945             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
   1946             /* zero = 0 */
   1947             addInstr(env, ARMInstr_Imm32(zero, 0));
   1948             /* tLo = 0 - yLo, and set carry */
   1949             addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
   1950                                        tLo, zero, ARMRI84_R(yLo)));
   1951             /* tHi = 0 - yHi - carry */
   1952             addInstr(env, ARMInstr_Alu(ARMalu_SBC,
   1953                                        tHi, zero, ARMRI84_R(yHi)));
   1954             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
   1955                back in, so as to give the final result
   1956                tHi:tLo = arg | -arg. */
   1957             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
   1958             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
   1959             *rHi = tHi;
   1960             *rLo = tLo;
   1961             return;
   1962          }
   1963 
   1964          /* CmpwNEZ64(e) */
   1965          case Iop_CmpwNEZ64: {
   1966             HReg srcLo, srcHi;
   1967             HReg tmp1 = newVRegI(env);
   1968             HReg tmp2 = newVRegI(env);
   1969             /* srcHi:srcLo = arg */
   1970             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   1971             /* tmp1 = srcHi | srcLo */
   1972             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   1973                                        tmp1, srcHi, ARMRI84_R(srcLo)));
   1974             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
   1975             addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
   1976             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   1977                                        tmp2, tmp2, ARMRI84_R(tmp1)));
   1978             addInstr(env, ARMInstr_Shift(ARMsh_SAR,
   1979                                          tmp2, tmp2, ARMRI5_I5(31)));
   1980             *rHi = tmp2;
   1981             *rLo = tmp2;
   1982             return;
   1983          }
   1984 
   1985          case Iop_1Sto64: {
   1986             HReg        dst  = newVRegI(env);
   1987             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1988             ARMRI5*     amt  = ARMRI5_I5(31);
   1989             /* This is really rough.  We could do much better here;
   1990                perhaps mvn{cond} dst, #0 as the second insn?
   1991                (same applies to 1Sto32) */
   1992             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1993             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1994             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   1995             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1996             *rHi = dst;
   1997             *rLo = dst;
   1998             return;
   1999          }
   2000 
   2001          default:
   2002             break;
   2003       }
   2004    } /* if (e->tag == Iex_Unop) */
   2005 
   2006    /* --------- MULTIPLEX --------- */
   2007    if (e->tag == Iex_Mux0X) {
   2008       IRType ty8;
   2009       HReg   r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
   2010       ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
   2011       vassert(ty8 == Ity_I8);
   2012       iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
   2013       iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
   2014       dstHi = newVRegI(env);
   2015       dstLo = newVRegI(env);
   2016       addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
   2017       addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
   2018       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   2019       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   2020                                       ARMRI84_I84(0xFF,0)));
   2021       addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
   2022       addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
   2023       *rHi = dstHi;
   2024       *rLo = dstLo;
   2025       return;
   2026    }
   2027 
   2028    /* It is convenient sometimes to call iselInt64Expr even when we
   2029       have NEON support (e.g. in do_helper_call we need 64-bit
   2030       arguments as 2 x 32 regs). */
   2031    if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   2032       HReg tHi = newVRegI(env);
   2033       HReg tLo = newVRegI(env);
   2034       HReg tmp = iselNeon64Expr(env, e);
   2035       addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   2036       *rHi = tHi;
   2037       *rLo = tLo;
   2038       return ;
   2039    }
   2040 
   2041    ppIRExpr(e);
   2042    vpanic("iselInt64Expr");
   2043 }
   2044 
   2045 
   2046 /*---------------------------------------------------------*/
   2047 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
   2048 /*---------------------------------------------------------*/
   2049 
   2050 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
   2051 {
   2052    HReg r = iselNeon64Expr_wrk( env, e );
   2053    vassert(hregClass(r) == HRcFlt64);
   2054    vassert(hregIsVirtual(r));
   2055    return r;
   2056 }
   2057 
   2058 /* DO NOT CALL THIS DIRECTLY */
   2059 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
   2060 {
   2061    IRType ty = typeOfIRExpr(env->type_env, e);
   2062    MatchInfo mi;
   2063    vassert(e);
   2064    vassert(ty == Ity_I64);
   2065 
   2066    if (e->tag == Iex_RdTmp) {
   2067       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   2068    }
   2069 
   2070    if (e->tag == Iex_Const) {
   2071       HReg rLo, rHi;
   2072       HReg res = newVRegD(env);
   2073       iselInt64Expr(&rHi, &rLo, env, e);
   2074       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2075       return res;
   2076    }
   2077 
   2078    /* 64-bit load */
   2079    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2080       HReg res = newVRegD(env);
   2081       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   2082       vassert(ty == Ity_I64);
   2083       addInstr(env, ARMInstr_NLdStD(True, res, am));
   2084       return res;
   2085    }
   2086 
   2087    /* 64-bit GET */
   2088    if (e->tag == Iex_Get) {
   2089       HReg addr = newVRegI(env);
   2090       HReg res = newVRegD(env);
   2091       vassert(ty == Ity_I64);
   2092       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   2093       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
   2094       return res;
   2095    }
   2096 
   2097    /* --------- BINARY ops --------- */
   2098    if (e->tag == Iex_Binop) {
   2099       switch (e->Iex.Binop.op) {
   2100 
   2101          /* 32 x 32 -> 64 multiply */
   2102          case Iop_MullS32:
   2103          case Iop_MullU32: {
   2104             HReg rLo, rHi;
   2105             HReg res = newVRegD(env);
   2106             iselInt64Expr(&rHi, &rLo, env, e);
   2107             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2108             return res;
   2109          }
   2110 
   2111          case Iop_And64: {
   2112             HReg res = newVRegD(env);
   2113             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2114             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2115             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   2116                                            res, argL, argR, 4, False));
   2117             return res;
   2118          }
   2119          case Iop_Or64: {
   2120             HReg res = newVRegD(env);
   2121             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2122             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2123             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   2124                                            res, argL, argR, 4, False));
   2125             return res;
   2126          }
   2127          case Iop_Xor64: {
   2128             HReg res = newVRegD(env);
   2129             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2130             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2131             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   2132                                            res, argL, argR, 4, False));
   2133             return res;
   2134          }
   2135 
   2136          /* 32HLto64(e1,e2) */
   2137          case Iop_32HLto64: {
   2138             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2139             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2140             HReg res = newVRegD(env);
   2141             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2142             return res;
   2143          }
   2144 
   2145          case Iop_Add8x8:
   2146          case Iop_Add16x4:
   2147          case Iop_Add32x2:
   2148          case Iop_Add64: {
   2149             HReg res = newVRegD(env);
   2150             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2151             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2152             UInt size;
   2153             switch (e->Iex.Binop.op) {
   2154                case Iop_Add8x8: size = 0; break;
   2155                case Iop_Add16x4: size = 1; break;
   2156                case Iop_Add32x2: size = 2; break;
   2157                case Iop_Add64: size = 3; break;
   2158                default: vassert(0);
   2159             }
   2160             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   2161                                            res, argL, argR, size, False));
   2162             return res;
   2163          }
   2164          case Iop_Add32Fx2: {
   2165             HReg res = newVRegD(env);
   2166             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2167             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2168             UInt size = 0;
   2169             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
   2170                                            res, argL, argR, size, False));
   2171             return res;
   2172          }
   2173          case Iop_Recps32Fx2: {
   2174             HReg res = newVRegD(env);
   2175             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2176             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2177             UInt size = 0;
   2178             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   2179                                            res, argL, argR, size, False));
   2180             return res;
   2181          }
   2182          case Iop_Rsqrts32Fx2: {
   2183             HReg res = newVRegD(env);
   2184             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2185             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2186             UInt size = 0;
   2187             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   2188                                            res, argL, argR, size, False));
   2189             return res;
   2190          }
   2191          case Iop_InterleaveOddLanes8x8:
   2192          case Iop_InterleaveOddLanes16x4:
   2193          case Iop_InterleaveLO32x2:
   2194          case Iop_InterleaveEvenLanes8x8:
   2195          case Iop_InterleaveEvenLanes16x4:
   2196          case Iop_InterleaveHI32x2: {
   2197             HReg tmp = newVRegD(env);
   2198             HReg res = newVRegD(env);
   2199             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2200             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2201             UInt size;
   2202             UInt is_lo;
   2203             switch (e->Iex.Binop.op) {
   2204                case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
   2205                case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
   2206                case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
   2207                case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
   2208                case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
   2209                case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
   2210                default: vassert(0);
   2211             }
   2212             if (is_lo) {
   2213                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2214                                              tmp, argL, 4, False));
   2215                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2216                                              res, argR, 4, False));
   2217                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   2218                                             res, tmp, size, False));
   2219             } else {
   2220                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2221                                              tmp, argR, 4, False));
   2222                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2223                                              res, argL, 4, False));
   2224                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   2225                                             tmp, res, size, False));
   2226             }
   2227             return res;
   2228          }
   2229          case Iop_InterleaveHI8x8:
   2230          case Iop_InterleaveHI16x4:
   2231          case Iop_InterleaveLO8x8:
   2232          case Iop_InterleaveLO16x4: {
   2233             HReg tmp = newVRegD(env);
   2234             HReg res = newVRegD(env);
   2235             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2236             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2237             UInt size;
   2238             UInt is_lo;
   2239             switch (e->Iex.Binop.op) {
   2240                case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
   2241                case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
   2242                case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
   2243                case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
   2244                default: vassert(0);
   2245             }
   2246             if (is_lo) {
   2247                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2248                                              tmp, argL, 4, False));
   2249                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2250                                              res, argR, 4, False));
   2251                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   2252                                             res, tmp, size, False));
   2253             } else {
   2254                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2255                                              tmp, argR, 4, False));
   2256                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2257                                              res, argL, 4, False));
   2258                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   2259                                             tmp, res, size, False));
   2260             }
   2261             return res;
   2262          }
   2263          case Iop_CatOddLanes8x8:
   2264          case Iop_CatOddLanes16x4:
   2265          case Iop_CatEvenLanes8x8:
   2266          case Iop_CatEvenLanes16x4: {
   2267             HReg tmp = newVRegD(env);
   2268             HReg res = newVRegD(env);
   2269             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2270             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2271             UInt size;
   2272             UInt is_lo;
   2273             switch (e->Iex.Binop.op) {
   2274                case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
   2275                case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
   2276                case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
   2277                case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
   2278                default: vassert(0);
   2279             }
   2280             if (is_lo) {
   2281                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2282                                              tmp, argL, 4, False));
   2283                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2284                                              res, argR, 4, False));
   2285                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   2286                                             res, tmp, size, False));
   2287             } else {
   2288                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2289                                              tmp, argR, 4, False));
   2290                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2291                                              res, argL, 4, False));
   2292                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   2293                                             tmp, res, size, False));
   2294             }
   2295             return res;
   2296          }
   2297          case Iop_QAdd8Ux8:
   2298          case Iop_QAdd16Ux4:
   2299          case Iop_QAdd32Ux2:
   2300          case Iop_QAdd64Ux1: {
   2301             HReg res = newVRegD(env);
   2302             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2303             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2304             UInt size;
   2305             switch (e->Iex.Binop.op) {
   2306                case Iop_QAdd8Ux8: size = 0; break;
   2307                case Iop_QAdd16Ux4: size = 1; break;
   2308                case Iop_QAdd32Ux2: size = 2; break;
   2309                case Iop_QAdd64Ux1: size = 3; break;
   2310                default: vassert(0);
   2311             }
   2312             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   2313                                            res, argL, argR, size, False));
   2314             return res;
   2315          }
   2316          case Iop_QAdd8Sx8:
   2317          case Iop_QAdd16Sx4:
   2318          case Iop_QAdd32Sx2:
   2319          case Iop_QAdd64Sx1: {
   2320             HReg res = newVRegD(env);
   2321             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2322             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2323             UInt size;
   2324             switch (e->Iex.Binop.op) {
   2325                case Iop_QAdd8Sx8: size = 0; break;
   2326                case Iop_QAdd16Sx4: size = 1; break;
   2327                case Iop_QAdd32Sx2: size = 2; break;
   2328                case Iop_QAdd64Sx1: size = 3; break;
   2329                default: vassert(0);
   2330             }
   2331             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   2332                                            res, argL, argR, size, False));
   2333             return res;
   2334          }
   2335          case Iop_Sub8x8:
   2336          case Iop_Sub16x4:
   2337          case Iop_Sub32x2:
   2338          case Iop_Sub64: {
   2339             HReg res = newVRegD(env);
   2340             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2341             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2342             UInt size;
   2343             switch (e->Iex.Binop.op) {
   2344                case Iop_Sub8x8: size = 0; break;
   2345                case Iop_Sub16x4: size = 1; break;
   2346                case Iop_Sub32x2: size = 2; break;
   2347                case Iop_Sub64: size = 3; break;
   2348                default: vassert(0);
   2349             }
   2350             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2351                                            res, argL, argR, size, False));
   2352             return res;
   2353          }
   2354          case Iop_Sub32Fx2: {
   2355             HReg res = newVRegD(env);
   2356             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2357             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2358             UInt size = 0;
   2359             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
   2360                                            res, argL, argR, size, False));
   2361             return res;
   2362          }
   2363          case Iop_QSub8Ux8:
   2364          case Iop_QSub16Ux4:
   2365          case Iop_QSub32Ux2:
   2366          case Iop_QSub64Ux1: {
   2367             HReg res = newVRegD(env);
   2368             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2369             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2370             UInt size;
   2371             switch (e->Iex.Binop.op) {
   2372                case Iop_QSub8Ux8: size = 0; break;
   2373                case Iop_QSub16Ux4: size = 1; break;
   2374                case Iop_QSub32Ux2: size = 2; break;
   2375                case Iop_QSub64Ux1: size = 3; break;
   2376                default: vassert(0);
   2377             }
   2378             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   2379                                            res, argL, argR, size, False));
   2380             return res;
   2381          }
   2382          case Iop_QSub8Sx8:
   2383          case Iop_QSub16Sx4:
   2384          case Iop_QSub32Sx2:
   2385          case Iop_QSub64Sx1: {
   2386             HReg res = newVRegD(env);
   2387             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2388             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2389             UInt size;
   2390             switch (e->Iex.Binop.op) {
   2391                case Iop_QSub8Sx8: size = 0; break;
   2392                case Iop_QSub16Sx4: size = 1; break;
   2393                case Iop_QSub32Sx2: size = 2; break;
   2394                case Iop_QSub64Sx1: size = 3; break;
   2395                default: vassert(0);
   2396             }
   2397             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   2398                                            res, argL, argR, size, False));
   2399             return res;
   2400          }
   2401          case Iop_Max8Ux8:
   2402          case Iop_Max16Ux4:
   2403          case Iop_Max32Ux2: {
   2404             HReg res = newVRegD(env);
   2405             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2406             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2407             UInt size;
   2408             switch (e->Iex.Binop.op) {
   2409                case Iop_Max8Ux8: size = 0; break;
   2410                case Iop_Max16Ux4: size = 1; break;
   2411                case Iop_Max32Ux2: size = 2; break;
   2412                default: vassert(0);
   2413             }
   2414             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   2415                                            res, argL, argR, size, False));
   2416             return res;
   2417          }
   2418          case Iop_Max8Sx8:
   2419          case Iop_Max16Sx4:
   2420          case Iop_Max32Sx2: {
   2421             HReg res = newVRegD(env);
   2422             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2423             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2424             UInt size;
   2425             switch (e->Iex.Binop.op) {
   2426                case Iop_Max8Sx8: size = 0; break;
   2427                case Iop_Max16Sx4: size = 1; break;
   2428                case Iop_Max32Sx2: size = 2; break;
   2429                default: vassert(0);
   2430             }
   2431             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   2432                                            res, argL, argR, size, False));
   2433             return res;
   2434          }
   2435          case Iop_Min8Ux8:
   2436          case Iop_Min16Ux4:
   2437          case Iop_Min32Ux2: {
   2438             HReg res = newVRegD(env);
   2439             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2440             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2441             UInt size;
   2442             switch (e->Iex.Binop.op) {
   2443                case Iop_Min8Ux8: size = 0; break;
   2444                case Iop_Min16Ux4: size = 1; break;
   2445                case Iop_Min32Ux2: size = 2; break;
   2446                default: vassert(0);
   2447             }
   2448             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   2449                                            res, argL, argR, size, False));
   2450             return res;
   2451          }
   2452          case Iop_Min8Sx8:
   2453          case Iop_Min16Sx4:
   2454          case Iop_Min32Sx2: {
   2455             HReg res = newVRegD(env);
   2456             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2457             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2458             UInt size;
   2459             switch (e->Iex.Binop.op) {
   2460                case Iop_Min8Sx8: size = 0; break;
   2461                case Iop_Min16Sx4: size = 1; break;
   2462                case Iop_Min32Sx2: size = 2; break;
   2463                default: vassert(0);
   2464             }
   2465             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   2466                                            res, argL, argR, size, False));
   2467             return res;
   2468          }
   2469          case Iop_Sar8x8:
   2470          case Iop_Sar16x4:
   2471          case Iop_Sar32x2: {
   2472             HReg res = newVRegD(env);
   2473             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2474             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2475             HReg argR2 = newVRegD(env);
   2476             HReg zero = newVRegD(env);
   2477             UInt size;
   2478             switch (e->Iex.Binop.op) {
   2479                case Iop_Sar8x8: size = 0; break;
   2480                case Iop_Sar16x4: size = 1; break;
   2481                case Iop_Sar32x2: size = 2; break;
   2482                case Iop_Sar64: size = 3; break;
   2483                default: vassert(0);
   2484             }
   2485             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2486             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2487                                            argR2, zero, argR, size, False));
   2488             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2489                                           res, argL, argR2, size, False));
   2490             return res;
   2491          }
   2492          case Iop_Sal8x8:
   2493          case Iop_Sal16x4:
   2494          case Iop_Sal32x2:
   2495          case Iop_Sal64x1: {
   2496             HReg res = newVRegD(env);
   2497             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2498             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2499             UInt size;
   2500             switch (e->Iex.Binop.op) {
   2501                case Iop_Sal8x8: size = 0; break;
   2502                case Iop_Sal16x4: size = 1; break;
   2503                case Iop_Sal32x2: size = 2; break;
   2504                case Iop_Sal64x1: size = 3; break;
   2505                default: vassert(0);
   2506             }
   2507             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2508                                           res, argL, argR, size, False));
   2509             return res;
   2510          }
   2511          case Iop_Shr8x8:
   2512          case Iop_Shr16x4:
   2513          case Iop_Shr32x2: {
   2514             HReg res = newVRegD(env);
   2515             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2516             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2517             HReg argR2 = newVRegD(env);
   2518             HReg zero = newVRegD(env);
   2519             UInt size;
   2520             switch (e->Iex.Binop.op) {
   2521                case Iop_Shr8x8: size = 0; break;
   2522                case Iop_Shr16x4: size = 1; break;
   2523                case Iop_Shr32x2: size = 2; break;
   2524                default: vassert(0);
   2525             }
   2526             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2527             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2528                                            argR2, zero, argR, size, False));
   2529             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2530                                           res, argL, argR2, size, False));
   2531             return res;
   2532          }
   2533          case Iop_Shl8x8:
   2534          case Iop_Shl16x4:
   2535          case Iop_Shl32x2: {
   2536             HReg res = newVRegD(env);
   2537             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2538             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2539             UInt size;
   2540             switch (e->Iex.Binop.op) {
   2541                case Iop_Shl8x8: size = 0; break;
   2542                case Iop_Shl16x4: size = 1; break;
   2543                case Iop_Shl32x2: size = 2; break;
   2544                default: vassert(0);
   2545             }
   2546             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2547                                           res, argL, argR, size, False));
   2548             return res;
   2549          }
   2550          case Iop_QShl8x8:
   2551          case Iop_QShl16x4:
   2552          case Iop_QShl32x2:
   2553          case Iop_QShl64x1: {
   2554             HReg res = newVRegD(env);
   2555             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2556             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2557             UInt size;
   2558             switch (e->Iex.Binop.op) {
   2559                case Iop_QShl8x8: size = 0; break;
   2560                case Iop_QShl16x4: size = 1; break;
   2561                case Iop_QShl32x2: size = 2; break;
   2562                case Iop_QShl64x1: size = 3; break;
   2563                default: vassert(0);
   2564             }
   2565             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   2566                                           res, argL, argR, size, False));
   2567             return res;
   2568          }
   2569          case Iop_QSal8x8:
   2570          case Iop_QSal16x4:
   2571          case Iop_QSal32x2:
   2572          case Iop_QSal64x1: {
   2573             HReg res = newVRegD(env);
   2574             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2575             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2576             UInt size;
   2577             switch (e->Iex.Binop.op) {
   2578                case Iop_QSal8x8: size = 0; break;
   2579                case Iop_QSal16x4: size = 1; break;
   2580                case Iop_QSal32x2: size = 2; break;
   2581                case Iop_QSal64x1: size = 3; break;
   2582                default: vassert(0);
   2583             }
   2584             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   2585                                           res, argL, argR, size, False));
   2586             return res;
   2587          }
   2588          case Iop_QShlN8x8:
   2589          case Iop_QShlN16x4:
   2590          case Iop_QShlN32x2:
   2591          case Iop_QShlN64x1: {
   2592             HReg res = newVRegD(env);
   2593             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2594             UInt size, imm;
   2595             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2596                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2597                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2598                       "second argument only\n");
   2599             }
   2600             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2601             switch (e->Iex.Binop.op) {
   2602                case Iop_QShlN8x8: size = 8 | imm; break;
   2603                case Iop_QShlN16x4: size = 16 | imm; break;
   2604                case Iop_QShlN32x2: size = 32 | imm; break;
   2605                case Iop_QShlN64x1: size = 64 | imm; break;
   2606                default: vassert(0);
   2607             }
   2608             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   2609                                           res, argL, size, False));
   2610             return res;
   2611          }
   2612          case Iop_QShlN8Sx8:
   2613          case Iop_QShlN16Sx4:
   2614          case Iop_QShlN32Sx2:
   2615          case Iop_QShlN64Sx1: {
   2616             HReg res = newVRegD(env);
   2617             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2618             UInt size, imm;
   2619             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2620                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2621                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2622                       "second argument only\n");
   2623             }
   2624             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2625             switch (e->Iex.Binop.op) {
   2626                case Iop_QShlN8Sx8: size = 8 | imm; break;
   2627                case Iop_QShlN16Sx4: size = 16 | imm; break;
   2628                case Iop_QShlN32Sx2: size = 32 | imm; break;
   2629                case Iop_QShlN64Sx1: size = 64 | imm; break;
   2630                default: vassert(0);
   2631             }
   2632             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   2633                                           res, argL, size, False));
   2634             return res;
   2635          }
   2636          case Iop_QSalN8x8:
   2637          case Iop_QSalN16x4:
   2638          case Iop_QSalN32x2:
   2639          case Iop_QSalN64x1: {
   2640             HReg res = newVRegD(env);
   2641             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2642             UInt size, imm;
   2643             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2644                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2645                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2646                       "second argument only\n");
   2647             }
   2648             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2649             switch (e->Iex.Binop.op) {
   2650                case Iop_QSalN8x8: size = 8 | imm; break;
   2651                case Iop_QSalN16x4: size = 16 | imm; break;
   2652                case Iop_QSalN32x2: size = 32 | imm; break;
   2653                case Iop_QSalN64x1: size = 64 | imm; break;
   2654                default: vassert(0);
   2655             }
   2656             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   2657                                           res, argL, size, False));
   2658             return res;
   2659          }
   2660          case Iop_ShrN8x8:
   2661          case Iop_ShrN16x4:
   2662          case Iop_ShrN32x2:
   2663          case Iop_Shr64: {
   2664             HReg res = newVRegD(env);
   2665             HReg tmp = newVRegD(env);
   2666             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2667             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2668             HReg argR2 = newVRegI(env);
   2669             UInt size;
   2670             switch (e->Iex.Binop.op) {
   2671                case Iop_ShrN8x8: size = 0; break;
   2672                case Iop_ShrN16x4: size = 1; break;
   2673                case Iop_ShrN32x2: size = 2; break;
   2674                case Iop_Shr64: size = 3; break;
   2675                default: vassert(0);
   2676             }
   2677             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2678             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2679             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2680                                           res, argL, tmp, size, False));
   2681             return res;
   2682          }
   2683          case Iop_ShlN8x8:
   2684          case Iop_ShlN16x4:
   2685          case Iop_ShlN32x2:
   2686          case Iop_Shl64: {
   2687             HReg res = newVRegD(env);
   2688             HReg tmp = newVRegD(env);
   2689             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2690             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2691             UInt size;
   2692             switch (e->Iex.Binop.op) {
   2693                case Iop_ShlN8x8: size = 0; break;
   2694                case Iop_ShlN16x4: size = 1; break;
   2695                case Iop_ShlN32x2: size = 2; break;
   2696                case Iop_Shl64: size = 3; break;
   2697                default: vassert(0);
   2698             }
   2699             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
   2700             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2701                                           res, argL, tmp, size, False));
   2702             return res;
   2703          }
   2704          case Iop_SarN8x8:
   2705          case Iop_SarN16x4:
   2706          case Iop_SarN32x2:
   2707          case Iop_Sar64: {
   2708             HReg res = newVRegD(env);
   2709             HReg tmp = newVRegD(env);
   2710             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2711             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2712             HReg argR2 = newVRegI(env);
   2713             UInt size;
   2714             switch (e->Iex.Binop.op) {
   2715                case Iop_SarN8x8: size = 0; break;
   2716                case Iop_SarN16x4: size = 1; break;
   2717                case Iop_SarN32x2: size = 2; break;
   2718                case Iop_Sar64: size = 3; break;
   2719                default: vassert(0);
   2720             }
   2721             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2722             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2723             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2724                                           res, argL, tmp, size, False));
   2725             return res;
   2726          }
   2727          case Iop_CmpGT8Ux8:
   2728          case Iop_CmpGT16Ux4:
   2729          case Iop_CmpGT32Ux2: {
   2730             HReg res = newVRegD(env);
   2731             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2732             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2733             UInt size;
   2734             switch (e->Iex.Binop.op) {
   2735                case Iop_CmpGT8Ux8: size = 0; break;
   2736                case Iop_CmpGT16Ux4: size = 1; break;
   2737                case Iop_CmpGT32Ux2: size = 2; break;
   2738                default: vassert(0);
   2739             }
   2740             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   2741                                            res, argL, argR, size, False));
   2742             return res;
   2743          }
   2744          case Iop_CmpGT8Sx8:
   2745          case Iop_CmpGT16Sx4:
   2746          case Iop_CmpGT32Sx2: {
   2747             HReg res = newVRegD(env);
   2748             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2749             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2750             UInt size;
   2751             switch (e->Iex.Binop.op) {
   2752                case Iop_CmpGT8Sx8: size = 0; break;
   2753                case Iop_CmpGT16Sx4: size = 1; break;
   2754                case Iop_CmpGT32Sx2: size = 2; break;
   2755                default: vassert(0);
   2756             }
   2757             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   2758                                            res, argL, argR, size, False));
   2759             return res;
   2760          }
   2761          case Iop_CmpEQ8x8:
   2762          case Iop_CmpEQ16x4:
   2763          case Iop_CmpEQ32x2: {
   2764             HReg res = newVRegD(env);
   2765             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2766             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2767             UInt size;
   2768             switch (e->Iex.Binop.op) {
   2769                case Iop_CmpEQ8x8: size = 0; break;
   2770                case Iop_CmpEQ16x4: size = 1; break;
   2771                case Iop_CmpEQ32x2: size = 2; break;
   2772                default: vassert(0);
   2773             }
   2774             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   2775                                            res, argL, argR, size, False));
   2776             return res;
   2777          }
   2778          case Iop_Mul8x8:
   2779          case Iop_Mul16x4:
   2780          case Iop_Mul32x2: {
   2781             HReg res = newVRegD(env);
   2782             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2783             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2784             UInt size = 0;
   2785             switch(e->Iex.Binop.op) {
   2786                case Iop_Mul8x8: size = 0; break;
   2787                case Iop_Mul16x4: size = 1; break;
   2788                case Iop_Mul32x2: size = 2; break;
   2789                default: vassert(0);
   2790             }
   2791             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   2792                                            res, argL, argR, size, False));
   2793             return res;
   2794          }
   2795          case Iop_Mul32Fx2: {
   2796             HReg res = newVRegD(env);
   2797             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2798             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2799             UInt size = 0;
   2800             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
   2801                                            res, argL, argR, size, False));
   2802             return res;
   2803          }
   2804          case Iop_QDMulHi16Sx4:
   2805          case Iop_QDMulHi32Sx2: {
   2806             HReg res = newVRegD(env);
   2807             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2808             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2809             UInt size = 0;
   2810             switch(e->Iex.Binop.op) {
   2811                case Iop_QDMulHi16Sx4: size = 1; break;
   2812                case Iop_QDMulHi32Sx2: size = 2; break;
   2813                default: vassert(0);
   2814             }
   2815             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   2816                                            res, argL, argR, size, False));
   2817             return res;
   2818          }
   2819 
   2820          case Iop_QRDMulHi16Sx4:
   2821          case Iop_QRDMulHi32Sx2: {
   2822             HReg res = newVRegD(env);
   2823             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2824             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2825             UInt size = 0;
   2826             switch(e->Iex.Binop.op) {
   2827                case Iop_QRDMulHi16Sx4: size = 1; break;
   2828                case Iop_QRDMulHi32Sx2: size = 2; break;
   2829                default: vassert(0);
   2830             }
   2831             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   2832                                            res, argL, argR, size, False));
   2833             return res;
   2834          }
   2835 
   2836          case Iop_PwAdd8x8:
   2837          case Iop_PwAdd16x4:
   2838          case Iop_PwAdd32x2: {
   2839             HReg res = newVRegD(env);
   2840             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2841             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2842             UInt size = 0;
   2843             switch(e->Iex.Binop.op) {
   2844                case Iop_PwAdd8x8: size = 0; break;
   2845                case Iop_PwAdd16x4: size = 1; break;
   2846                case Iop_PwAdd32x2: size = 2; break;
   2847                default: vassert(0);
   2848             }
   2849             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   2850                                            res, argL, argR, size, False));
   2851             return res;
   2852          }
   2853          case Iop_PwAdd32Fx2: {
   2854             HReg res = newVRegD(env);
   2855             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2856             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2857             UInt size = 0;
   2858             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
   2859                                            res, argL, argR, size, False));
   2860             return res;
   2861          }
   2862          case Iop_PwMin8Ux8:
   2863          case Iop_PwMin16Ux4:
   2864          case Iop_PwMin32Ux2: {
   2865             HReg res = newVRegD(env);
   2866             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2867             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2868             UInt size = 0;
   2869             switch(e->Iex.Binop.op) {
   2870                case Iop_PwMin8Ux8: size = 0; break;
   2871                case Iop_PwMin16Ux4: size = 1; break;
   2872                case Iop_PwMin32Ux2: size = 2; break;
   2873                default: vassert(0);
   2874             }
   2875             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
   2876                                            res, argL, argR, size, False));
   2877             return res;
   2878          }
   2879          case Iop_PwMin8Sx8:
   2880          case Iop_PwMin16Sx4:
   2881          case Iop_PwMin32Sx2: {
   2882             HReg res = newVRegD(env);
   2883             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2884             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2885             UInt size = 0;
   2886             switch(e->Iex.Binop.op) {
   2887                case Iop_PwMin8Sx8: size = 0; break;
   2888                case Iop_PwMin16Sx4: size = 1; break;
   2889                case Iop_PwMin32Sx2: size = 2; break;
   2890                default: vassert(0);
   2891             }
   2892             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
   2893                                            res, argL, argR, size, False));
   2894             return res;
   2895          }
   2896          case Iop_PwMax8Ux8:
   2897          case Iop_PwMax16Ux4:
   2898          case Iop_PwMax32Ux2: {
   2899             HReg res = newVRegD(env);
   2900             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2901             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2902             UInt size = 0;
   2903             switch(e->Iex.Binop.op) {
   2904                case Iop_PwMax8Ux8: size = 0; break;
   2905                case Iop_PwMax16Ux4: size = 1; break;
   2906                case Iop_PwMax32Ux2: size = 2; break;
   2907                default: vassert(0);
   2908             }
   2909             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
   2910                                            res, argL, argR, size, False));
   2911             return res;
   2912          }
   2913          case Iop_PwMax8Sx8:
   2914          case Iop_PwMax16Sx4:
   2915          case Iop_PwMax32Sx2: {
   2916             HReg res = newVRegD(env);
   2917             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2918             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2919             UInt size = 0;
   2920             switch(e->Iex.Binop.op) {
   2921                case Iop_PwMax8Sx8: size = 0; break;
   2922                case Iop_PwMax16Sx4: size = 1; break;
   2923                case Iop_PwMax32Sx2: size = 2; break;
   2924                default: vassert(0);
   2925             }
   2926             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
   2927                                            res, argL, argR, size, False));
   2928             return res;
   2929          }
   2930          case Iop_Perm8x8: {
   2931             HReg res = newVRegD(env);
   2932             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2933             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2934             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
   2935                                            res, argL, argR, 0, False));
   2936             return res;
   2937          }
   2938          case Iop_PolynomialMul8x8: {
   2939             HReg res = newVRegD(env);
   2940             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2941             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2942             UInt size = 0;
   2943             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   2944                                            res, argL, argR, size, False));
   2945             return res;
   2946          }
   2947          case Iop_Max32Fx2: {
   2948             HReg res = newVRegD(env);
   2949             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2950             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2951             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   2952                                            res, argL, argR, 2, False));
   2953             return res;
   2954          }
   2955          case Iop_Min32Fx2: {
   2956             HReg res = newVRegD(env);
   2957             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2958             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2959             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   2960                                            res, argL, argR, 2, False));
   2961             return res;
   2962          }
   2963          case Iop_PwMax32Fx2: {
   2964             HReg res = newVRegD(env);
   2965             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2966             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2967             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   2968                                            res, argL, argR, 2, False));
   2969             return res;
   2970          }
   2971          case Iop_PwMin32Fx2: {
   2972             HReg res = newVRegD(env);
   2973             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2974             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2975             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   2976                                            res, argL, argR, 2, False));
   2977             return res;
   2978          }
   2979          case Iop_CmpGT32Fx2: {
   2980             HReg res = newVRegD(env);
   2981             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2982             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2983             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   2984                                            res, argL, argR, 2, False));
   2985             return res;
   2986          }
   2987          case Iop_CmpGE32Fx2: {
   2988             HReg res = newVRegD(env);
   2989             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2990             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2991             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   2992                                            res, argL, argR, 2, False));
   2993             return res;
   2994          }
   2995          case Iop_CmpEQ32Fx2: {
   2996             HReg res = newVRegD(env);
   2997             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2998             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2999             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   3000                                            res, argL, argR, 2, False));
   3001             return res;
   3002          }
   3003          case Iop_F32ToFixed32Ux2_RZ:
   3004          case Iop_F32ToFixed32Sx2_RZ:
   3005          case Iop_Fixed32UToF32x2_RN:
   3006          case Iop_Fixed32SToF32x2_RN: {
   3007             HReg res = newVRegD(env);
   3008             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3009             ARMNeonUnOp op;
   3010             UInt imm6;
   3011             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3012                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3013                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   3014                          "second argument less than 33 only\n");
   3015             }
   3016             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3017             vassert(imm6 <= 32 && imm6 > 0);
   3018             imm6 = 64 - imm6;
   3019             switch(e->Iex.Binop.op) {
   3020                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
   3021                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
   3022                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
   3023                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
   3024                default: vassert(0);
   3025             }
   3026             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
   3027             return res;
   3028          }
   3029          /*
   3030          FIXME: is this here or not?
   3031          case Iop_VDup8x8:
   3032          case Iop_VDup16x4:
   3033          case Iop_VDup32x2: {
   3034             HReg res = newVRegD(env);
   3035             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3036             UInt index;
   3037             UInt imm4;
   3038             UInt size = 0;
   3039             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3040                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3041                   vpanic("ARM supports Iop_VDup with constant "
   3042                          "second argument less than 16 only\n");
   3043             }
   3044             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3045             switch(e->Iex.Binop.op) {
   3046                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
   3047                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
   3048                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
   3049                default: vassert(0);
   3050             }
   3051             if (imm4 >= 16) {
   3052                vpanic("ARM supports Iop_VDup with constant "
   3053                       "second argument less than 16 only\n");
   3054             }
   3055             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   3056                                           res, argL, imm4, False));
   3057             return res;
   3058          }
   3059          */
   3060          default:
   3061             break;
   3062       }
   3063    }
   3064 
   3065    /* --------- UNARY ops --------- */
   3066    if (e->tag == Iex_Unop) {
   3067       switch (e->Iex.Unop.op) {
   3068 
   3069          /* ReinterpF64asI64 */
   3070          case Iop_ReinterpF64asI64:
   3071          /* Left64(e) */
   3072          case Iop_Left64:
   3073          /* CmpwNEZ64(e) */
   3074          //case Iop_CmpwNEZ64:
   3075          case Iop_1Sto64: {
   3076             HReg rLo, rHi;
   3077             HReg res = newVRegD(env);
   3078             iselInt64Expr(&rHi, &rLo, env, e);
   3079             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3080             return res;
   3081          }
   3082          case Iop_Not64: {
   3083             DECLARE_PATTERN(p_veqz_8x8);
   3084             DECLARE_PATTERN(p_veqz_16x4);
   3085             DECLARE_PATTERN(p_veqz_32x2);
   3086             DECLARE_PATTERN(p_vcge_8sx8);
   3087             DECLARE_PATTERN(p_vcge_16sx4);
   3088             DECLARE_PATTERN(p_vcge_32sx2);
   3089             DECLARE_PATTERN(p_vcge_8ux8);
   3090             DECLARE_PATTERN(p_vcge_16ux4);
   3091             DECLARE_PATTERN(p_vcge_32ux2);
   3092             DEFINE_PATTERN(p_veqz_8x8,
   3093                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
   3094             DEFINE_PATTERN(p_veqz_16x4,
   3095                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
   3096             DEFINE_PATTERN(p_veqz_32x2,
   3097                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
   3098             DEFINE_PATTERN(p_vcge_8sx8,
   3099                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
   3100             DEFINE_PATTERN(p_vcge_16sx4,
   3101                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
   3102             DEFINE_PATTERN(p_vcge_32sx2,
   3103                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
   3104             DEFINE_PATTERN(p_vcge_8ux8,
   3105                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
   3106             DEFINE_PATTERN(p_vcge_16ux4,
   3107                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
   3108             DEFINE_PATTERN(p_vcge_32ux2,
   3109                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
   3110             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
   3111                HReg res = newVRegD(env);
   3112                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3113                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
   3114                return res;
   3115             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
   3116                HReg res = newVRegD(env);
   3117                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3118                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
   3119                return res;
   3120             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
   3121                HReg res = newVRegD(env);
   3122                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3123                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
   3124                return res;
   3125             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
   3126                HReg res = newVRegD(env);
   3127                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3128                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3129                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3130                                               res, argL, argR, 0, False));
   3131                return res;
   3132             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
   3133                HReg res = newVRegD(env);
   3134                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3135                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3136                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3137                                               res, argL, argR, 1, False));
   3138                return res;
   3139             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
   3140                HReg res = newVRegD(env);
   3141                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3142                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3143                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3144                                               res, argL, argR, 2, False));
   3145                return res;
   3146             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
   3147                HReg res = newVRegD(env);
   3148                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3149                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3150                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3151                                               res, argL, argR, 0, False));
   3152                return res;
   3153             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
   3154                HReg res = newVRegD(env);
   3155                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3156                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3157                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3158                                               res, argL, argR, 1, False));
   3159                return res;
   3160             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
   3161                HReg res = newVRegD(env);
   3162                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3163                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3164                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3165                                               res, argL, argR, 2, False));
   3166                return res;
   3167             } else {
   3168                HReg res = newVRegD(env);
   3169                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3170                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
   3171                return res;
   3172             }
   3173          }
   3174          case Iop_Dup8x8:
   3175          case Iop_Dup16x4:
   3176          case Iop_Dup32x2: {
   3177             HReg res, arg;
   3178             UInt size;
   3179             DECLARE_PATTERN(p_vdup_8x8);
   3180             DECLARE_PATTERN(p_vdup_16x4);
   3181             DECLARE_PATTERN(p_vdup_32x2);
   3182             DEFINE_PATTERN(p_vdup_8x8,
   3183                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3184             DEFINE_PATTERN(p_vdup_16x4,
   3185                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3186             DEFINE_PATTERN(p_vdup_32x2,
   3187                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3188             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
   3189                UInt index;
   3190                UInt imm4;
   3191                if (mi.bindee[1]->tag == Iex_Const &&
   3192                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3193                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3194                   imm4 = (index << 1) + 1;
   3195                   if (index < 8) {
   3196                      res = newVRegD(env);
   3197                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3198                      addInstr(env, ARMInstr_NUnaryS(
   3199                                       ARMneon_VDUP,
   3200                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3201                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3202                                       imm4, False
   3203                              ));
   3204                      return res;
   3205                   }
   3206                }
   3207             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
   3208                UInt index;
   3209                UInt imm4;
   3210                if (mi.bindee[1]->tag == Iex_Const &&
   3211                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3212                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3213                   imm4 = (index << 2) + 2;
   3214                   if (index < 4) {
   3215                      res = newVRegD(env);
   3216                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3217                      addInstr(env, ARMInstr_NUnaryS(
   3218                                       ARMneon_VDUP,
   3219                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3220                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3221                                       imm4, False
   3222                              ));
   3223                      return res;
   3224                   }
   3225                }
   3226             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
   3227                UInt index;
   3228                UInt imm4;
   3229                if (mi.bindee[1]->tag == Iex_Const &&
   3230                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3231                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3232                   imm4 = (index << 3) + 4;
   3233                   if (index < 2) {
   3234                      res = newVRegD(env);
   3235                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3236                      addInstr(env, ARMInstr_NUnaryS(
   3237                                       ARMneon_VDUP,
   3238                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3239                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3240                                       imm4, False
   3241                              ));
   3242                      return res;
   3243                   }
   3244                }
   3245             }
   3246             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3247             res = newVRegD(env);
   3248             switch (e->Iex.Unop.op) {
   3249                case Iop_Dup8x8: size = 0; break;
   3250                case Iop_Dup16x4: size = 1; break;
   3251                case Iop_Dup32x2: size = 2; break;
   3252                default: vassert(0);
   3253             }
   3254             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
   3255             return res;
   3256          }
   3257          case Iop_Abs8x8:
   3258          case Iop_Abs16x4:
   3259          case Iop_Abs32x2: {
   3260             HReg res = newVRegD(env);
   3261             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3262             UInt size = 0;
   3263             switch(e->Iex.Binop.op) {
   3264                case Iop_Abs8x8: size = 0; break;
   3265                case Iop_Abs16x4: size = 1; break;
   3266                case Iop_Abs32x2: size = 2; break;
   3267                default: vassert(0);
   3268             }
   3269             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
   3270             return res;
   3271          }
   3272          case Iop_Reverse64_8x8:
   3273          case Iop_Reverse64_16x4:
   3274          case Iop_Reverse64_32x2: {
   3275             HReg res = newVRegD(env);
   3276             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3277             UInt size = 0;
   3278             switch(e->Iex.Binop.op) {
   3279                case Iop_Reverse64_8x8: size = 0; break;
   3280                case Iop_Reverse64_16x4: size = 1; break;
   3281                case Iop_Reverse64_32x2: size = 2; break;
   3282                default: vassert(0);
   3283             }
   3284             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   3285                                           res, arg, size, False));
   3286             return res;
   3287          }
   3288          case Iop_Reverse32_8x8:
   3289          case Iop_Reverse32_16x4: {
   3290             HReg res = newVRegD(env);
   3291             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3292             UInt size = 0;
   3293             switch(e->Iex.Binop.op) {
   3294                case Iop_Reverse32_8x8: size = 0; break;
   3295                case Iop_Reverse32_16x4: size = 1; break;
   3296                default: vassert(0);
   3297             }
   3298             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   3299                                           res, arg, size, False));
   3300             return res;
   3301          }
   3302          case Iop_Reverse16_8x8: {
   3303             HReg res = newVRegD(env);
   3304             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3305             UInt size = 0;
   3306             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   3307                                           res, arg, size, False));
   3308             return res;
   3309          }
   3310          case Iop_CmpwNEZ64: {
   3311             HReg x_lsh = newVRegD(env);
   3312             HReg x_rsh = newVRegD(env);
   3313             HReg lsh_amt = newVRegD(env);
   3314             HReg rsh_amt = newVRegD(env);
   3315             HReg zero = newVRegD(env);
   3316             HReg tmp = newVRegD(env);
   3317             HReg tmp2 = newVRegD(env);
   3318             HReg res = newVRegD(env);
   3319             HReg x = newVRegD(env);
   3320             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3321             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
   3322             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
   3323             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   3324             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   3325             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   3326                                            rsh_amt, zero, lsh_amt, 2, False));
   3327             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3328                                           x_lsh, x, lsh_amt, 3, False));
   3329             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3330                                           x_rsh, x, rsh_amt, 3, False));
   3331             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3332                                            tmp, x_lsh, x_rsh, 0, False));
   3333             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3334                                            res, tmp, x, 0, False));
   3335             return res;
   3336          }
   3337          case Iop_CmpNEZ8x8:
   3338          case Iop_CmpNEZ16x4:
   3339          case Iop_CmpNEZ32x2: {
   3340             HReg res = newVRegD(env);
   3341             HReg tmp = newVRegD(env);
   3342             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3343             UInt size;
   3344             switch (e->Iex.Unop.op) {
   3345                case Iop_CmpNEZ8x8: size = 0; break;
   3346                case Iop_CmpNEZ16x4: size = 1; break;
   3347                case Iop_CmpNEZ32x2: size = 2; break;
   3348                default: vassert(0);
   3349             }
   3350             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
   3351             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
   3352             return res;
   3353          }
   3354          case Iop_NarrowUn16to8x8:
   3355          case Iop_NarrowUn32to16x4:
   3356          case Iop_NarrowUn64to32x2: {
   3357             HReg res = newVRegD(env);
   3358             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3359             UInt size = 0;
   3360             switch(e->Iex.Binop.op) {
   3361                case Iop_NarrowUn16to8x8:  size = 0; break;
   3362                case Iop_NarrowUn32to16x4: size = 1; break;
   3363                case Iop_NarrowUn64to32x2: size = 2; break;
   3364                default: vassert(0);
   3365             }
   3366             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
   3367                                           res, arg, size, False));
   3368             return res;
   3369          }
   3370          case Iop_QNarrowUn16Sto8Sx8:
   3371          case Iop_QNarrowUn32Sto16Sx4:
   3372          case Iop_QNarrowUn64Sto32Sx2: {
   3373             HReg res = newVRegD(env);
   3374             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3375             UInt size = 0;
   3376             switch(e->Iex.Binop.op) {
   3377                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
   3378                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
   3379                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
   3380                default: vassert(0);
   3381             }
   3382             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
   3383                                           res, arg, size, False));
   3384             return res;
   3385          }
   3386          case Iop_QNarrowUn16Sto8Ux8:
   3387          case Iop_QNarrowUn32Sto16Ux4:
   3388          case Iop_QNarrowUn64Sto32Ux2: {
   3389             HReg res = newVRegD(env);
   3390             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3391             UInt size = 0;
   3392             switch(e->Iex.Binop.op) {
   3393                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
   3394                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
   3395                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
   3396                default: vassert(0);
   3397             }
   3398             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
   3399                                           res, arg, size, False));
   3400             return res;
   3401          }
   3402          case Iop_QNarrowUn16Uto8Ux8:
   3403          case Iop_QNarrowUn32Uto16Ux4:
   3404          case Iop_QNarrowUn64Uto32Ux2: {
   3405             HReg res = newVRegD(env);
   3406             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3407             UInt size = 0;
   3408             switch(e->Iex.Binop.op) {
   3409                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
   3410                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
   3411                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
   3412                default: vassert(0);
   3413             }
   3414             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
   3415                                           res, arg, size, False));
   3416             return res;
   3417          }
   3418          case Iop_PwAddL8Sx8:
   3419          case Iop_PwAddL16Sx4:
   3420          case Iop_PwAddL32Sx2: {
   3421             HReg res = newVRegD(env);
   3422             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3423             UInt size = 0;
   3424             switch(e->Iex.Binop.op) {
   3425                case Iop_PwAddL8Sx8: size = 0; break;
   3426                case Iop_PwAddL16Sx4: size = 1; break;
   3427                case Iop_PwAddL32Sx2: size = 2; break;
   3428                default: vassert(0);
   3429             }
   3430             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   3431                                           res, arg, size, False));
   3432             return res;
   3433          }
   3434          case Iop_PwAddL8Ux8:
   3435          case Iop_PwAddL16Ux4:
   3436          case Iop_PwAddL32Ux2: {
   3437             HReg res = newVRegD(env);
   3438             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3439             UInt size = 0;
   3440             switch(e->Iex.Binop.op) {
   3441                case Iop_PwAddL8Ux8: size = 0; break;
   3442                case Iop_PwAddL16Ux4: size = 1; break;
   3443                case Iop_PwAddL32Ux2: size = 2; break;
   3444                default: vassert(0);
   3445             }
   3446             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   3447                                           res, arg, size, False));
   3448             return res;
   3449          }
   3450          case Iop_Cnt8x8: {
   3451             HReg res = newVRegD(env);
   3452             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3453             UInt size = 0;
   3454             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
   3455                                           res, arg, size, False));
   3456             return res;
   3457          }
   3458          case Iop_Clz8Sx8:
   3459          case Iop_Clz16Sx4:
   3460          case Iop_Clz32Sx2: {
   3461             HReg res = newVRegD(env);
   3462             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3463             UInt size = 0;
   3464             switch(e->Iex.Binop.op) {
   3465                case Iop_Clz8Sx8: size = 0; break;
   3466                case Iop_Clz16Sx4: size = 1; break;
   3467                case Iop_Clz32Sx2: size = 2; break;
   3468                default: vassert(0);
   3469             }
   3470             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
   3471                                           res, arg, size, False));
   3472             return res;
   3473          }
   3474          case Iop_Cls8Sx8:
   3475          case Iop_Cls16Sx4:
   3476          case Iop_Cls32Sx2: {
   3477             HReg res = newVRegD(env);
   3478             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3479             UInt size = 0;
   3480             switch(e->Iex.Binop.op) {
   3481                case Iop_Cls8Sx8: size = 0; break;
   3482                case Iop_Cls16Sx4: size = 1; break;
   3483                case Iop_Cls32Sx2: size = 2; break;
   3484                default: vassert(0);
   3485             }
   3486             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
   3487                                           res, arg, size, False));
   3488             return res;
   3489          }
   3490          case Iop_FtoI32Sx2_RZ: {
   3491             HReg res = newVRegD(env);
   3492             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3493             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   3494                                           res, arg, 2, False));
   3495             return res;
   3496          }
   3497          case Iop_FtoI32Ux2_RZ: {
   3498             HReg res = newVRegD(env);
   3499             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3500             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   3501                                           res, arg, 2, False));
   3502             return res;
   3503          }
   3504          case Iop_I32StoFx2: {
   3505             HReg res = newVRegD(env);
   3506             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3507             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   3508                                           res, arg, 2, False));
   3509             return res;
   3510          }
   3511          case Iop_I32UtoFx2: {
   3512             HReg res = newVRegD(env);
   3513             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3514             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   3515                                           res, arg, 2, False));
   3516             return res;
   3517          }
   3518          case Iop_F32toF16x4: {
   3519             HReg res = newVRegD(env);
   3520             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3521             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
   3522                                           res, arg, 2, False));
   3523             return res;
   3524          }
   3525          case Iop_Recip32Fx2: {
   3526             HReg res = newVRegD(env);
   3527             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3528             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   3529                                           res, argL, 0, False));
   3530             return res;
   3531          }
   3532          case Iop_Recip32x2: {
   3533             HReg res = newVRegD(env);
   3534             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3535             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   3536                                           res, argL, 0, False));
   3537             return res;
   3538          }
   3539          case Iop_Abs32Fx2: {
   3540             DECLARE_PATTERN(p_vabd_32fx2);
   3541             DEFINE_PATTERN(p_vabd_32fx2,
   3542                            unop(Iop_Abs32Fx2,
   3543                                 binop(Iop_Sub32Fx2,
   3544                                       bind(0),
   3545                                       bind(1))));
   3546             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
   3547                HReg res = newVRegD(env);
   3548                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3549                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3550                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
   3551                                               res, argL, argR, 0, False));
   3552                return res;
   3553             } else {
   3554                HReg res = newVRegD(env);
   3555                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3556                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   3557                                              res, arg, 0, False));
   3558                return res;
   3559             }
   3560          }
   3561          case Iop_Rsqrte32Fx2: {
   3562             HReg res = newVRegD(env);
   3563             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3564             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   3565                                           res, arg, 0, False));
   3566             return res;
   3567          }
   3568          case Iop_Rsqrte32x2: {
   3569             HReg res = newVRegD(env);
   3570             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3571             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   3572                                           res, arg, 0, False));
   3573             return res;
   3574          }
   3575          case Iop_Neg32Fx2: {
   3576             HReg res = newVRegD(env);
   3577             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3578             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   3579                                           res, arg, 0, False));
   3580             return res;
   3581          }
   3582          default:
   3583             break;
   3584       }
   3585    } /* if (e->tag == Iex_Unop) */
   3586 
   3587    if (e->tag == Iex_Triop) {
   3588       switch (e->Iex.Triop.op) {
   3589          case Iop_Extract64: {
   3590             HReg res = newVRegD(env);
   3591             HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
   3592             HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
   3593             UInt imm4;
   3594             if (e->Iex.Triop.arg3->tag != Iex_Const ||
   3595                 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
   3596                vpanic("ARM target supports Iop_Extract64 with constant "
   3597                       "third argument less than 16 only\n");
   3598             }
   3599             imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
   3600             if (imm4 >= 8) {
   3601                vpanic("ARM target supports Iop_Extract64 with constant "
   3602                       "third argument less than 16 only\n");
   3603             }
   3604             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   3605                                            res, argL, argR, imm4, False));
   3606             return res;
   3607          }
   3608          case Iop_SetElem8x8:
   3609          case Iop_SetElem16x4:
   3610          case Iop_SetElem32x2: {
   3611             HReg res = newVRegD(env);
   3612             HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
   3613             HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
   3614             UInt index, size;
   3615             if (e->Iex.Triop.arg2->tag != Iex_Const ||
   3616                 typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
   3617                vpanic("ARM target supports SetElem with constant "
   3618                       "second argument only\n");
   3619             }
   3620             index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
   3621             switch (e->Iex.Triop.op) {
   3622                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
   3623                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
   3624                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
   3625                default: vassert(0);
   3626             }
   3627             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
   3628             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
   3629                                            mkARMNRS(ARMNRS_Scalar, res, index),
   3630                                            mkARMNRS(ARMNRS_Reg, arg, 0),
   3631                                            size, False));
   3632             return res;
   3633          }
   3634          default:
   3635             break;
   3636       }
   3637    }
   3638 
   3639    /* --------- MULTIPLEX --------- */
   3640    if (e->tag == Iex_Mux0X) {
   3641       HReg rLo, rHi;
   3642       HReg res = newVRegD(env);
   3643       iselInt64Expr(&rHi, &rLo, env, e);
   3644       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3645       return res;
   3646    }
   3647 
   3648    ppIRExpr(e);
   3649    vpanic("iselNeon64Expr");
   3650 }
   3651 
   3652 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
   3653 {
   3654    HReg r = iselNeonExpr_wrk( env, e );
   3655    vassert(hregClass(r) == HRcVec128);
   3656    vassert(hregIsVirtual(r));
   3657    return r;
   3658 }
   3659 
   3660 /* DO NOT CALL THIS DIRECTLY */
   3661 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
   3662 {
   3663    IRType ty = typeOfIRExpr(env->type_env, e);
   3664    MatchInfo mi;
   3665    vassert(e);
   3666    vassert(ty == Ity_V128);
   3667 
   3668    if (e->tag == Iex_RdTmp) {
   3669       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3670    }
   3671 
   3672    if (e->tag == Iex_Const) {
   3673       /* At the moment there should be no 128-bit constants in IR for ARM
   3674          generated during disassemble. They are represented as Iop_64HLtoV128
   3675          binary operation and are handled among binary ops. */
   3676       /* But zero can be created by valgrind internal optimizer */
   3677       if (e->Iex.Const.con->Ico.V128 == 0) {
   3678          HReg res = newVRegV(env);
   3679          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
   3680          return res;
   3681       }
   3682       ppIRExpr(e);
   3683       vpanic("128-bit constant is not implemented");
   3684    }
   3685 
   3686    if (e->tag == Iex_Load) {
   3687       HReg res = newVRegV(env);
   3688       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   3689       vassert(ty == Ity_V128);
   3690       addInstr(env, ARMInstr_NLdStQ(True, res, am));
   3691       return res;
   3692    }
   3693 
   3694    if (e->tag == Iex_Get) {
   3695       HReg addr = newVRegI(env);
   3696       HReg res = newVRegV(env);
   3697       vassert(ty == Ity_V128);
   3698       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   3699       addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
   3700       return res;
   3701    }
   3702 
   3703    if (e->tag == Iex_Unop) {
   3704       switch (e->Iex.Unop.op) {
   3705          case Iop_NotV128: {
   3706             DECLARE_PATTERN(p_veqz_8x16);
   3707             DECLARE_PATTERN(p_veqz_16x8);
   3708             DECLARE_PATTERN(p_veqz_32x4);
   3709             DECLARE_PATTERN(p_vcge_8sx16);
   3710             DECLARE_PATTERN(p_vcge_16sx8);
   3711             DECLARE_PATTERN(p_vcge_32sx4);
   3712             DECLARE_PATTERN(p_vcge_8ux16);
   3713             DECLARE_PATTERN(p_vcge_16ux8);
   3714             DECLARE_PATTERN(p_vcge_32ux4);
   3715             DEFINE_PATTERN(p_veqz_8x16,
   3716                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
   3717             DEFINE_PATTERN(p_veqz_16x8,
   3718                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
   3719             DEFINE_PATTERN(p_veqz_32x4,
   3720                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
   3721             DEFINE_PATTERN(p_vcge_8sx16,
   3722                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
   3723             DEFINE_PATTERN(p_vcge_16sx8,
   3724                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
   3725             DEFINE_PATTERN(p_vcge_32sx4,
   3726                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
   3727             DEFINE_PATTERN(p_vcge_8ux16,
   3728                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
   3729             DEFINE_PATTERN(p_vcge_16ux8,
   3730                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
   3731             DEFINE_PATTERN(p_vcge_32ux4,
   3732                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
   3733             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
   3734                HReg res = newVRegV(env);
   3735                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3736                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
   3737                return res;
   3738             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
   3739                HReg res = newVRegV(env);
   3740                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3741                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
   3742                return res;
   3743             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
   3744                HReg res = newVRegV(env);
   3745                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3746                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
   3747                return res;
   3748             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
   3749                HReg res = newVRegV(env);
   3750                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3751                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3752                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3753                                               res, argL, argR, 0, True));
   3754                return res;
   3755             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
   3756                HReg res = newVRegV(env);
   3757                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3758                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3759                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3760                                               res, argL, argR, 1, True));
   3761                return res;
   3762             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
   3763                HReg res = newVRegV(env);
   3764                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3765                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3766                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3767                                               res, argL, argR, 2, True));
   3768                return res;
   3769             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
   3770                HReg res = newVRegV(env);
   3771                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3772                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3773                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3774                                               res, argL, argR, 0, True));
   3775                return res;
   3776             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
   3777                HReg res = newVRegV(env);
   3778                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3779                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3780                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3781                                               res, argL, argR, 1, True));
   3782                return res;
   3783             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
   3784                HReg res = newVRegV(env);
   3785                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3786                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3787                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3788                                               res, argL, argR, 2, True));
   3789                return res;
   3790             } else {
   3791                HReg res = newVRegV(env);
   3792                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3793                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
   3794                return res;
   3795             }
   3796          }
   3797          case Iop_Dup8x16:
   3798          case Iop_Dup16x8:
   3799          case Iop_Dup32x4: {
   3800             HReg res, arg;
   3801             UInt size;
   3802             DECLARE_PATTERN(p_vdup_8x16);
   3803             DECLARE_PATTERN(p_vdup_16x8);
   3804             DECLARE_PATTERN(p_vdup_32x4);
   3805             DEFINE_PATTERN(p_vdup_8x16,
   3806                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3807             DEFINE_PATTERN(p_vdup_16x8,
   3808                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3809             DEFINE_PATTERN(p_vdup_32x4,
   3810                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3811             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
   3812                UInt index;
   3813                UInt imm4;
   3814                if (mi.bindee[1]->tag == Iex_Const &&
   3815                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3816                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3817                   imm4 = (index << 1) + 1;
   3818                   if (index < 8) {
   3819                      res = newVRegV(env);
   3820                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3821                      addInstr(env, ARMInstr_NUnaryS(
   3822                                       ARMneon_VDUP,
   3823                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3824                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3825                                       imm4, True
   3826                              ));
   3827                      return res;
   3828                   }
   3829                }
   3830             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
   3831                UInt index;
   3832                UInt imm4;
   3833                if (mi.bindee[1]->tag == Iex_Const &&
   3834                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3835                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3836                   imm4 = (index << 2) + 2;
   3837                   if (index < 4) {
   3838                      res = newVRegV(env);
   3839                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3840                      addInstr(env, ARMInstr_NUnaryS(
   3841                                       ARMneon_VDUP,
   3842                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3843                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3844                                       imm4, True
   3845                              ));
   3846                      return res;
   3847                   }
   3848                }
   3849             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
   3850                UInt index;
   3851                UInt imm4;
   3852                if (mi.bindee[1]->tag == Iex_Const &&
   3853                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3854                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3855                   imm4 = (index << 3) + 4;
   3856                   if (index < 2) {
   3857                      res = newVRegV(env);
   3858                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3859                      addInstr(env, ARMInstr_NUnaryS(
   3860                                       ARMneon_VDUP,
   3861                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3862                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3863                                       imm4, True
   3864                              ));
   3865                      return res;
   3866                   }
   3867                }
   3868             }
   3869             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3870             res = newVRegV(env);
   3871             switch (e->Iex.Unop.op) {
   3872                case Iop_Dup8x16: size = 0; break;
   3873                case Iop_Dup16x8: size = 1; break;
   3874                case Iop_Dup32x4: size = 2; break;
   3875                default: vassert(0);
   3876             }
   3877             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
   3878             return res;
   3879          }
   3880          case Iop_Abs8x16:
   3881          case Iop_Abs16x8:
   3882          case Iop_Abs32x4: {
   3883             HReg res = newVRegV(env);
   3884             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3885             UInt size = 0;
   3886             switch(e->Iex.Binop.op) {
   3887                case Iop_Abs8x16: size = 0; break;
   3888                case Iop_Abs16x8: size = 1; break;
   3889                case Iop_Abs32x4: size = 2; break;
   3890                default: vassert(0);
   3891             }
   3892             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
   3893             return res;
   3894          }
   3895          case Iop_Reverse64_8x16:
   3896          case Iop_Reverse64_16x8:
   3897          case Iop_Reverse64_32x4: {
   3898             HReg res = newVRegV(env);
   3899             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3900             UInt size = 0;
   3901             switch(e->Iex.Binop.op) {
   3902                case Iop_Reverse64_8x16: size = 0; break;
   3903                case Iop_Reverse64_16x8: size = 1; break;
   3904                case Iop_Reverse64_32x4: size = 2; break;
   3905                default: vassert(0);
   3906             }
   3907             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   3908                                           res, arg, size, True));
   3909             return res;
   3910          }
   3911          case Iop_Reverse32_8x16:
   3912          case Iop_Reverse32_16x8: {
   3913             HReg res = newVRegV(env);
   3914             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3915             UInt size = 0;
   3916             switch(e->Iex.Binop.op) {
   3917                case Iop_Reverse32_8x16: size = 0; break;
   3918                case Iop_Reverse32_16x8: size = 1; break;
   3919                default: vassert(0);
   3920             }
   3921             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   3922                                           res, arg, size, True));
   3923             return res;
   3924          }
   3925          case Iop_Reverse16_8x16: {
   3926             HReg res = newVRegV(env);
   3927             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3928             UInt size = 0;
   3929             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   3930                                           res, arg, size, True));
   3931             return res;
   3932          }
   3933          case Iop_CmpNEZ64x2: {
   3934             HReg x_lsh = newVRegV(env);
   3935             HReg x_rsh = newVRegV(env);
   3936             HReg lsh_amt = newVRegV(env);
   3937             HReg rsh_amt = newVRegV(env);
   3938             HReg zero = newVRegV(env);
   3939             HReg tmp = newVRegV(env);
   3940             HReg tmp2 = newVRegV(env);
   3941             HReg res = newVRegV(env);
   3942             HReg x = newVRegV(env);
   3943             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3944             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
   3945             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
   3946             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   3947             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   3948             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   3949                                            rsh_amt, zero, lsh_amt, 2, True));
   3950             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3951                                           x_lsh, x, lsh_amt, 3, True));
   3952             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3953                                           x_rsh, x, rsh_amt, 3, True));
   3954             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3955                                            tmp, x_lsh, x_rsh, 0, True));
   3956             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3957                                            res, tmp, x, 0, True));
   3958             return res;
   3959          }
   3960          case Iop_CmpNEZ8x16:
   3961          case Iop_CmpNEZ16x8:
   3962          case Iop_CmpNEZ32x4: {
   3963             HReg res = newVRegV(env);
   3964             HReg tmp = newVRegV(env);
   3965             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3966             UInt size;
   3967             switch (e->Iex.Unop.op) {
   3968                case Iop_CmpNEZ8x16: size = 0; break;
   3969                case Iop_CmpNEZ16x8: size = 1; break;
   3970                case Iop_CmpNEZ32x4: size = 2; break;
   3971                default: vassert(0);
   3972             }
   3973             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
   3974             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
   3975             return res;
   3976          }
   3977          case Iop_Widen8Uto16x8:
   3978          case Iop_Widen16Uto32x4:
   3979          case Iop_Widen32Uto64x2: {
   3980             HReg res = newVRegV(env);
   3981             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3982             UInt size;
   3983             switch (e->Iex.Unop.op) {
   3984                case Iop_Widen8Uto16x8:  size = 0; break;
   3985                case Iop_Widen16Uto32x4: size = 1; break;
   3986                case Iop_Widen32Uto64x2: size = 2; break;
   3987                default: vassert(0);
   3988             }
   3989             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
   3990                                           res, arg, size, True));
   3991             return res;
   3992          }
   3993          case Iop_Widen8Sto16x8:
   3994          case Iop_Widen16Sto32x4:
   3995          case Iop_Widen32Sto64x2: {
   3996             HReg res = newVRegV(env);
   3997             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3998             UInt size;
   3999             switch (e->Iex.Unop.op) {
   4000                case Iop_Widen8Sto16x8:  size = 0; break;
   4001                case Iop_Widen16Sto32x4: size = 1; break;
   4002                case Iop_Widen32Sto64x2: size = 2; break;
   4003                default: vassert(0);
   4004             }
   4005             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
   4006                                           res, arg, size, True));
   4007             return res;
   4008          }
   4009          case Iop_PwAddL8Sx16:
   4010          case Iop_PwAddL16Sx8:
   4011          case Iop_PwAddL32Sx4: {
   4012             HReg res = newVRegV(env);
   4013             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4014             UInt size = 0;
   4015             switch(e->Iex.Binop.op) {
   4016                case Iop_PwAddL8Sx16: size = 0; break;
   4017                case Iop_PwAddL16Sx8: size = 1; break;
   4018                case Iop_PwAddL32Sx4: size = 2; break;
   4019                default: vassert(0);
   4020             }
   4021             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   4022                                           res, arg, size, True));
   4023             return res;
   4024          }
   4025          case Iop_PwAddL8Ux16:
   4026          case Iop_PwAddL16Ux8:
   4027          case Iop_PwAddL32Ux4: {
   4028             HReg res = newVRegV(env);
   4029             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4030             UInt size = 0;
   4031             switch(e->Iex.Binop.op) {
   4032                case Iop_PwAddL8Ux16: size = 0; break;
   4033                case Iop_PwAddL16Ux8: size = 1; break;
   4034                case Iop_PwAddL32Ux4: size = 2; break;
   4035                default: vassert(0);
   4036             }
   4037             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   4038                                           res, arg, size, True));
   4039             return res;
   4040          }
   4041          case Iop_Cnt8x16: {
   4042             HReg res = newVRegV(env);
   4043             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4044             UInt size = 0;
   4045             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
   4046             return res;
   4047          }
   4048          case Iop_Clz8Sx16:
   4049          case Iop_Clz16Sx8:
   4050          case Iop_Clz32Sx4: {
   4051             HReg res = newVRegV(env);
   4052             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4053             UInt size = 0;
   4054             switch(e->Iex.Binop.op) {
   4055                case Iop_Clz8Sx16: size = 0; break;
   4056                case Iop_Clz16Sx8: size = 1; break;
   4057                case Iop_Clz32Sx4: size = 2; break;
   4058                default: vassert(0);
   4059             }
   4060             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
   4061             return res;
   4062          }
   4063          case Iop_Cls8Sx16:
   4064          case Iop_Cls16Sx8:
   4065          case Iop_Cls32Sx4: {
   4066             HReg res = newVRegV(env);
   4067             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4068             UInt size = 0;
   4069             switch(e->Iex.Binop.op) {
   4070                case Iop_Cls8Sx16: size = 0; break;
   4071                case Iop_Cls16Sx8: size = 1; break;
   4072                case Iop_Cls32Sx4: size = 2; break;
   4073                default: vassert(0);
   4074             }
   4075             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
   4076             return res;
   4077          }
   4078          case Iop_FtoI32Sx4_RZ: {
   4079             HReg res = newVRegV(env);
   4080             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4081             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   4082                                           res, arg, 2, True));
   4083             return res;
   4084          }
   4085          case Iop_FtoI32Ux4_RZ: {
   4086             HReg res = newVRegV(env);
   4087             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4088             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   4089                                           res, arg, 2, True));
   4090             return res;
   4091          }
   4092          case Iop_I32StoFx4: {
   4093             HReg res = newVRegV(env);
   4094             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4095             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   4096                                           res, arg, 2, True));
   4097             return res;
   4098          }
   4099          case Iop_I32UtoFx4: {
   4100             HReg res = newVRegV(env);
   4101             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4102             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   4103                                           res, arg, 2, True));
   4104             return res;
   4105          }
   4106          case Iop_F16toF32x4: {
   4107             HReg res = newVRegV(env);
   4108             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4109             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
   4110                                           res, arg, 2, True));
   4111             return res;
   4112          }
   4113          case Iop_Recip32Fx4: {
   4114             HReg res = newVRegV(env);
   4115             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4116             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   4117                                           res, argL, 0, True));
   4118             return res;
   4119          }
   4120          case Iop_Recip32x4: {
   4121             HReg res = newVRegV(env);
   4122             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4123             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   4124                                           res, argL, 0, True));
   4125             return res;
   4126          }
   4127          case Iop_Abs32Fx4: {
   4128             DECLARE_PATTERN(p_vabd_32fx4);
   4129             DEFINE_PATTERN(p_vabd_32fx4,
   4130                            unop(Iop_Abs32Fx4,
   4131                                 binop(Iop_Sub32Fx4,
   4132                                       bind(0),
   4133                                       bind(1))));
   4134             if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
   4135                HReg res = newVRegV(env);
   4136                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   4137                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   4138                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
   4139                                               res, argL, argR, 0, True));
   4140                return res;
   4141             } else {
   4142                HReg res = newVRegV(env);
   4143                HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4144                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   4145                                              res, argL, 0, True));
   4146                return res;
   4147             }
   4148          }
   4149          case Iop_Rsqrte32Fx4: {
   4150             HReg res = newVRegV(env);
   4151             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4152             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   4153                                           res, argL, 0, True));
   4154             return res;
   4155          }
   4156          case Iop_Rsqrte32x4: {
   4157             HReg res = newVRegV(env);
   4158             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4159             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   4160                                           res, argL, 0, True));
   4161             return res;
   4162          }
   4163          case Iop_Neg32Fx4: {
   4164             HReg res = newVRegV(env);
   4165             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4166             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   4167                                           res, arg, 0, True));
   4168             return res;
   4169          }
   4170          /* ... */
   4171          default:
   4172             break;
   4173       }
   4174    }
   4175 
   4176    if (e->tag == Iex_Binop) {
   4177       switch (e->Iex.Binop.op) {
   4178          case Iop_64HLtoV128:  /* arg1 = upper 64 bits, arg2 = lower 64 bits */
   4179             /* Try to match into single "VMOV reg, imm" instruction */
   4180             if (e->Iex.Binop.arg1->tag == Iex_Const &&
   4181                 e->Iex.Binop.arg2->tag == Iex_Const &&
   4182                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
   4183                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
   4184                 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
   4185                            e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
   4186                ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   4187                ARMNImm *imm = Imm64_to_ARMNImm(imm64);
   4188                if (imm) {  /* presumably non-NULL means directly encodable */
   4189                   HReg res = newVRegV(env);
   4190                   addInstr(env, ARMInstr_NeonImm(res, imm));
   4191                   return res;
   4192                }
   4193                if ((imm64 >> 32) == 0LL &&  /* upper word zero: try low word replicated */
   4194                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
   4195                   HReg tmp1 = newVRegV(env);
   4196                   HReg tmp2 = newVRegV(env);
   4197                   HReg res = newVRegV(env);
   4198                   if (imm->type < 10) {  /* NOTE(review): why only types < 10? confirm */
   4199                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
   4200                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4201                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4202                                                     res, tmp1, tmp2, 4, True));
   4203                      return res;
   4204                   }  /* otherwise fall through; the three vregs above stay unused */
   4205                }
   4206                if ((imm64 & 0xFFFFFFFFLL) == 0LL &&  /* lower word zero: try high word replicated */
   4207                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
   4208                   HReg tmp1 = newVRegV(env);
   4209                   HReg tmp2 = newVRegV(env);
   4210                   HReg res = newVRegV(env);
   4211                   if (imm->type < 10) {  /* NOTE(review): why only types < 10? confirm */
   4212                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
   4213                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4214                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4215                                                     res, tmp1, tmp2, 4, True));
   4216                      return res;
   4217                   }  /* otherwise fall through; the three vregs above stay unused */
   4218                }
   4219             }
   4220             /* Does not match "VMOV Reg, Imm" form.  We'll have to do
   4221                it the slow way. */
   4222             {
   4223                /* local scope */
   4224                /* Done via the stack for ease of use. */
   4225                /* FIXME: assumes little endian host */
   4226                HReg       w3, w2, w1, w0;
   4227                HReg       res  = newVRegV(env);
   4228                ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
   4229                ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
   4230                ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
   4231                ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
   4232                ARMRI84*   c_16  = ARMRI84_I84(16,0);
   4233                /* Make space on the stack: move SP (R13) down by c_16 */
   4234                addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
   4235                                                       hregARM_R13(), c_16));
   4236 
   4237                /* Store the less significant 64 bits */
   4238                iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
   4239                addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
   4240                addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
   4241 
   4242                /* Store the more significant 64 bits */
   4243                iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
   4244                addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
   4245                addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
   4246 
   4247                 /* Load result back from stack. */
   4248                 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
   4249                                               mkARMAModeN_R(hregARM_R13())));
   4250 
   4251                 /* Restore SP */
   4252                 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
   4253                                            hregARM_R13(), c_16));
   4254                 return res;
   4255             } /* local scope */
   4256             goto neon_expr_bad;  /* NOTE(review): unreachable; the scope above always returns */
   4257          case Iop_AndV128: {
   4258             HReg res = newVRegV(env);
   4259             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4260             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4261             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4262                                            res, argL, argR, 4, True));
   4263             return res;
   4264          }
   4265          case Iop_OrV128: {
   4266             HReg res = newVRegV(env);
   4267             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4268             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4269             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   4270                                            res, argL, argR, 4, True));
   4271             return res;
   4272          }
   4273          case Iop_XorV128: {
   4274             HReg res = newVRegV(env);
   4275             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4276             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4277             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   4278                                            res, argL, argR, 4, True));
   4279             return res;
   4280          }
   4281          case Iop_Add8x16:
   4282          case Iop_Add16x8:
   4283          case Iop_Add32x4:
   4284          case Iop_Add64x2: {
   4285             /*
   4286             FIXME: remove this if not used
   4287             DECLARE_PATTERN(p_vrhadd_32sx4);
   4288             ULong one = (1LL << 32) | 1LL;
   4289             DEFINE_PATTERN(p_vrhadd_32sx4,
   4290                   binop(Iop_Add32x4,
   4291                         binop(Iop_Add32x4,
   4292                               binop(Iop_SarN32x4,
   4293                                     bind(0),
   4294                                     mkU8(1)),
   4295                               binop(Iop_SarN32x4,
   4296                                     bind(1),
   4297                                     mkU8(1))),
   4298                         binop(Iop_SarN32x4,
   4299                               binop(Iop_Add32x4,
   4300                                     binop(Iop_Add32x4,
   4301                                           binop(Iop_AndV128,
   4302                                                 bind(0),
   4303                                                 mkU128(one)),
   4304                                           binop(Iop_AndV128,
   4305                                                 bind(1),
   4306                                                 mkU128(one))),
   4307                                     mkU128(one)),
   4308                               mkU8(1))));
   4309             */
   4310             HReg res = newVRegV(env);
   4311             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4312             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4313             UInt size;
   4314             switch (e->Iex.Binop.op) {
   4315                case Iop_Add8x16: size = 0; break;
   4316                case Iop_Add16x8: size = 1; break;
   4317                case Iop_Add32x4: size = 2; break;
   4318                case Iop_Add64x2: size = 3; break;
   4319                default:
   4320                   ppIROp(e->Iex.Binop.op);
   4321                   vpanic("Illegal element size in VADD");
   4322             }
   4323             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   4324                                            res, argL, argR, size, True));
   4325             return res;
   4326          }
   4327          case Iop_Add32Fx4: {
   4328             HReg res = newVRegV(env);
   4329             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4330             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4331             UInt size = 0;
   4332             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
   4333                                            res, argL, argR, size, True));
   4334             return res;
   4335          }
   4336          case Iop_Recps32Fx4: {
   4337             HReg res = newVRegV(env);
   4338             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4339             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4340             UInt size = 0;
   4341             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   4342                                            res, argL, argR, size, True));
   4343             return res;
   4344          }
   4345          case Iop_Rsqrts32Fx4: {
   4346             HReg res = newVRegV(env);
   4347             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4348             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4349             UInt size = 0;
   4350             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   4351                                            res, argL, argR, size, True));
   4352             return res;
   4353          }
   4354          case Iop_InterleaveEvenLanes8x16:
   4355          case Iop_InterleaveEvenLanes16x8:
   4356          case Iop_InterleaveEvenLanes32x4:
   4357          case Iop_InterleaveOddLanes8x16:
   4358          case Iop_InterleaveOddLanes16x8:
   4359          case Iop_InterleaveOddLanes32x4: {
   4360             HReg tmp = newVRegV(env);
   4361             HReg res = newVRegV(env);
   4362             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4363             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4364             UInt size;
   4365             UInt is_lo;
   4366             switch (e->Iex.Binop.op) {
   4367                case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
   4368                case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
   4369                case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
   4370                case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
   4371                case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
   4372                case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
   4373                default:
   4374                   ppIROp(e->Iex.Binop.op);
   4375                   vpanic("Illegal element size in VTRN");
   4376             }
   4377             if (is_lo) {
   4378                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4379                                              tmp, argL, 4, True));
   4380                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4381                                              res, argR, 4, True));
   4382                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   4383                                             res, tmp, size, True));
   4384             } else {
   4385                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4386                                              tmp, argR, 4, True));
   4387                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4388                                              res, argL, 4, True));
   4389                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   4390                                             tmp, res, size, True));
   4391             }
   4392             return res;
   4393          }
   4394          case Iop_InterleaveHI8x16:
   4395          case Iop_InterleaveHI16x8:
   4396          case Iop_InterleaveHI32x4:
   4397          case Iop_InterleaveLO8x16:
   4398          case Iop_InterleaveLO16x8:
   4399          case Iop_InterleaveLO32x4: {
   4400             HReg tmp = newVRegV(env);
   4401             HReg res = newVRegV(env);
   4402             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4403             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4404             UInt size;
   4405             UInt is_lo;
   4406             switch (e->Iex.Binop.op) {
   4407                case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
   4408                case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
   4409                case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
   4410                case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
   4411                case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
   4412                case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
   4413                default:
   4414                   ppIROp(e->Iex.Binop.op);
   4415                   vpanic("Illegal element size in VZIP");
   4416             }
   4417             if (is_lo) {
   4418                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4419                                              tmp, argL, 4, True));
   4420                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4421                                              res, argR, 4, True));
   4422                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   4423                                             res, tmp, size, True));
   4424             } else {
   4425                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4426                                              tmp, argR, 4, True));
   4427                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4428                                              res, argL, 4, True));
   4429                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   4430                                             tmp, res, size, True));
   4431             }
   4432             return res;
   4433          }
   4434          case Iop_CatOddLanes8x16:
   4435          case Iop_CatOddLanes16x8:
   4436          case Iop_CatOddLanes32x4:
   4437          case Iop_CatEvenLanes8x16:
   4438          case Iop_CatEvenLanes16x8:
   4439          case Iop_CatEvenLanes32x4: {
   4440             HReg tmp = newVRegV(env);
   4441             HReg res = newVRegV(env);
   4442             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4443             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4444             UInt size;
   4445             UInt is_lo;
   4446             switch (e->Iex.Binop.op) {
   4447                case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
   4448                case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
   4449                case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
   4450                case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
   4451                case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
   4452                case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
   4453                default:
   4454                   ppIROp(e->Iex.Binop.op);
   4455                   vpanic("Illegal element size in VUZP");
   4456             }
   4457             if (is_lo) {
   4458                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4459                                              tmp, argL, 4, True));
   4460                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4461                                              res, argR, 4, True));
   4462                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   4463                                             res, tmp, size, True));
   4464             } else {
   4465                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4466                                              tmp, argR, 4, True));
   4467                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4468                                              res, argL, 4, True));
   4469                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   4470                                             tmp, res, size, True));
   4471             }
   4472             return res;
   4473          }
   4474          case Iop_QAdd8Ux16:
   4475          case Iop_QAdd16Ux8:
   4476          case Iop_QAdd32Ux4:
   4477          case Iop_QAdd64Ux2: {
   4478             HReg res = newVRegV(env);
   4479             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4480             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4481             UInt size;
   4482             switch (e->Iex.Binop.op) {
   4483                case Iop_QAdd8Ux16: size = 0; break;
   4484                case Iop_QAdd16Ux8: size = 1; break;
   4485                case Iop_QAdd32Ux4: size = 2; break;
   4486                case Iop_QAdd64Ux2: size = 3; break;
   4487                default:
   4488                   ppIROp(e->Iex.Binop.op);
   4489                   vpanic("Illegal element size in VQADDU");
   4490             }
   4491             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   4492                                            res, argL, argR, size, True));
   4493             return res;
   4494          }
   4495          case Iop_QAdd8Sx16:
   4496          case Iop_QAdd16Sx8:
   4497          case Iop_QAdd32Sx4:
   4498          case Iop_QAdd64Sx2: {
   4499             HReg res = newVRegV(env);
   4500             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4501             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4502             UInt size;
   4503             switch (e->Iex.Binop.op) {
   4504                case Iop_QAdd8Sx16: size = 0; break;
   4505                case Iop_QAdd16Sx8: size = 1; break;
   4506                case Iop_QAdd32Sx4: size = 2; break;
   4507                case Iop_QAdd64Sx2: size = 3; break;
   4508                default:
   4509                   ppIROp(e->Iex.Binop.op);
   4510                   vpanic("Illegal element size in VQADDS");
   4511             }
   4512             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   4513                                            res, argL, argR, size, True));
   4514             return res;
   4515          }
   4516          case Iop_Sub8x16:
   4517          case Iop_Sub16x8:
   4518          case Iop_Sub32x4:
   4519          case Iop_Sub64x2: {
   4520             HReg res = newVRegV(env);
   4521             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4522             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4523             UInt size;
   4524             switch (e->Iex.Binop.op) {
   4525                case Iop_Sub8x16: size = 0; break;
   4526                case Iop_Sub16x8: size = 1; break;
   4527                case Iop_Sub32x4: size = 2; break;
   4528                case Iop_Sub64x2: size = 3; break;
   4529                default:
   4530                   ppIROp(e->Iex.Binop.op);
   4531                   vpanic("Illegal element size in VSUB");
   4532             }
   4533             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4534                                            res, argL, argR, size, True));
   4535             return res;
   4536          }
   4537          case Iop_Sub32Fx4: {
   4538             HReg res = newVRegV(env);
   4539             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4540             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4541             UInt size = 0;
   4542             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
   4543                                            res, argL, argR, size, True));
   4544             return res;
   4545          }
   4546          case Iop_QSub8Ux16:
   4547          case Iop_QSub16Ux8:
   4548          case Iop_QSub32Ux4:
   4549          case Iop_QSub64Ux2: {
   4550             HReg res = newVRegV(env);
   4551             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4552             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4553             UInt size;
   4554             switch (e->Iex.Binop.op) {
   4555                case Iop_QSub8Ux16: size = 0; break;
   4556                case Iop_QSub16Ux8: size = 1; break;
   4557                case Iop_QSub32Ux4: size = 2; break;
   4558                case Iop_QSub64Ux2: size = 3; break;
   4559                default:
   4560                   ppIROp(e->Iex.Binop.op);
   4561                   vpanic("Illegal element size in VQSUBU");
   4562             }
   4563             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   4564                                            res, argL, argR, size, True));
   4565             return res;
   4566          }
   4567          case Iop_QSub8Sx16:
   4568          case Iop_QSub16Sx8:
   4569          case Iop_QSub32Sx4:
   4570          case Iop_QSub64Sx2: {
   4571             HReg res = newVRegV(env);
   4572             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4573             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4574             UInt size;
   4575             switch (e->Iex.Binop.op) {
   4576                case Iop_QSub8Sx16: size = 0; break;
   4577                case Iop_QSub16Sx8: size = 1; break;
   4578                case Iop_QSub32Sx4: size = 2; break;
   4579                case Iop_QSub64Sx2: size = 3; break;
   4580                default:
   4581                   ppIROp(e->Iex.Binop.op);
   4582                   vpanic("Illegal element size in VQSUBS");
   4583             }
   4584             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   4585                                            res, argL, argR, size, True));
   4586             return res;
   4587          }
   4588          case Iop_Max8Ux16:
   4589          case Iop_Max16Ux8:
   4590          case Iop_Max32Ux4: {
   4591             HReg res = newVRegV(env);
   4592             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4593             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4594             UInt size;
   4595             switch (e->Iex.Binop.op) {
   4596                case Iop_Max8Ux16: size = 0; break;
   4597                case Iop_Max16Ux8: size = 1; break;
   4598                case Iop_Max32Ux4: size = 2; break;
   4599                default: vpanic("Illegal element size in VMAXU");
   4600             }
   4601             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   4602                                            res, argL, argR, size, True));
   4603             return res;
   4604          }
   4605          case Iop_Max8Sx16:  /* all three ops emit one ARMneon_VMAXS */
   4606          case Iop_Max16Sx8:
   4607          case Iop_Max32Sx4: {
   4608             HReg res = newVRegV(env);
   4609             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4610             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4611             UInt size;  /* 0/1/2 for the 8x16/16x8/32x4 variant */
   4612             switch (e->Iex.Binop.op) {
   4613                case Iop_Max8Sx16: size = 0; break;
   4614                case Iop_Max16Sx8: size = 1; break;
   4615                case Iop_Max32Sx4: size = 2; break;
   4616                default: vpanic("Illegal element size in VMAXS");
   4617             }
   4618             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   4619                                            res, argL, argR, size, True));
   4620             return res;
   4621          }
   4622          case Iop_Min8Ux16:  /* all three ops emit one ARMneon_VMINU */
   4623          case Iop_Min16Ux8:
   4624          case Iop_Min32Ux4: {
   4625             HReg res = newVRegV(env);
   4626             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4627             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4628             UInt size;  /* 0/1/2 for the 8x16/16x8/32x4 variant */
   4629             switch (e->Iex.Binop.op) {
   4630                case Iop_Min8Ux16: size = 0; break;
   4631                case Iop_Min16Ux8: size = 1; break;
   4632                case Iop_Min32Ux4: size = 2; break;
   4633                default: vpanic("Illegal element size in VMINU");
   4634             }
   4635             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   4636                                            res, argL, argR, size, True));
   4637             return res;
   4638          }
   4639          case Iop_Min8Sx16:  /* all three ops emit one ARMneon_VMINS */
   4640          case Iop_Min16Sx8:
   4641          case Iop_Min32Sx4: {
   4642             HReg res = newVRegV(env);
   4643             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4644             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4645             UInt size;  /* 0/1/2 for the 8x16/16x8/32x4 variant */
   4646             switch (e->Iex.Binop.op) {
   4647                case Iop_Min8Sx16: size = 0; break;
   4648                case Iop_Min16Sx8: size = 1; break;
   4649                case Iop_Min32Sx4: size = 2; break;
   4650                default: vpanic("Illegal element size in VMINS");
   4651             }
   4652             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   4653                                            res, argL, argR, size, True));
   4654             return res;
   4655          }
   4656          case Iop_Sar8x16:
   4657          case Iop_Sar16x8:
   4658          case Iop_Sar32x4:
   4659          case Iop_Sar64x2: {
   4660             HReg res = newVRegV(env);
   4661             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4662             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4663             HReg argR2 = newVRegV(env);
   4664             HReg zero = newVRegV(env);
   4665             UInt size;
   4666             switch (e->Iex.Binop.op) {
   4667                case Iop_Sar8x16: size = 0; break;
   4668                case Iop_Sar16x8: size = 1; break;
   4669                case Iop_Sar32x4: size = 2; break;
   4670                case Iop_Sar64x2: size = 3; break;
   4671                default: vassert(0);
   4672             }
   4673             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4674             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4675                                            argR2, zero, argR, size, True));
   4676             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4677                                           res, argL, argR2, size, True));
   4678             return res;
   4679          }
   4680          case Iop_Sal8x16:
   4681          case Iop_Sal16x8:
   4682          case Iop_Sal32x4:
   4683          case Iop_Sal64x2: {
   4684             HReg res = newVRegV(env);
   4685             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4686             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4687             UInt size;
   4688             switch (e->Iex.Binop.op) {
   4689                case Iop_Sal8x16: size = 0; break;
   4690                case Iop_Sal16x8: size = 1; break;
   4691                case Iop_Sal32x4: size = 2; break;
   4692                case Iop_Sal64x2: size = 3; break;
   4693                default: vassert(0);
   4694             }
   4695             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4696                                           res, argL, argR, size, True));
   4697             return res;
   4698          }
   4699          case Iop_Shr8x16:
   4700          case Iop_Shr16x8:
   4701          case Iop_Shr32x4:
   4702          case Iop_Shr64x2: {
   4703             HReg res = newVRegV(env);
   4704             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4705             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4706             HReg argR2 = newVRegV(env);
   4707             HReg zero = newVRegV(env);
   4708             UInt size;
   4709             switch (e->Iex.Binop.op) {
   4710                case Iop_Shr8x16: size = 0; break;
   4711                case Iop_Shr16x8: size = 1; break;
   4712                case Iop_Shr32x4: size = 2; break;
   4713                case Iop_Shr64x2: size = 3; break;
   4714                default: vassert(0);
   4715             }
   4716             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4717             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4718                                            argR2, zero, argR, size, True));
   4719             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4720                                           res, argL, argR2, size, True));
   4721             return res;
   4722          }
   4723          case Iop_Shl8x16:
   4724          case Iop_Shl16x8:
   4725          case Iop_Shl32x4:
   4726          case Iop_Shl64x2: {
   4727             HReg res = newVRegV(env);
   4728             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4729             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4730             UInt size;
   4731             switch (e->Iex.Binop.op) {
   4732                case Iop_Shl8x16: size = 0; break;
   4733                case Iop_Shl16x8: size = 1; break;
   4734                case Iop_Shl32x4: size = 2; break;
   4735                case Iop_Shl64x2: size = 3; break;
   4736                default: vassert(0);
   4737             }
   4738             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4739                                           res, argL, argR, size, True));
   4740             return res;
   4741          }
   4742          case Iop_QShl8x16:
   4743          case Iop_QShl16x8:
   4744          case Iop_QShl32x4:
   4745          case Iop_QShl64x2: {
   4746             HReg res = newVRegV(env);
   4747             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4748             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4749             UInt size;
   4750             switch (e->Iex.Binop.op) {
   4751                case Iop_QShl8x16: size = 0; break;
   4752                case Iop_QShl16x8: size = 1; break;
   4753                case Iop_QShl32x4: size = 2; break;
   4754                case Iop_QShl64x2: size = 3; break;
   4755                default: vassert(0);
   4756             }
   4757             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   4758                                           res, argL, argR, size, True));
   4759             return res;
   4760          }
   4761          case Iop_QSal8x16:
   4762          case Iop_QSal16x8:
   4763          case Iop_QSal32x4:
   4764          case Iop_QSal64x2: {
   4765             HReg res = newVRegV(env);
   4766             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4767             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4768             UInt size;
   4769             switch (e->Iex.Binop.op) {
   4770                case Iop_QSal8x16: size = 0; break;
   4771                case Iop_QSal16x8: size = 1; break;
   4772                case Iop_QSal32x4: size = 2; break;
   4773                case Iop_QSal64x2: size = 3; break;
   4774                default: vassert(0);
   4775             }
   4776             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   4777                                           res, argL, argR, size, True));
   4778             return res;
   4779          }
   4780          case Iop_QShlN8x16:
   4781          case Iop_QShlN16x8:
   4782          case Iop_QShlN32x4:
   4783          case Iop_QShlN64x2: {
   4784             HReg res = newVRegV(env);
   4785             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4786             UInt size, imm;
   4787             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4788                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4789                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4790                       "second argument only\n");
   4791             }
   4792             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4793             switch (e->Iex.Binop.op) {
   4794                case Iop_QShlN8x16: size = 8 | imm; break;
   4795                case Iop_QShlN16x8: size = 16 | imm; break;
   4796                case Iop_QShlN32x4: size = 32 | imm; break;
   4797                case Iop_QShlN64x2: size = 64 | imm; break;
   4798                default: vassert(0);
   4799             }
   4800             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   4801                                           res, argL, size, True));
   4802             return res;
   4803          }
   4804          case Iop_QShlN8Sx16:
   4805          case Iop_QShlN16Sx8:
   4806          case Iop_QShlN32Sx4:
   4807          case Iop_QShlN64Sx2: {
   4808             HReg res = newVRegV(env);
   4809             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4810             UInt size, imm;
   4811             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4812                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4813                vpanic("ARM taget supports Iop_QShlNASxB with constant "
   4814                       "second argument only\n");
   4815             }
   4816             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4817             switch (e->Iex.Binop.op) {
   4818                case Iop_QShlN8Sx16: size = 8 | imm; break;
   4819                case Iop_QShlN16Sx8: size = 16 | imm; break;
   4820                case Iop_QShlN32Sx4: size = 32 | imm; break;
   4821                case Iop_QShlN64Sx2: size = 64 | imm; break;
   4822                default: vassert(0);
   4823             }
   4824             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   4825                                           res, argL, size, True));
   4826             return res;
   4827          }
   4828          case Iop_QSalN8x16:
   4829          case Iop_QSalN16x8:
   4830          case Iop_QSalN32x4:
   4831          case Iop_QSalN64x2: {
   4832             HReg res = newVRegV(env);
   4833             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4834             UInt size, imm;
   4835             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4836                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4837                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4838                       "second argument only\n");
   4839             }
   4840             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4841             switch (e->Iex.Binop.op) {
   4842                case Iop_QSalN8x16: size = 8 | imm; break;
   4843                case Iop_QSalN16x8: size = 16 | imm; break;
   4844                case Iop_QSalN32x4: size = 32 | imm; break;
   4845                case Iop_QSalN64x2: size = 64 | imm; break;
   4846                default: vassert(0);
   4847             }
   4848             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   4849                                           res, argL, size, True));
   4850             return res;
   4851          }
   4852          case Iop_ShrN8x16:
   4853          case Iop_ShrN16x8:
   4854          case Iop_ShrN32x4:
   4855          case Iop_ShrN64x2: {
   4856             HReg res = newVRegV(env);
   4857             HReg tmp = newVRegV(env);
   4858             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4859             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4860             HReg argR2 = newVRegI(env);
   4861             UInt size;
   4862             switch (e->Iex.Binop.op) {
   4863                case Iop_ShrN8x16: size = 0; break;
   4864                case Iop_ShrN16x8: size = 1; break;
   4865                case Iop_ShrN32x4: size = 2; break;
   4866                case Iop_ShrN64x2: size = 3; break;
   4867                default: vassert(0);
   4868             }
   4869             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4870             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
   4871                                           tmp, argR2, 0, True));
   4872             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4873                                           res, argL, tmp, size, True));
   4874             return res;
   4875          }
   4876          case Iop_ShlN8x16:
   4877          case Iop_ShlN16x8:
   4878          case Iop_ShlN32x4:
   4879          case Iop_ShlN64x2: {
   4880             HReg res = newVRegV(env);
   4881             HReg tmp = newVRegV(env);
   4882             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4883             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4884             UInt size;
   4885             switch (e->Iex.Binop.op) {
   4886                case Iop_ShlN8x16: size = 0; break;
   4887                case Iop_ShlN16x8: size = 1; break;
   4888                case Iop_ShlN32x4: size = 2; break;
   4889                case Iop_ShlN64x2: size = 3; break;
   4890                default: vassert(0);
   4891             }
   4892             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
   4893             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4894                                           res, argL, tmp, size, True));
   4895             return res;
   4896          }
   4897          case Iop_SarN8x16:
   4898          case Iop_SarN16x8:
   4899          case Iop_SarN32x4:
   4900          case Iop_SarN64x2: {
   4901             HReg res = newVRegV(env);
   4902             HReg tmp = newVRegV(env);
   4903             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4904             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4905             HReg argR2 = newVRegI(env);
   4906             UInt size;
   4907             switch (e->Iex.Binop.op) {
   4908                case Iop_SarN8x16: size = 0; break;
   4909                case Iop_SarN16x8: size = 1; break;
   4910                case Iop_SarN32x4: size = 2; break;
   4911                case Iop_SarN64x2: size = 3; break;
   4912                default: vassert(0);
   4913             }
   4914             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4915             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
   4916             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4917                                           res, argL, tmp, size, True));
   4918             return res;
   4919          }
   4920          case Iop_CmpGT8Ux16:
   4921          case Iop_CmpGT16Ux8:
   4922          case Iop_CmpGT32Ux4: {
   4923             HReg res = newVRegV(env);
   4924             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4925             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4926             UInt size;
   4927             switch (e->Iex.Binop.op) {
   4928                case Iop_CmpGT8Ux16: size = 0; break;
   4929                case Iop_CmpGT16Ux8: size = 1; break;
   4930                case Iop_CmpGT32Ux4: size = 2; break;
   4931                default: vassert(0);
   4932             }
   4933             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   4934                                            res, argL, argR, size, True));
   4935             return res;
   4936          }
   4937          case Iop_CmpGT8Sx16:
   4938          case Iop_CmpGT16Sx8:
   4939          case Iop_CmpGT32Sx4: {
   4940             HReg res = newVRegV(env);
   4941             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4942             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4943             UInt size;
   4944             switch (e->Iex.Binop.op) {
   4945                case Iop_CmpGT8Sx16: size = 0; break;
   4946                case Iop_CmpGT16Sx8: size = 1; break;
   4947                case Iop_CmpGT32Sx4: size = 2; break;
   4948                default: vassert(0);
   4949             }
   4950             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   4951                                            res, argL, argR, size, True));
   4952             return res;
   4953          }
   4954          case Iop_CmpEQ8x16:
   4955          case Iop_CmpEQ16x8:
   4956          case Iop_CmpEQ32x4: {
   4957             HReg res = newVRegV(env);
   4958             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4959             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4960             UInt size;
   4961             switch (e->Iex.Binop.op) {
   4962                case Iop_CmpEQ8x16: size = 0; break;
   4963                case Iop_CmpEQ16x8: size = 1; break;
   4964                case Iop_CmpEQ32x4: size = 2; break;
   4965                default: vassert(0);
   4966             }
   4967             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   4968                                            res, argL, argR, size, True));
   4969             return res;
   4970          }
   4971          case Iop_Mul8x16:
   4972          case Iop_Mul16x8:
   4973          case Iop_Mul32x4: {
   4974             HReg res = newVRegV(env);
   4975             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4976             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4977             UInt size = 0;
   4978             switch(e->Iex.Binop.op) {
   4979                case Iop_Mul8x16: size = 0; break;
   4980                case Iop_Mul16x8: size = 1; break;
   4981                case Iop_Mul32x4: size = 2; break;
   4982                default: vassert(0);
   4983             }
   4984             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   4985                                            res, argL, argR, size, True));
   4986             return res;
   4987          }
   4988          case Iop_Mul32Fx4: {
   4989             HReg res = newVRegV(env);
   4990             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4991             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4992             UInt size = 0;
   4993             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
   4994                                            res, argL, argR, size, True));
   4995             return res;
   4996          }
   4997          case Iop_Mull8Ux8:
   4998          case Iop_Mull16Ux4:
   4999          case Iop_Mull32Ux2: {
   5000             HReg res = newVRegV(env);
   5001             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5002             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5003             UInt size = 0;
   5004             switch(e->Iex.Binop.op) {
   5005                case Iop_Mull8Ux8: size = 0; break;
   5006                case Iop_Mull16Ux4: size = 1; break;
   5007                case Iop_Mull32Ux2: size = 2; break;
   5008                default: vassert(0);
   5009             }
   5010             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
   5011                                            res, argL, argR, size, True));
   5012             return res;
   5013          }
   5014 
   5015          case Iop_Mull8Sx8:
   5016          case Iop_Mull16Sx4:
   5017          case Iop_Mull32Sx2: {
   5018             HReg res = newVRegV(env);
   5019             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5020             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5021             UInt size = 0;
   5022             switch(e->Iex.Binop.op) {
   5023                case Iop_Mull8Sx8: size = 0; break;
   5024                case Iop_Mull16Sx4: size = 1; break;
   5025                case Iop_Mull32Sx2: size = 2; break;
   5026                default: vassert(0);
   5027             }
   5028             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
   5029                                            res, argL, argR, size, True));
   5030             return res;
   5031          }
   5032 
   5033          case Iop_QDMulHi16Sx8:
   5034          case Iop_QDMulHi32Sx4: {
   5035             HReg res = newVRegV(env);
   5036             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5037             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5038             UInt size = 0;
   5039             switch(e->Iex.Binop.op) {
   5040                case Iop_QDMulHi16Sx8: size = 1; break;
   5041                case Iop_QDMulHi32Sx4: size = 2; break;
   5042                default: vassert(0);
   5043             }
   5044             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   5045                                            res, argL, argR, size, True));
   5046             return res;
   5047          }
   5048 
   5049          case Iop_QRDMulHi16Sx8:
   5050          case Iop_QRDMulHi32Sx4: {
   5051             HReg res = newVRegV(env);
   5052             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5053             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5054             UInt size = 0;
   5055             switch(e->Iex.Binop.op) {
   5056                case Iop_QRDMulHi16Sx8: size = 1; break;
   5057                case Iop_QRDMulHi32Sx4: size = 2; break;
   5058                default: vassert(0);
   5059             }
   5060             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   5061                                            res, argL, argR, size, True));
   5062             return res;
   5063          }
   5064 
   5065          case Iop_QDMulLong16Sx4:
   5066          case Iop_QDMulLong32Sx2: {
   5067             HReg res = newVRegV(env);
   5068             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5069             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5070             UInt size = 0;
   5071             switch(e->Iex.Binop.op) {
   5072                case Iop_QDMulLong16Sx4: size = 1; break;
   5073                case Iop_QDMulLong32Sx2: size = 2; break;
   5074                default: vassert(0);
   5075             }
   5076             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
   5077                                            res, argL, argR, size, True));
   5078             return res;
   5079          }
   5080          case Iop_PolynomialMul8x16: {
   5081             HReg res = newVRegV(env);
   5082             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5083             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5084             UInt size = 0;
   5085             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   5086                                            res, argL, argR, size, True));
   5087             return res;
   5088          }
   5089          case Iop_Max32Fx4: {
   5090             HReg res = newVRegV(env);
   5091             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5092             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5093             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   5094                                            res, argL, argR, 2, True));
   5095             return res;
   5096          }
   5097          case Iop_Min32Fx4: {
   5098             HReg res = newVRegV(env);
   5099             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5100             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5101             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   5102                                            res, argL, argR, 2, True));
   5103             return res;
   5104          }
   5105          case Iop_PwMax32Fx4: {
   5106             HReg res = newVRegV(env);
   5107             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5108             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5109             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   5110                                            res, argL, argR, 2, True));
   5111             return res;
   5112          }
   5113          case Iop_PwMin32Fx4: {
   5114             HReg res = newVRegV(env);
   5115             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5116             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5117             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   5118                                            res, argL, argR, 2, True));
   5119             return res;
   5120          }
   5121          case Iop_CmpGT32Fx4: {
   5122             HReg res = newVRegV(env);
   5123             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5124             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5125             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   5126                                            res, argL, argR, 2, True));
   5127             return res;
   5128          }
   5129          case Iop_CmpGE32Fx4: {
   5130             HReg res = newVRegV(env);
   5131             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5132             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5133             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   5134                                            res, argL, argR, 2, True));
   5135             return res;
   5136          }
   5137          case Iop_CmpEQ32Fx4: {
   5138             HReg res = newVRegV(env);
   5139             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5140             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5141             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   5142                                            res, argL, argR, 2, True));
   5143             return res;
   5144          }
   5145 
   5146          case Iop_PolynomialMull8x8: {
   5147             HReg res = newVRegV(env);
   5148             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5149             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5150             UInt size = 0;
   5151             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
   5152                                            res, argL, argR, size, True));
   5153             return res;
   5154          }
   5155          case Iop_F32ToFixed32Ux4_RZ:
   5156          case Iop_F32ToFixed32Sx4_RZ:
   5157          case Iop_Fixed32UToF32x4_RN:
   5158          case Iop_Fixed32SToF32x4_RN: {
   5159             HReg res = newVRegV(env);
   5160             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
   5161             ARMNeonUnOp op;
   5162             UInt imm6;
   5163             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5164                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5165                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   5166                          "second argument less than 33 only\n");
   5167             }
   5168             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5169             vassert(imm6 <= 32 && imm6 > 0);
   5170             imm6 = 64 - imm6;
   5171             switch(e->Iex.Binop.op) {
   5172                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
   5173                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
   5174                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
   5175                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
   5176                default: vassert(0);
   5177             }
   5178             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
   5179             return res;
   5180          }
   5181          /*
   5182          FIXME remove if not used
   5183          case Iop_VDup8x16:
   5184          case Iop_VDup16x8:
   5185          case Iop_VDup32x4: {
   5186             HReg res = newVRegV(env);
   5187             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5188             UInt imm4;
   5189             UInt index;
   5190             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5191                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5192                   vpanic("ARM supports Iop_VDup with constant "
   5193                          "second argument less than 16 only\n");
   5194             }
   5195             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5196             switch(e->Iex.Binop.op) {
   5197                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
   5198                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
   5199                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
   5200                default: vassert(0);
   5201             }
   5202             if (imm4 >= 16) {
   5203                vpanic("ARM supports Iop_VDup with constant "
   5204                       "second argument less than 16 only\n");
   5205             }
   5206             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   5207                                           res, argL, imm4, True));
   5208             return res;
   5209          }
   5210          */
   5211          case Iop_PwAdd8x16:
   5212          case Iop_PwAdd16x8:
   5213          case Iop_PwAdd32x4: {
   5214             HReg res = newVRegV(env);
   5215             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5216             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5217             UInt size = 0;
   5218             switch(e->Iex.Binop.op) {
   5219                case Iop_PwAdd8x16: size = 0; break;
   5220                case Iop_PwAdd16x8: size = 1; break;
   5221                case Iop_PwAdd32x4: size = 2; break;
   5222                default: vassert(0);
   5223             }
   5224             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   5225                                            res, argL, argR, size, True));
   5226             return res;
   5227          }
   5228          /* ... */
   5229          default:
   5230             break;
   5231       }
   5232    }
   5233 
   5234    if (e->tag == Iex_Triop) {
   5235       switch (e->Iex.Triop.op) {
   5236          case Iop_ExtractV128: {
   5237             HReg res = newVRegV(env);
   5238             HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
   5239             HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
   5240             UInt imm4;
   5241             if (e->Iex.Triop.arg3->tag != Iex_Const ||
   5242                 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
   5243                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5244                       "third argument less than 16 only\n");
   5245             }
   5246             imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
   5247             if (imm4 >= 16) {
   5248                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5249                       "third argument less than 16 only\n");
   5250             }
   5251             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   5252                                            res, argL, argR, imm4, True));
   5253             return res;
   5254          }
   5255          default:
   5256             break;
   5257       }
   5258    }
   5259 
   5260    if (e->tag == Iex_Mux0X) {
   5261       HReg r8;
   5262       HReg rX  = iselNeonExpr(env, e->Iex.Mux0X.exprX);
   5263       HReg r0  = iselNeonExpr(env, e->Iex.Mux0X.expr0);
   5264       HReg dst = newVRegV(env);
   5265       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
   5266       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5267       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5268                                       ARMRI84_I84(0xFF,0)));
   5269       addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
   5270       return dst;
   5271    }
   5272 
   5273   neon_expr_bad:
   5274    ppIRExpr(e);
   5275    vpanic("iselNeonExpr_wrk");
   5276 }
   5277 
   5278 /*---------------------------------------------------------*/
   5279 /*--- ISEL: Floating point expressions (64 bit)         ---*/
   5280 /*---------------------------------------------------------*/
   5281 
   5282 /* Compute a 64-bit floating point value into a register, the identity
   5283    of which is returned.  The register is always virtual and of class
   5284    HRcFlt64 (iselDblExpr asserts both); in any case it must not be
   5285    changed by subsequent code emitted by the caller.  */
   5286 
   5287 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
   5288 {
   5289    /* All selection work is done by iselDblExpr_wrk; this wrapper only
   5290       validates the register it hands back. */
   5291    HReg dreg = iselDblExpr_wrk( env, e );
   5292    /* The result must be a virtual register of class HRcFlt64. */
   5293    vassert(hregClass(dreg) == HRcFlt64);
   5294    vassert(hregIsVirtual(dreg));
   5295    return dreg;
   5296 }
   5297 
   5298 /* DO NOT CALL THIS DIRECTLY */
   5299 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
   5300 {
   5301    IRType ty = typeOfIRExpr(env->type_env,e);
   5302    vassert(e);
   5303    vassert(ty == Ity_F64);
   5304 
   5305    if (e->tag == Iex_RdTmp) {
   5306       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5307    }
   5308 
   5309    if (e->tag == Iex_Const) {
   5310       /* Just handle the zero case. */
   5311       IRConst* con = e->Iex.Const.con;
   5312       if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
   5313          HReg z32 = newVRegI(env);
   5314          HReg dst = newVRegD(env);
   5315          addInstr(env, ARMInstr_Imm32(z32, 0));
   5316          addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
   5317          return dst;
   5318       }
   5319    }
   5320 
   5321    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5322       ARMAModeV* am;
   5323       HReg res = newVRegD(env);
   5324       vassert(e->Iex.Load.ty == Ity_F64);
   5325       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5326       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5327       return res;
   5328    }
   5329 
   5330    if (e->tag == Iex_Get) {
   5331       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5332       // In which case we'll have to generate more longwinded code.
   5333       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5334       HReg       res = newVRegD(env);
   5335       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5336       return res;
   5337    }
   5338 
   5339    if (e->tag == Iex_Unop) {
   5340       switch (e->Iex.Unop.op) {
   5341          case Iop_ReinterpI64asF64: {
   5342             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5343                return iselNeon64Expr(env, e->Iex.Unop.arg);
   5344             } else {
   5345                HReg srcHi, srcLo;
   5346                HReg dst = newVRegD(env);
   5347                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   5348                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
   5349                return dst;
   5350             }
   5351          }
   5352          case Iop_NegF64: {
   5353             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5354             HReg dst = newVRegD(env);
   5355             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
   5356             return dst;
   5357          }
   5358          case Iop_AbsF64: {
   5359             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5360             HReg dst = newVRegD(env);
   5361             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
   5362             return dst;
   5363          }
   5364          case Iop_F32toF64: {
   5365             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5366             HReg dst = newVRegD(env);
   5367             addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
   5368             return dst;
   5369          }
   5370          case Iop_I32UtoF64:
   5371          case Iop_I32StoF64: {
   5372             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
   5373             HReg f32   = newVRegF(env);
   5374             HReg dst   = newVRegD(env);
   5375             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
   5376             /* VMOV f32, src */
   5377             addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
   5378             /* FSITOD dst, f32 */
   5379             addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
   5380                                           dst, f32));
   5381             return dst;
   5382          }
   5383          default:
   5384             break;
   5385       }
   5386    }
   5387 
   5388    if (e->tag == Iex_Binop) {
   5389       switch (e->Iex.Binop.op) {
   5390          case Iop_SqrtF64: {
   5391             /* first arg is rounding mode; we ignore it. */
   5392             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   5393             HReg dst = newVRegD(env);
   5394             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
   5395             return dst;
   5396          }
   5397          default:
   5398             break;
   5399       }
   5400    }
   5401 
   5402    if (e->tag == Iex_Triop) {
   5403       switch (e->Iex.Triop.op) {
   5404          case Iop_DivF64:
   5405          case Iop_MulF64:
   5406          case Iop_AddF64:
   5407          case Iop_SubF64: {
   5408             ARMVfpOp op = 0; /*INVALID*/
   5409             HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
   5410             HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
   5411             HReg dst  = newVRegD(env);
   5412             switch (e->Iex.Triop.op) {
   5413                case Iop_DivF64: op = ARMvfp_DIV; break;
   5414                case Iop_MulF64: op = ARMvfp_MUL; break;
   5415                case Iop_AddF64: op = ARMvfp_ADD; break;
   5416                case Iop_SubF64: op = ARMvfp_SUB; break;
   5417                default: vassert(0);
   5418             }
   5419             addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
   5420             return dst;
   5421          }
   5422          default:
   5423             break;
   5424       }
   5425    }
   5426 
   5427    if (e->tag == Iex_Mux0X) {
   5428       if (ty == Ity_F64
   5429           && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
   5430          HReg r8;
   5431          HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
   5432          HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
   5433          HReg dst = newVRegD(env);
   5434          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
   5435          r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5436          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5437                                          ARMRI84_I84(0xFF,0)));
   5438          addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
   5439          return dst;
   5440       }
   5441    }
   5442 
   5443    ppIRExpr(e);
   5444    vpanic("iselDblExpr_wrk");
   5445 }
   5446 
   5447 
   5448 /*---------------------------------------------------------*/
   5449 /*--- ISEL: Floating point expressions (32 bit)         ---*/
   5450 /*---------------------------------------------------------*/
   5451 
   5452 /* Compute a 64-bit floating point value into a register, the identity
   5453    of which is returned.  As with iselIntExpr_R, the reg may be either
   5454    real or virtual; in any case it must not be changed by subsequent
   5455    code emitted by the caller.  */
   5456 
   5457 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
   5458 {
   5459    HReg r = iselFltExpr_wrk( env, e );
   5460 #  if 0
   5461    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5462 #  endif
   5463    vassert(hregClass(r) == HRcFlt32);
   5464    vassert(hregIsVirtual(r));
   5465    return r;
   5466 }
   5467 
   5468 /* DO NOT CALL THIS DIRECTLY */
   5469 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
   5470 {
   5471    IRType ty = typeOfIRExpr(env->type_env,e);
   5472    vassert(e);
   5473    vassert(ty == Ity_F32);
   5474 
   5475    if (e->tag == Iex_RdTmp) {
   5476       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5477    }
   5478 
   5479    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5480       ARMAModeV* am;
   5481       HReg res = newVRegF(env);
   5482       vassert(e->Iex.Load.ty == Ity_F32);
   5483       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5484       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5485       return res;
   5486    }
   5487 
   5488    if (e->tag == Iex_Get) {
   5489       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5490       // In which case we'll have to generate more longwinded code.
   5491       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5492       HReg       res = newVRegF(env);
   5493       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5494       return res;
   5495    }
   5496 
   5497    if (e->tag == Iex_Unop) {
   5498       switch (e->Iex.Unop.op) {
   5499          case Iop_ReinterpI32asF32: {
   5500             HReg dst = newVRegF(env);
   5501             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   5502             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
   5503             return dst;
   5504          }
   5505          case Iop_NegF32: {
   5506             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5507             HReg dst = newVRegF(env);
   5508             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
   5509             return dst;
   5510          }
   5511          case Iop_AbsF32: {
   5512             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5513             HReg dst = newVRegF(env);
   5514             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
   5515             return dst;
   5516          }
   5517          default:
   5518             break;
   5519       }
   5520    }
   5521 
   5522    if (e->tag == Iex_Binop) {
   5523       switch (e->Iex.Binop.op) {
   5524          case Iop_SqrtF32: {
   5525             /* first arg is rounding mode; we ignore it. */
   5526             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
   5527             HReg dst = newVRegF(env);
   5528             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
   5529             return dst;
   5530          }
   5531          case Iop_F64toF32: {
   5532             HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
   5533             set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   5534             HReg valS = newVRegF(env);
   5535             /* FCVTSD valS, valD */
   5536             addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
   5537             set_VFP_rounding_default(env);
   5538             return valS;
   5539          }
   5540          default:
   5541             break;
   5542       }
   5543    }
   5544 
   5545    if (e->tag == Iex_Triop) {
   5546       switch (e->Iex.Triop.op) {
   5547          case Iop_DivF32:
   5548          case Iop_MulF32:
   5549          case Iop_AddF32:
   5550          case Iop_SubF32: {
   5551             ARMVfpOp op = 0; /*INVALID*/
   5552             HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
   5553             HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
   5554             HReg dst  = newVRegF(env);
   5555             switch (e->Iex.Triop.op) {
   5556                case Iop_DivF32: op = ARMvfp_DIV; break;
   5557                case Iop_MulF32: op = ARMvfp_MUL; break;
   5558                case Iop_AddF32: op = ARMvfp_ADD; break;
   5559                case Iop_SubF32: op = ARMvfp_SUB; break;
   5560                default: vassert(0);
   5561             }
   5562             addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
   5563             return dst;
   5564          }
   5565          default:
   5566             break;
   5567       }
   5568    }
   5569 
   5570    if (e->tag == Iex_Mux0X) {
   5571       if (ty == Ity_F32
   5572           && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
   5573          HReg r8;
   5574          HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
   5575          HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
   5576          HReg dst = newVRegF(env);
   5577          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
   5578          r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5579          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5580                                          ARMRI84_I84(0xFF,0)));
   5581          addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
   5582          return dst;
   5583       }
   5584    }
   5585 
   5586    ppIRExpr(e);
   5587    vpanic("iselFltExpr_wrk");
   5588 }
   5589 
   5590 
   5591 /*---------------------------------------------------------*/
   5592 /*--- ISEL: Statements                                  ---*/
   5593 /*---------------------------------------------------------*/
   5594 
   5595 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
   5596 {
   5597    if (vex_traceflags & VEX_TRACE_VCODE) {
   5598       vex_printf("\n-- ");
   5599       ppIRStmt(stmt);
   5600       vex_printf("\n");
   5601    }
   5602    switch (stmt->tag) {
   5603 
   5604    /* --------- STORE --------- */
   5605    /* little-endian write to memory */
   5606    case Ist_Store: {
   5607       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
   5608       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
   5609       IREndness end  = stmt->Ist.Store.end;
   5610 
   5611       if (tya != Ity_I32 || end != Iend_LE)
   5612          goto stmt_fail;
   5613 
   5614       if (tyd == Ity_I32) {
   5615          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5616          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5617          addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
   5618          return;
   5619       }
   5620       if (tyd == Ity_I16) {
   5621          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5622          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
   5623          addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
   5624                                        False/*!isSignedLoad*/, rD, am));
   5625          return;
   5626       }
   5627       if (tyd == Ity_I8) {
   5628          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5629          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5630          addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
   5631          return;
   5632       }
   5633       if (tyd == Ity_I64) {
   5634          if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5635             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
   5636             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5637             addInstr(env, ARMInstr_NLdStD(False, dD, am));
   5638          } else {
   5639             HReg rDhi, rDlo, rA;
   5640             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
   5641             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
   5642             addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
   5643                                           ARMAMode1_RI(rA,4)));
   5644             addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
   5645                                           ARMAMode1_RI(rA,0)));
   5646          }
   5647          return;
   5648       }
   5649       if (tyd == Ity_F64) {
   5650          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
   5651          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5652          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
   5653          return;
   5654       }
   5655       if (tyd == Ity_F32) {
   5656          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
   5657          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5658          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
   5659          return;
   5660       }
   5661       if (tyd == Ity_V128) {
   5662          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
   5663          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5664          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
   5665          return;
   5666       }
   5667 
   5668       break;
   5669    }
   5670 
   5671    /* --------- PUT --------- */
   5672    /* write guest state, fixed offset */
   5673    case Ist_Put: {
   5674        IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
   5675 
   5676        if (tyd == Ity_I32) {
   5677            HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
   5678            ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
   5679            addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
   5680            return;
   5681        }
   5682        if (tyd == Ity_I64) {
   5683           if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5684              HReg addr = newVRegI(env);
   5685              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
   5686              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5687                                                 stmt->Ist.Put.offset));
   5688              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
   5689           } else {
   5690              HReg rDhi, rDlo;
   5691              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
   5692                                            stmt->Ist.Put.offset + 0);
   5693              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
   5694                                            stmt->Ist.Put.offset + 4);
   5695              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
   5696              addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
   5697              addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
   5698           }
   5699           return;
   5700        }
   5701        if (tyd == Ity_F64) {
   5702           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5703           // In which case we'll have to generate more longwinded code.
   5704           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5705           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
   5706           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
   5707           return;
   5708        }
   5709        if (tyd == Ity_F32) {
   5710           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5711           // In which case we'll have to generate more longwinded code.
   5712           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5713           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
   5714           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
   5715           return;
   5716        }
   5717        if (tyd == Ity_V128) {
   5718           HReg addr = newVRegI(env);
   5719           HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
   5720           addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5721                                        stmt->Ist.Put.offset));
   5722           addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
   5723           return;
   5724        }
   5725        break;
   5726    }
   5727 
   5728 //zz   /* --------- Indexed PUT --------- */
   5729 //zz   /* write guest state, run-time offset */
   5730 //zz   case Ist_PutI: {
   5731 //zz      ARMAMode2* am2
   5732 //zz           = genGuestArrayOffset(
   5733 //zz               env, stmt->Ist.PutI.descr,
   5734 //zz               stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
   5735 //zz
   5736 //zz       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
   5737 //zz
   5738 //zz       if (tyd == Ity_I8) {
   5739 //zz           HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
   5740 //zz           addInstr(env, ARMInstr_StoreB(reg, am2));
   5741 //zz           return;
   5742 //zz       }
   5743 //zz// CAB: Ity_I32, Ity_I16 ?
   5744 //zz       break;
   5745 //zz   }
   5746 
   5747    /* --------- TMP --------- */
   5748    /* assign value to temporary */
   5749    case Ist_WrTmp: {
   5750       IRTemp tmp = stmt->Ist.WrTmp.tmp;
   5751       IRType ty = typeOfIRTemp(env->type_env, tmp);
   5752 
   5753       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   5754          ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
   5755                                           env, stmt->Ist.WrTmp.data);
   5756          HReg     dst  = lookupIRTemp(env, tmp);
   5757          addInstr(env, ARMInstr_Mov(dst,ri84));
   5758          return;
   5759       }
   5760       if (ty == Ity_I1) {
   5761          HReg        dst  = lookupIRTemp(env, tmp);
   5762          ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
   5763          addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   5764          addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   5765          return;
   5766       }
   5767       if (ty == Ity_I64) {
   5768          if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5769             HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
   5770             HReg dst = lookupIRTemp(env, tmp);
   5771             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
   5772          } else {
   5773             HReg rHi, rLo, dstHi, dstLo;
   5774             iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
   5775             lookupIRTemp64( &dstHi, &dstLo, env, tmp);
   5776             addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
   5777             addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
   5778          }
   5779          return;
   5780       }
   5781       if (ty == Ity_F64) {
   5782          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
   5783          HReg dst = lookupIRTemp(env, tmp);
   5784          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
   5785          return;
   5786       }
   5787       if (ty == Ity_F32) {
   5788          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
   5789          HReg dst = lookupIRTemp(env, tmp);
   5790          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
   5791          return;
   5792       }
   5793       if (ty == Ity_V128) {
   5794          HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
   5795          HReg dst = lookupIRTemp(env, tmp);
   5796          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
   5797          return;
   5798       }
   5799       break;
   5800    }
   5801 
   5802    /* --------- Call to DIRTY helper --------- */
   5803    /* call complex ("dirty") helper function */
   5804    case Ist_Dirty: {
   5805       IRType   retty;
   5806       IRDirty* d = stmt->Ist.Dirty.details;
   5807       Bool     passBBP = False;
   5808 
   5809       if (d->nFxState == 0)
   5810          vassert(!d->needsBBP);
   5811 
   5812       passBBP = toBool(d->nFxState > 0 && d->needsBBP);
   5813 
   5814       /* Marshal args, do the call, clear stack. */
   5815       Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
   5816       if (!ok)
   5817          break; /* will go to stmt_fail: */
   5818 
   5819       /* Now figure out what to do with the returned value, if any. */
   5820       if (d->tmp == IRTemp_INVALID)
   5821          /* No return value.  Nothing to do. */
   5822          return;
   5823 
   5824       retty = typeOfIRTemp(env->type_env, d->tmp);
   5825 
   5826       if (retty == Ity_I64) {
   5827          if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5828             HReg tmp = lookupIRTemp(env, d->tmp);
   5829             addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
   5830                                                      hregARM_R0()));
   5831          } else {
   5832             HReg dstHi, dstLo;
   5833             /* The returned value is in r1:r0.  Park it in the
   5834                register-pair associated with tmp. */
   5835             lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
   5836             addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
   5837             addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
   5838          }
   5839          return;
   5840       }
   5841       if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
   5842          /* The returned value is in r0.  Park it in the register
   5843             associated with tmp. */
   5844          HReg dst = lookupIRTemp(env, d->tmp);
   5845          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
   5846          return;
   5847       }
   5848 
   5849       break;
   5850    }
   5851 
   5852    /* --------- Load Linked and Store Conditional --------- */
   5853    case Ist_LLSC: {
   5854       if (stmt->Ist.LLSC.storedata == NULL) {
   5855          /* LL */
   5856          IRTemp res = stmt->Ist.LLSC.result;
   5857          IRType ty  = typeOfIRTemp(env->type_env, res);
   5858          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   5859             Int  szB   = 0;
   5860             HReg r_dst = lookupIRTemp(env, res);
   5861             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5862             switch (ty) {
   5863                case Ity_I8:  szB = 1; break;
   5864                case Ity_I16: szB = 2; break;
   5865                case Ity_I32: szB = 4; break;
   5866                default:      vassert(0);
   5867             }
   5868             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   5869             addInstr(env, ARMInstr_LdrEX(szB));
   5870             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
   5871             return;
   5872          }
   5873          if (ty == Ity_I64) {
   5874             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5875             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   5876             addInstr(env, ARMInstr_LdrEX(8));
   5877             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
   5878                move it into a result register pair.  On a NEON capable
   5879                CPU, the result register will be a 64 bit NEON
   5880                register, so we must move it there instead. */
   5881             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   5882                HReg dst = lookupIRTemp(env, res);
   5883                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
   5884                                                         hregARM_R2()));
   5885             } else {
   5886                HReg r_dst_hi, r_dst_lo;
   5887                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
   5888                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
   5889                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
   5890             }
   5891             return;
   5892          }
   5893          /*NOTREACHED*/
   5894          vassert(0);
   5895       } else {
   5896          /* SC */
   5897          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
   5898          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
   5899             Int  szB = 0;
   5900             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
   5901             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5902             switch (tyd) {
   5903                case Ity_I8:  szB = 1; break;
   5904                case Ity_I16: szB = 2; break;
   5905                case Ity_I32: szB = 4; break;
   5906                default:      vassert(0);
   5907             }
   5908             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
   5909             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   5910             addInstr(env, ARMInstr_StrEX(szB));
   5911          } else {
   5912             vassert(tyd == Ity_I64);
   5913             /* This is really ugly.  There is no is/is-not NEON
   5914                decision akin to the case for LL, because iselInt64Expr
   5915                fudges this for us, and always gets the result into two
   5916                GPRs even if this means moving it from a NEON
   5917                register. */
   5918             HReg rDhi, rDlo;
   5919             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
   5920             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5921             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
   5922             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
   5923             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   5924             addInstr(env, ARMInstr_StrEX(8));
   5925          }
   5926          /* now r0 is 1 if failed, 0 if success.  Change to IR
   5927             conventions (0 is fail, 1 is success).  Also transfer
   5928             result to r_res. */
   5929          IRTemp   res   = stmt->Ist.LLSC.result;
   5930          IRType   ty    = typeOfIRTemp(env->type_env, res);
   5931          HReg     r_res = lookupIRTemp(env, res);
   5932          ARMRI84* one   = ARMRI84_I84(1,0);
   5933          vassert(ty == Ity_I1);
   5934          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
   5935          /* And be conservative -- mask off all but the lowest bit */
   5936          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
   5937          return;
   5938       }
   5939       break;
   5940    }
   5941 
   5942    /* --------- MEM FENCE --------- */
   5943    case Ist_MBE:
   5944       switch (stmt->Ist.MBE.event) {
   5945          case Imbe_Fence:
   5946             addInstr(env, ARMInstr_MFence());
   5947             return;
   5948          case Imbe_CancelReservation:
   5949             addInstr(env, ARMInstr_CLREX());
   5950             return;
   5951          default:
   5952             break;
   5953       }
   5954       break;
   5955 
   5956    /* --------- INSTR MARK --------- */
   5957    /* Doesn't generate any executable code ... */
   5958    case Ist_IMark:
   5959        return;
   5960 
   5961    /* --------- NO-OP --------- */
   5962    case Ist_NoOp:
   5963        return;
   5964 
   5965    /* --------- EXIT --------- */
   5966    case Ist_Exit: {
   5967       HReg        gnext;
   5968       ARMCondCode cc;
   5969       if (stmt->Ist.Exit.dst->tag != Ico_U32)
   5970          vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
   5971       gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
   5972       cc    = iselCondCode(env, stmt->Ist.Exit.guard);
   5973       addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
   5974       addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
   5975       return;
   5976    }
   5977 
   5978    default: break;
   5979    }
   5980   stmt_fail:
   5981    ppIRStmt(stmt);
   5982    vpanic("iselStmt");
   5983 }
   5984 
   5985 
   5986 /*---------------------------------------------------------*/
   5987 /*--- ISEL: Basic block terminators (Nexts)             ---*/
   5988 /*---------------------------------------------------------*/
   5989 
   5990 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
   5991 {
   5992    HReg rDst;
   5993    if (vex_traceflags & VEX_TRACE_VCODE) {
   5994       vex_printf("\n-- goto {");
   5995       ppIRJumpKind(jk);
   5996       vex_printf("} ");
   5997       ppIRExpr(next);
   5998       vex_printf("\n");
   5999    }
   6000    rDst = iselIntExpr_R(env, next);
   6001    addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
   6002    addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
   6003 }
   6004 
   6005 
   6006 /*---------------------------------------------------------*/
   6007 /*--- Insn selector top-level                           ---*/
   6008 /*---------------------------------------------------------*/
   6009 
   6010 /* Translate an entire SB to arm code. */
   6011 
   6012 HInstrArray* iselSB_ARM ( IRSB* bb, VexArch      arch_host,
   6013                                     VexArchInfo* archinfo_host,
   6014                                     VexAbiInfo*  vbi/*UNUSED*/ )
   6015 {
   6016    Int      i, j;
   6017    HReg     hreg, hregHI;
   6018    ISelEnv* env;
   6019    UInt     hwcaps_host = archinfo_host->hwcaps;
   6020    static UInt counter = 0;
   6021 
   6022    /* sanity ... */
   6023    vassert(arch_host == VexArchARM);
   6024 
   6025    /* hwcaps should not change from one ISEL call to another. */
   6026    arm_hwcaps = hwcaps_host;
   6027 
   6028    /* Make up an initial environment to use. */
   6029    env = LibVEX_Alloc(sizeof(ISelEnv));
   6030    env->vreg_ctr = 0;
   6031 
   6032    /* Set up output code array. */
   6033    env->code = newHInstrArray();
   6034 
   6035    /* Copy BB's type env. */
   6036    env->type_env = bb->tyenv;
   6037 
   6038    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
   6039       change as we go along. */
   6040    env->n_vregmap = bb->tyenv->types_used;
   6041    env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6042    env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6043 
   6044    /* For each IR temporary, allocate a suitably-kinded virtual
   6045       register. */
   6046    j = 0;
   6047    for (i = 0; i < env->n_vregmap; i++) {
   6048       hregHI = hreg = INVALID_HREG;
   6049       switch (bb->tyenv->types[i]) {
   6050          case Ity_I1:
   6051          case Ity_I8:
   6052          case Ity_I16:
   6053          case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
   6054          case Ity_I64:
   6055             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
   6056                hreg = mkHReg(j++, HRcFlt64, True);
   6057             } else {
   6058                hregHI = mkHReg(j++, HRcInt32, True);
   6059                hreg   = mkHReg(j++, HRcInt32, True);
   6060             }
   6061             break;
   6062          case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
   6063          case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
   6064          case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
   6065          default: ppIRType(bb->tyenv->types[i]);
   6066                   vpanic("iselBB: IRTemp type");
   6067       }
   6068       env->vregmap[i]   = hreg;
   6069       env->vregmapHI[i] = hregHI;
   6070    }
   6071    env->vreg_ctr = j;
   6072 
   6073    /* Keep a copy of the link reg, since any call to a helper function
   6074       will trash it, and we can't get back to the dispatcher once that
   6075       happens. */
   6076    env->savedLR = newVRegI(env);
   6077    addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
   6078 
   6079    /* Ok, finally we can iterate over the statements. */
   6080    for (i = 0; i < bb->stmts_used; i++)
   6081       iselStmt(env,bb->stmts[i]);
   6082 
   6083    iselNext(env,bb->next,bb->jumpkind);
   6084 
   6085    /* record the number of vregs we used. */
   6086    env->code->n_vregs = env->vreg_ctr;
   6087    counter++;
   6088    return env->code;
   6089 }
   6090 
   6091 
   6092 /*---------------------------------------------------------------*/
   6093 /*--- end                                     host_arm_isel.c ---*/
   6094 /*---------------------------------------------------------------*/
   6095